/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/lib/CodeGen/MachineScheduler.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===- MachineScheduler.cpp - Machine Instruction Scheduler ---------------===// |
2 | | // |
3 | | // The LLVM Compiler Infrastructure |
4 | | // |
5 | | // This file is distributed under the University of Illinois Open Source |
6 | | // License. See LICENSE.TXT for details. |
7 | | // |
8 | | //===----------------------------------------------------------------------===// |
9 | | // |
10 | | // MachineScheduler schedules machine instructions after phi elimination. It |
11 | | // preserves LiveIntervals so it can be invoked before register allocation. |
12 | | // |
13 | | //===----------------------------------------------------------------------===// |
14 | | |
15 | | #include "llvm/CodeGen/MachineScheduler.h" |
16 | | #include "llvm/ADT/ArrayRef.h" |
17 | | #include "llvm/ADT/BitVector.h" |
18 | | #include "llvm/ADT/DenseMap.h" |
19 | | #include "llvm/ADT/PriorityQueue.h" |
20 | | #include "llvm/ADT/STLExtras.h" |
21 | | #include "llvm/ADT/SmallVector.h" |
22 | | #include "llvm/ADT/iterator_range.h" |
23 | | #include "llvm/Analysis/AliasAnalysis.h" |
24 | | #include "llvm/CodeGen/LiveInterval.h" |
25 | | #include "llvm/CodeGen/LiveIntervalAnalysis.h" |
26 | | #include "llvm/CodeGen/MachineBasicBlock.h" |
27 | | #include "llvm/CodeGen/MachineDominators.h" |
28 | | #include "llvm/CodeGen/MachineFunction.h" |
29 | | #include "llvm/CodeGen/MachineFunctionPass.h" |
30 | | #include "llvm/CodeGen/MachineInstr.h" |
31 | | #include "llvm/CodeGen/MachineLoopInfo.h" |
32 | | #include "llvm/CodeGen/MachineOperand.h" |
33 | | #include "llvm/CodeGen/MachinePassRegistry.h" |
34 | | #include "llvm/CodeGen/MachineRegisterInfo.h" |
35 | | #include "llvm/CodeGen/MachineValueType.h" |
36 | | #include "llvm/CodeGen/Passes.h" |
37 | | #include "llvm/CodeGen/RegisterClassInfo.h" |
38 | | #include "llvm/CodeGen/RegisterPressure.h" |
39 | | #include "llvm/CodeGen/ScheduleDAG.h" |
40 | | #include "llvm/CodeGen/ScheduleDAGInstrs.h" |
41 | | #include "llvm/CodeGen/ScheduleDAGMutation.h" |
42 | | #include "llvm/CodeGen/ScheduleDFS.h" |
43 | | #include "llvm/CodeGen/ScheduleHazardRecognizer.h" |
44 | | #include "llvm/CodeGen/SlotIndexes.h" |
45 | | #include "llvm/CodeGen/TargetPassConfig.h" |
46 | | #include "llvm/CodeGen/TargetSchedule.h" |
47 | | #include "llvm/MC/LaneBitmask.h" |
48 | | #include "llvm/Pass.h" |
49 | | #include "llvm/Support/CommandLine.h" |
50 | | #include "llvm/Support/Compiler.h" |
51 | | #include "llvm/Support/Debug.h" |
52 | | #include "llvm/Support/ErrorHandling.h" |
53 | | #include "llvm/Support/GraphWriter.h" |
54 | | #include "llvm/Support/raw_ostream.h" |
55 | | #include "llvm/Target/TargetInstrInfo.h" |
56 | | #include "llvm/Target/TargetLowering.h" |
57 | | #include "llvm/Target/TargetRegisterInfo.h" |
58 | | #include "llvm/Target/TargetSubtargetInfo.h" |
59 | | #include <algorithm> |
60 | | #include <cassert> |
61 | | #include <cstdint> |
62 | | #include <iterator> |
63 | | #include <limits> |
64 | | #include <memory> |
65 | | #include <string> |
66 | | #include <tuple> |
67 | | #include <utility> |
68 | | #include <vector> |
69 | | |
70 | | using namespace llvm; |
71 | | |
#define DEBUG_TYPE "machine-scheduler"

namespace llvm {

// These options are exported (non-static) so scheduling strategies elsewhere
// in LLVM can consult the forced direction.
cl::opt<bool> ForceTopDown("misched-topdown", cl::Hidden,
                           cl::desc("Force top-down list scheduling"));
cl::opt<bool> ForceBottomUp("misched-bottomup", cl::Hidden,
                            cl::desc("Force bottom-up list scheduling"));
cl::opt<bool>
DumpCriticalPathLength("misched-dcpl", cl::Hidden,
                       cl::desc("Print critical path length to stdout"));

} // end namespace llvm

#ifndef NDEBUG
static cl::opt<bool> ViewMISchedDAGs("view-misched-dags", cl::Hidden,
  cl::desc("Pop up a window to show MISched dags after they are processed"));

/// In some situations a few uninteresting nodes depend on nearly all other
/// nodes in the graph, provide a cutoff to hide them.
static cl::opt<unsigned> ViewMISchedCutoff("view-misched-cutoff", cl::Hidden,
  cl::desc("Hide nodes with more predecessor/successor than cutoff"));

static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden,
  cl::desc("Stop scheduling after N instructions"), cl::init(~0U));

// Debug-only filters restricting scheduling to a single function or block.
static cl::opt<std::string> SchedOnlyFunc("misched-only-func", cl::Hidden,
  cl::desc("Only schedule this function"));
static cl::opt<unsigned> SchedOnlyBlock("misched-only-block", cl::Hidden,
  cl::desc("Only schedule this MBB#"));
#else
static bool ViewMISchedDAGs = false;
#endif // NDEBUG

/// Avoid quadratic complexity in unusually large basic blocks by limiting the
/// size of the ready lists.
static cl::opt<unsigned> ReadyListLimit("misched-limit", cl::Hidden,
  cl::desc("Limit ready list to N instructions"), cl::init(256));

static cl::opt<bool> EnableRegPressure("misched-regpressure", cl::Hidden,
  cl::desc("Enable register pressure scheduling."), cl::init(true));

static cl::opt<bool> EnableCyclicPath("misched-cyclicpath", cl::Hidden,
  cl::desc("Enable cyclic critical path analysis."), cl::init(true));

static cl::opt<bool> EnableMemOpCluster("misched-cluster", cl::Hidden,
                                        cl::desc("Enable memop clustering."),
                                        cl::init(true));

static cl::opt<bool> VerifyScheduling("verify-misched", cl::Hidden,
  cl::desc("Verify machine instrs before and after machine scheduling"));

// DAG subtrees must have at least this many nodes.
static const unsigned MinSubtreeSize = 8;
126 | | |
// Pin the vtables to this file.
void MachineSchedStrategy::anchor() {}

void ScheduleDAGMutation::anchor() {}
131 | | |
//===----------------------------------------------------------------------===//
// Machine Instruction Scheduling Pass and Registry
//===----------------------------------------------------------------------===//

MachineSchedContext::MachineSchedContext() {
  // Owned by this context; released in ~MachineSchedContext.
  RegClassInfo = new RegisterClassInfo();
}
139 | | |
MachineSchedContext::~MachineSchedContext() {
  // Matches the allocation in the constructor.
  delete RegClassInfo;
}
143 | | |
namespace {

/// Base class for a machine scheduler class that can run at any point.
class MachineSchedulerBase : public MachineSchedContext,
                             public MachineFunctionPass {
public:
  MachineSchedulerBase(char &ID): MachineFunctionPass(ID) {}

  void print(raw_ostream &O, const Module* = nullptr) const override;

protected:
  /// Shared region-iteration driver for both the pre-RA and post-RA
  /// schedulers. FixKillFlags requests kill-flag fixup after each block.
  void scheduleRegions(ScheduleDAGInstrs &Scheduler, bool FixKillFlags);
};

/// MachineScheduler runs after coalescing and before register allocation.
class MachineScheduler : public MachineSchedulerBase {
public:
  MachineScheduler();

  void getAnalysisUsage(AnalysisUsage &AU) const override;

  bool runOnMachineFunction(MachineFunction&) override;

  static char ID; // Class identification, replacement for typeinfo

protected:
  /// Create the scheduler implementation selected by command line or target.
  ScheduleDAGInstrs *createMachineScheduler();
};

/// PostMachineScheduler runs shortly before code emission.
class PostMachineScheduler : public MachineSchedulerBase {
public:
  PostMachineScheduler();

  void getAnalysisUsage(AnalysisUsage &AU) const override;

  bool runOnMachineFunction(MachineFunction&) override;

  static char ID; // Class identification, replacement for typeinfo

protected:
  /// Create the post-RA scheduler implementation configured by the target.
  ScheduleDAGInstrs *createPostMachineScheduler();
};

} // end anonymous namespace
189 | | |
char MachineScheduler::ID = 0;

char &llvm::MachineSchedulerID = MachineScheduler::ID;

// Register the pass and its analysis dependencies with the PassRegistry.
INITIALIZE_PASS_BEGIN(MachineScheduler, DEBUG_TYPE,
                      "Machine Instruction Scheduler", false, false)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
INITIALIZE_PASS_END(MachineScheduler, DEBUG_TYPE,
                    "Machine Instruction Scheduler", false, false)
202 | | |
MachineScheduler::MachineScheduler() : MachineSchedulerBase(ID) {
  // Ensure the pass is registered before first use.
  initializeMachineSchedulerPass(*PassRegistry::getPassRegistry());
}
206 | | |
void MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
  // Scheduling only reorders instructions within blocks, so the CFG,
  // SlotIndexes, and LiveIntervals survive this pass.
  AU.setPreservesCFG();
  AU.addRequiredID(MachineDominatorsID);
  AU.addRequired<MachineLoopInfo>();
  AU.addRequired<AAResultsWrapperPass>();
  AU.addRequired<TargetPassConfig>();
  AU.addRequired<SlotIndexes>();
  AU.addPreserved<SlotIndexes>();
  AU.addRequired<LiveIntervals>();
  AU.addPreserved<LiveIntervals>();
  MachineFunctionPass::getAnalysisUsage(AU);
}
219 | | |
char PostMachineScheduler::ID = 0;

char &llvm::PostMachineSchedulerID = PostMachineScheduler::ID;

INITIALIZE_PASS(PostMachineScheduler, "postmisched",
                "PostRA Machine Instruction Scheduler", false, false)
226 | | |
PostMachineScheduler::PostMachineScheduler() : MachineSchedulerBase(ID) {
  // Ensure the pass is registered before first use.
  initializePostMachineSchedulerPass(*PassRegistry::getPassRegistry());
}
230 | | |
void PostMachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
  // Post-RA scheduling runs after register allocation; no live-interval
  // analyses are required, only loop info and the target configuration.
  AU.setPreservesCFG();
  AU.addRequiredID(MachineDominatorsID);
  AU.addRequired<MachineLoopInfo>();
  AU.addRequired<TargetPassConfig>();
  MachineFunctionPass::getAnalysisUsage(AU);
}
238 | | |
MachinePassRegistry MachineSchedRegistry::Registry;

/// A dummy default scheduler factory indicates whether the scheduler
/// is overridden on the command line.
static ScheduleDAGInstrs *useDefaultMachineSched(MachineSchedContext *C) {
  // Returning null tells createMachineScheduler to ask the target instead.
  return nullptr;
}
246 | | |
/// MachineSchedOpt allows command line selection of the scheduler.
static cl::opt<MachineSchedRegistry::ScheduleDAGCtor, false,
               RegisterPassParser<MachineSchedRegistry>>
MachineSchedOpt("misched",
                cl::init(&useDefaultMachineSched), cl::Hidden,
                cl::desc("Machine instruction scheduler to use"));

static MachineSchedRegistry
DefaultSchedRegistry("default", "Use the target's default scheduler choice.",
                     useDefaultMachineSched);

// When given on the command line these override the target's preference;
// otherwise the subtarget decides (see runOnMachineFunction).
static cl::opt<bool> EnableMachineSched(
    "enable-misched",
    cl::desc("Enable the machine instruction scheduling pass."), cl::init(true),
    cl::Hidden);

static cl::opt<bool> EnablePostRAMachineSched(
    "enable-post-misched",
    cl::desc("Enable the post-ra machine instruction scheduling pass."),
    cl::init(true), cl::Hidden);
267 | | |
268 | | /// Decrement this iterator until reaching the top or a non-debug instr. |
269 | | static MachineBasicBlock::const_iterator |
270 | | priorNonDebug(MachineBasicBlock::const_iterator I, |
271 | 19.3M | MachineBasicBlock::const_iterator Beg) { |
272 | 19.3M | assert(I != Beg && "reached the top of the region, cannot decrement"); |
273 | 19.3M | while (--I != Beg19.3M ) { |
274 | 15.2M | if (!I->isDebugValue()) |
275 | 15.2M | break; |
276 | 15.2M | } |
277 | 19.3M | return I; |
278 | 19.3M | } |
279 | | |
280 | | /// Non-const version. |
281 | | static MachineBasicBlock::iterator |
282 | | priorNonDebug(MachineBasicBlock::iterator I, |
283 | 19.3M | MachineBasicBlock::const_iterator Beg) { |
284 | 19.3M | return priorNonDebug(MachineBasicBlock::const_iterator(I), Beg) |
285 | 19.3M | .getNonConstIterator(); |
286 | 19.3M | } |
287 | | |
288 | | /// If this iterator is a debug value, increment until reaching the End or a |
289 | | /// non-debug instruction. |
290 | | static MachineBasicBlock::const_iterator |
291 | | nextIfDebug(MachineBasicBlock::const_iterator I, |
292 | 11.9M | MachineBasicBlock::const_iterator End) { |
293 | 11.9M | for(; I != End11.9M ; ++I359 ) { |
294 | 11.3M | if (!I->isDebugValue()) |
295 | 11.3M | break; |
296 | 11.3M | } |
297 | 11.9M | return I; |
298 | 11.9M | } |
299 | | |
300 | | /// Non-const version. |
301 | | static MachineBasicBlock::iterator |
302 | | nextIfDebug(MachineBasicBlock::iterator I, |
303 | 9.80M | MachineBasicBlock::const_iterator End) { |
304 | 9.80M | return nextIfDebug(MachineBasicBlock::const_iterator(I), End) |
305 | 9.80M | .getNonConstIterator(); |
306 | 9.80M | } |
307 | | |
/// Instantiate a ScheduleDAGInstrs that will be owned by the caller.
ScheduleDAGInstrs *MachineScheduler::createMachineScheduler() {
  // Select the scheduler, or set the default.
  MachineSchedRegistry::ScheduleDAGCtor Ctor = MachineSchedOpt;
  if (Ctor != useDefaultMachineSched)
    return Ctor(this);

  // Get the default scheduler set by the target for this function.
  ScheduleDAGInstrs *Scheduler = PassConfig->createMachineScheduler(this);
  if (Scheduler)
    return Scheduler;

  // Default to GenericScheduler.
  return createGenericSchedLive(this);
}
323 | | |
/// Instantiate a ScheduleDAGInstrs for PostRA scheduling that will be owned by
/// the caller. We don't have a command line option to override the postRA
/// scheduler. The Target must configure it.
ScheduleDAGInstrs *PostMachineScheduler::createPostMachineScheduler() {
  // Get the postRA scheduler set by the target for this function.
  ScheduleDAGInstrs *Scheduler = PassConfig->createPostMachineScheduler(this);
  if (Scheduler)
    return Scheduler;

  // Default to GenericScheduler.
  return createGenericSchedPostRA(this);
}
336 | | |
/// Top-level MachineScheduler pass driver.
///
/// Visit blocks in function order. Divide each block into scheduling regions
/// and visit them bottom-up. Visiting regions bottom-up is not required, but is
/// consistent with the DAG builder, which traverses the interior of the
/// scheduling regions bottom-up.
///
/// This design avoids exposing scheduling boundaries to the DAG builder,
/// simplifying the DAG builder's support for "special" target instructions.
/// At the same time the design allows target schedulers to operate across
/// scheduling boundaries, for example to bundle the boundary instructions
/// without reordering them. This creates complexity, because the target
/// scheduler must update the RegionBegin and RegionEnd positions cached by
/// ScheduleDAGInstrs whenever adding or removing instructions. A much simpler
/// design would be to split blocks at scheduling boundaries, but LLVM has a
/// general bias against block splitting purely for implementation simplicity.
bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
  if (skipFunction(*mf.getFunction()))
    return false;

  // An explicit -enable-misched on the command line overrides the
  // subtarget's preference.
  if (EnableMachineSched.getNumOccurrences()) {
    if (!EnableMachineSched)
      return false;
  } else if (!mf.getSubtarget().enableMachineScheduler())
    return false;

  DEBUG(dbgs() << "Before MISched:\n"; mf.print(dbgs()));

  // Initialize the context of the pass.
  MF = &mf;
  MLI = &getAnalysis<MachineLoopInfo>();
  MDT = &getAnalysis<MachineDominatorTree>();
  PassConfig = &getAnalysis<TargetPassConfig>();
  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();

  LIS = &getAnalysis<LiveIntervals>();

  if (VerifyScheduling) {
    DEBUG(LIS->dump());
    MF->verify(this, "Before machine scheduling.");
  }
  RegClassInfo->runOnMachineFunction(*MF);

  // Instantiate the selected scheduler for this target, function, and
  // optimization level.
  std::unique_ptr<ScheduleDAGInstrs> Scheduler(createMachineScheduler());
  scheduleRegions(*Scheduler, false);

  DEBUG(LIS->dump());
  if (VerifyScheduling)
    MF->verify(this, "After machine scheduling.");
  return true;
}
390 | | |
/// Top-level PostMachineScheduler pass driver; mirrors the pre-RA driver but
/// runs with FixKillFlags and without live-interval bookkeeping.
bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) {
  if (skipFunction(*mf.getFunction()))
    return false;

  // An explicit -enable-post-misched on the command line overrides the
  // subtarget's preference.
  if (EnablePostRAMachineSched.getNumOccurrences()) {
    if (!EnablePostRAMachineSched)
      return false;
  } else if (!mf.getSubtarget().enablePostRAScheduler()) {
    DEBUG(dbgs() << "Subtarget disables post-MI-sched.\n");
    return false;
  }
  DEBUG(dbgs() << "Before post-MI-sched:\n"; mf.print(dbgs()));

  // Initialize the context of the pass.
  MF = &mf;
  MLI = &getAnalysis<MachineLoopInfo>();
  PassConfig = &getAnalysis<TargetPassConfig>();

  if (VerifyScheduling)
    MF->verify(this, "Before post machine scheduling.");

  // Instantiate the selected scheduler for this target, function, and
  // optimization level.
  std::unique_ptr<ScheduleDAGInstrs> Scheduler(createPostMachineScheduler());
  scheduleRegions(*Scheduler, true);

  if (VerifyScheduling)
    MF->verify(this, "After post machine scheduling.");
  return true;
}
421 | | |
422 | | /// Return true of the given instruction should not be included in a scheduling |
423 | | /// region. |
424 | | /// |
425 | | /// MachineScheduler does not currently support scheduling across calls. To |
426 | | /// handle calls, the DAG builder needs to be modified to create register |
427 | | /// anti/output dependencies on the registers clobbered by the call's regmask |
428 | | /// operand. In PreRA scheduling, the stack pointer adjustment already prevents |
429 | | /// scheduling across calls. In PostRA scheduling, we need the isCall to enforce |
430 | | /// the boundary, but there would be no benefit to postRA scheduling across |
431 | | /// calls this late anyway. |
432 | | static bool isSchedBoundary(MachineBasicBlock::iterator MI, |
433 | | MachineBasicBlock *MBB, |
434 | | MachineFunction *MF, |
435 | 31.1M | const TargetInstrInfo *TII) { |
436 | 28.7M | return MI->isCall() || TII->isSchedulingBoundary(*MI, MBB, *MF); |
437 | 31.1M | } |
438 | | |
/// A region of an MBB for scheduling.
namespace {
struct SchedRegion {
  /// RegionBegin is the first instruction in the scheduling region, and
  /// RegionEnd is either MBB->end() or the scheduling boundary after the
  /// last instruction in the scheduling region. These iterators cannot refer
  /// to instructions outside of the identified scheduling region because
  /// those may be reordered before scheduling this region.
  MachineBasicBlock::iterator RegionBegin;
  MachineBasicBlock::iterator RegionEnd;
  // Count of non-debug instructions in [RegionBegin, RegionEnd); see
  // getSchedRegions.
  unsigned NumRegionInstrs;

  SchedRegion(MachineBasicBlock::iterator B, MachineBasicBlock::iterator E,
              unsigned N) :
    RegionBegin(B), RegionEnd(E), NumRegionInstrs(N) {}
};
} // end anonymous namespace
456 | | |
using MBBRegionsVector = SmallVector<SchedRegion, 16>;

/// Partition MBB into scheduling regions, splitting at boundary instructions
/// (see isSchedBoundary). Regions are collected bottom-up; they are reversed
/// into top-down order when RegionsTopDown is set.
static void
getSchedRegions(MachineBasicBlock *MBB,
                MBBRegionsVector &Regions,
                bool RegionsTopDown) {
  MachineFunction *MF = MBB->getParent();
  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();

  MachineBasicBlock::iterator I = nullptr;
  for(MachineBasicBlock::iterator RegionEnd = MBB->end();
      RegionEnd != MBB->begin(); RegionEnd = I) {

    // Avoid decrementing RegionEnd for blocks with no terminator.
    if (RegionEnd != MBB->end() ||
        isSchedBoundary(&*std::prev(RegionEnd), &*MBB, MF, TII)) {
      --RegionEnd;
    }

    // The next region starts above the previous region. Look backward in the
    // instruction stream until we find the nearest boundary.
    unsigned NumRegionInstrs = 0;
    I = RegionEnd;
    for (;I != MBB->begin(); --I) {
      MachineInstr &MI = *std::prev(I);
      if (isSchedBoundary(&MI, &*MBB, MF, TII))
        break;
      if (!MI.isDebugValue())
        // MBB::size() uses instr_iterator to count. Here we need a bundle to
        // count as a single instruction.
        ++NumRegionInstrs;
    }

    Regions.push_back(SchedRegion(I, RegionEnd, NumRegionInstrs));
  }

  if (RegionsTopDown)
    std::reverse(Regions.begin(), Regions.end());
}
496 | | |
/// Main driver for both MachineScheduler and PostMachineScheduler.
void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler,
                                           bool FixKillFlags) {
  // Visit all machine basic blocks.
  //
  // TODO: Visit blocks in global postorder or postorder within the bottom-up
  // loop tree. Then we can optionally compute global RegPressure.
  for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end();
       MBB != MBBEnd; ++MBB) {

    Scheduler.startBlock(&*MBB);

#ifndef NDEBUG
    // Honor the debug-only -misched-only-func/-misched-only-block filters.
    if (SchedOnlyFunc.getNumOccurrences() && SchedOnlyFunc != MF->getName())
      continue;
    if (SchedOnlyBlock.getNumOccurrences()
        && (int)SchedOnlyBlock != MBB->getNumber())
      continue;
#endif

    // Break the block into scheduling regions [I, RegionEnd). RegionEnd
    // points to the scheduling boundary at the bottom of the region. The DAG
    // does not include RegionEnd, but the region does (i.e. the next
    // RegionEnd is above the previous RegionBegin). If the current block has
    // no terminator then RegionEnd == MBB->end() for the bottom region.
    //
    // All the regions of MBB are first found and stored in MBBRegions, which
    // will be processed (MBB) top-down if initialized with true.
    //
    // The Scheduler may insert instructions during either schedule() or
    // exitRegion(), even for empty regions. So the local iterators 'I' and
    // 'RegionEnd' are invalid across these calls. Instructions must not be
    // added to other regions than the current one without updating MBBRegions.

    MBBRegionsVector MBBRegions;
    getSchedRegions(&*MBB, MBBRegions, Scheduler.doMBBSchedRegionsTopDown());
    for (MBBRegionsVector::iterator R = MBBRegions.begin();
         R != MBBRegions.end(); ++R) {
      MachineBasicBlock::iterator I = R->RegionBegin;
      MachineBasicBlock::iterator RegionEnd = R->RegionEnd;
      unsigned NumRegionInstrs = R->NumRegionInstrs;

      // Notify the scheduler of the region, even if we may skip scheduling
      // it. Perhaps it still needs to be bundled.
      Scheduler.enterRegion(&*MBB, I, RegionEnd, NumRegionInstrs);

      // Skip empty scheduling regions (0 or 1 schedulable instructions).
      if (I == RegionEnd || I == std::prev(RegionEnd)) {
        // Close the current region. Bundle the terminator if needed.
        // This invalidates 'RegionEnd' and 'I'.
        Scheduler.exitRegion();
        continue;
      }
      DEBUG(dbgs() << "********** MI Scheduling **********\n");
      DEBUG(dbgs() << MF->getName()
            << ":BB#" << MBB->getNumber() << " " << MBB->getName()
            << "\n From: " << *I << " To: ";
            if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
            else dbgs() << "End";
            dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n');
      if (DumpCriticalPathLength) {
        errs() << MF->getName();
        errs() << ":BB# " << MBB->getNumber();
        errs() << " " << MBB->getName() << " \n";
      }

      // Schedule a region: possibly reorder instructions.
      // This invalidates the original region iterators.
      Scheduler.schedule();

      // Close the current region.
      Scheduler.exitRegion();
    }
    Scheduler.finishBlock();
    // FIXME: Ideally, no further passes should rely on kill flags. However,
    // thumb2 size reduction is currently an exception, so the PostMIScheduler
    // needs to do this.
    if (FixKillFlags)
      Scheduler.fixupKills(*MBB);
  }
  Scheduler.finalizeSchedule();
}
579 | | |
// Pass-printing hook; intentionally a no-op for the scheduler passes.
void MachineSchedulerBase::print(raw_ostream &O, const Module* m) const {
  // unimplemented
}
583 | | |
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// Print the queue name followed by the node numbers of its members.
LLVM_DUMP_METHOD void ReadyQueue::dump() const {
  dbgs() << "Queue " << Name << ": ";
  for (const SUnit *SU : Queue)
    dbgs() << SU->NodeNum << " ";
  dbgs() << "\n";
}
#endif
592 | | |
//===----------------------------------------------------------------------===//
// ScheduleDAGMI - Basic machine instruction scheduling. This is
// independent of PreRA/PostRA scheduling and involves no extra book-keeping for
// virtual registers.
// ===----------------------------------------------------------------------===/

// Provide a vtable anchor.
ScheduleDAGMI::~ScheduleDAGMI() = default;
601 | | |
602 | 46.3k | bool ScheduleDAGMI::canAddEdge(SUnit *SuccSU, SUnit *PredSU) { |
603 | 46.3k | return SuccSU == &ExitSU || !Topo.IsReachable(PredSU, SuccSU); |
604 | 46.3k | } |
605 | | |
/// Add a DAG edge to the given SU with the given predecessor dependence data,
/// refusing edges that would create a cycle. Returns false only when the edge
/// was rejected.
bool ScheduleDAGMI::addEdge(SUnit *SuccSU, const SDep &PredDep) {
  if (SuccSU != &ExitSU) {
    // Do not use WillCreateCycle, it assumes SD scheduling.
    // If Pred is reachable from Succ, then the edge creates a cycle.
    if (Topo.IsReachable(PredDep.getSUnit(), SuccSU))
      return false;
    Topo.AddPred(SuccSU, PredDep.getSUnit());
  }
  SuccSU->addPred(PredDep, /*Required=*/!PredDep.isArtificial());
  // Return true regardless of whether a new edge needed to be inserted.
  return true;
}
618 | | |
/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When
/// NumPredsLeft reaches zero, release the successor node.
///
/// FIXME: Adjust SuccSU height based on MinLatency.
void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) {
  SUnit *SuccSU = SuccEdge->getSUnit();

  // Weak edges do not gate readiness; only record cluster candidates.
  if (SuccEdge->isWeak()) {
    --SuccSU->WeakPredsLeft;
    if (SuccEdge->isCluster())
      NextClusterSucc = SuccSU;
    return;
  }
#ifndef NDEBUG
  if (SuccSU->NumPredsLeft == 0) {
    dbgs() << "*** Scheduling failed! ***\n";
    SuccSU->dump(this);
    dbgs() << " has been released too many times!\n";
    llvm_unreachable(nullptr);
  }
#endif
  // SU->TopReadyCycle was set to CurrCycle when it was scheduled. However,
  // CurrCycle may have advanced since then.
  if (SuccSU->TopReadyCycle < SU->TopReadyCycle + SuccEdge->getLatency())
    SuccSU->TopReadyCycle = SU->TopReadyCycle + SuccEdge->getLatency();

  --SuccSU->NumPredsLeft;
  if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
    SchedImpl->releaseTopNode(SuccSU);
}
649 | | |
650 | | /// releaseSuccessors - Call releaseSucc on each of SU's successors. |
651 | 5.47M | void ScheduleDAGMI::releaseSuccessors(SUnit *SU) { |
652 | 5.47M | for (SDep &Succ : SU->Succs) |
653 | 1.47M | releaseSucc(SU, &Succ); |
654 | 5.47M | } |
655 | | |
/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. When
/// NumSuccsLeft reaches zero, release the predecessor node.
///
/// FIXME: Adjust PredSU height based on MinLatency.
void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) {
  SUnit *PredSU = PredEdge->getSUnit();

  // Weak edges do not gate readiness; only record cluster candidates.
  if (PredEdge->isWeak()) {
    --PredSU->WeakSuccsLeft;
    if (PredEdge->isCluster())
      NextClusterPred = PredSU;
    return;
  }
#ifndef NDEBUG
  if (PredSU->NumSuccsLeft == 0) {
    dbgs() << "*** Scheduling failed! ***\n";
    PredSU->dump(this);
    dbgs() << " has been released too many times!\n";
    llvm_unreachable(nullptr);
  }
#endif
  // SU->BotReadyCycle was set to CurrCycle when it was scheduled. However,
  // CurrCycle may have advanced since then.
  if (PredSU->BotReadyCycle < SU->BotReadyCycle + PredEdge->getLatency())
    PredSU->BotReadyCycle = SU->BotReadyCycle + PredEdge->getLatency();

  --PredSU->NumSuccsLeft;
  if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU)
    SchedImpl->releaseBottomNode(PredSU);
}
686 | | |
687 | | /// releasePredecessors - Call releasePred on each of SU's predecessors. |
688 | 19.3M | void ScheduleDAGMI::releasePredecessors(SUnit *SU) { |
689 | 19.3M | for (SDep &Pred : SU->Preds) |
690 | 24.2M | releasePred(SU, &Pred); |
691 | 19.3M | } |
692 | | |
693 | 4.19M | void ScheduleDAGMI::startBlock(MachineBasicBlock *bb) { |
694 | 4.19M | ScheduleDAGInstrs::startBlock(bb); |
695 | 4.19M | SchedImpl->enterMBB(bb); |
696 | 4.19M | } |
697 | | |
698 | 4.19M | void ScheduleDAGMI::finishBlock() { |
699 | 4.19M | SchedImpl->leaveMBB(); |
700 | 4.19M | ScheduleDAGInstrs::finishBlock(); |
701 | 4.19M | } |
702 | | |
703 | | /// enterRegion - Called back from MachineScheduler::runOnMachineFunction after |
704 | | /// crossing a scheduling boundary. [begin, end) includes all instructions in |
705 | | /// the region, including the boundary itself and single-instruction regions |
706 | | /// that don't get scheduled. |
707 | | void ScheduleDAGMI::enterRegion(MachineBasicBlock *bb, |
708 | | MachineBasicBlock::iterator begin, |
709 | | MachineBasicBlock::iterator end, |
710 | | unsigned regioninstrs) |
711 | 11.8M | { |
712 | 11.8M | ScheduleDAGInstrs::enterRegion(bb, begin, end, regioninstrs); |
713 | 11.8M | |
714 | 11.8M | SchedImpl->initPolicy(begin, end, regioninstrs); |
715 | 11.8M | } |
716 | | |
717 | | /// This is normally called from the main scheduler loop but may also be invoked |
718 | | /// by the scheduling strategy to perform additional code motion. |
719 | | void ScheduleDAGMI::moveInstruction( |
720 | 1.78M | MachineInstr *MI, MachineBasicBlock::iterator InsertPos) { |
721 | 1.78M | // Advance RegionBegin if the first instruction moves down. |
722 | 1.78M | if (&*RegionBegin == MI) |
723 | 382k | ++RegionBegin; |
724 | 1.78M | |
725 | 1.78M | // Update the instruction stream. |
726 | 1.78M | BB->splice(InsertPos, BB, MI); |
727 | 1.78M | |
728 | 1.78M | // Update LiveIntervals |
729 | 1.78M | if (LIS) |
730 | 1.78M | LIS->handleMove(*MI, /*UpdateFlags=*/true); |
731 | 1.78M | |
732 | 1.78M | // Recede RegionBegin if an instruction moves above the first. |
733 | 1.78M | if (RegionBegin == InsertPos) |
734 | 91.5k | RegionBegin = MI; |
735 | 1.78M | } |
736 | | |
737 | 16.6M | bool ScheduleDAGMI::checkSchedLimit() { |
738 | | #ifndef NDEBUG |
739 | | if (NumInstrsScheduled == MISchedCutoff && MISchedCutoff != ~0U) { |
740 | | CurrentTop = CurrentBottom; |
741 | | return false; |
742 | | } |
743 | | ++NumInstrsScheduled; |
744 | | #endif |
745 | | return true; |
746 | 16.6M | } |
747 | | |
748 | | /// Per-region scheduling driver, called back from |
749 | | /// MachineScheduler::runOnMachineFunction. This is a simplified driver that |
750 | | /// does not consider liveness or register pressure. It is useful for PostRA |
751 | | /// scheduling and potentially other custom schedulers. |
752 | 7.71k | void ScheduleDAGMI::schedule() { |
753 | 7.71k | DEBUG(dbgs() << "ScheduleDAGMI::schedule starting\n"); |
754 | 7.71k | DEBUG(SchedImpl->dumpPolicy()); |
755 | 7.71k | |
756 | 7.71k | // Build the DAG. |
757 | 7.71k | buildSchedGraph(AA); |
758 | 7.71k | |
759 | 7.71k | Topo.InitDAGTopologicalSorting(); |
760 | 7.71k | |
761 | 7.71k | postprocessDAG(); |
762 | 7.71k | |
763 | 7.71k | SmallVector<SUnit*, 8> TopRoots, BotRoots; |
764 | 7.71k | findRootsAndBiasEdges(TopRoots, BotRoots); |
765 | 7.71k | |
766 | 7.71k | // Initialize the strategy before modifying the DAG. |
767 | 7.71k | // This may initialize a DFSResult to be used for queue priority. |
768 | 7.71k | SchedImpl->initialize(this); |
769 | 7.71k | |
770 | 7.71k | DEBUG( |
771 | 7.71k | if (EntrySU.getInstr() != nullptr) |
772 | 7.71k | EntrySU.dumpAll(this); |
773 | 7.71k | for (const SUnit &SU : SUnits) |
774 | 7.71k | SU.dumpAll(this); |
775 | 7.71k | if (ExitSU.getInstr() != nullptr) |
776 | 7.71k | ExitSU.dumpAll(this); |
777 | 7.71k | ); |
778 | 7.71k | if (ViewMISchedDAGs7.71k ) viewGraph()0 ; |
779 | 7.71k | |
780 | 7.71k | // Initialize ready queues now that the DAG and priority data are finalized. |
781 | 7.71k | initQueues(TopRoots, BotRoots); |
782 | 7.71k | |
783 | 7.71k | bool IsTopNode = false; |
784 | 46.5k | while (true46.5k ) { |
785 | 46.5k | DEBUG(dbgs() << "** ScheduleDAGMI::schedule picking next node\n"); |
786 | 46.5k | SUnit *SU = SchedImpl->pickNode(IsTopNode); |
787 | 46.5k | if (!SU46.5k ) break7.71k ; |
788 | 38.8k | |
789 | 46.5k | assert(!SU->isScheduled && "Node already scheduled"); |
790 | 38.8k | if (!checkSchedLimit()) |
791 | 0 | break; |
792 | 38.8k | |
793 | 38.8k | MachineInstr *MI = SU->getInstr(); |
794 | 38.8k | if (IsTopNode38.8k ) { |
795 | 38.8k | assert(SU->isTopReady() && "node still has unscheduled dependencies"); |
796 | 38.8k | if (&*CurrentTop == MI) |
797 | 34.8k | CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom); |
798 | 38.8k | else |
799 | 4.01k | moveInstruction(MI, CurrentTop); |
800 | 0 | } else { |
801 | 0 | assert(SU->isBottomReady() && "node still has unscheduled dependencies"); |
802 | 0 | MachineBasicBlock::iterator priorII = |
803 | 0 | priorNonDebug(CurrentBottom, CurrentTop); |
804 | 0 | if (&*priorII == MI) |
805 | 0 | CurrentBottom = priorII; |
806 | 0 | else { |
807 | 0 | if (&*CurrentTop == MI) |
808 | 0 | CurrentTop = nextIfDebug(++CurrentTop, priorII); |
809 | 0 | moveInstruction(MI, CurrentBottom); |
810 | 0 | CurrentBottom = MI; |
811 | 0 | } |
812 | 0 | } |
813 | 46.5k | // Notify the scheduling strategy before updating the DAG. |
814 | 46.5k | // This sets the scheduled node's ReadyCycle to CurrCycle. When updateQueues |
815 | 46.5k | // runs, it can then use the accurate ReadyCycle time to determine whether |
816 | 46.5k | // newly released nodes can move to the readyQ. |
817 | 46.5k | SchedImpl->schedNode(SU, IsTopNode); |
818 | 46.5k | |
819 | 46.5k | updateQueues(SU, IsTopNode); |
820 | 46.5k | } |
821 | 7.71k | assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone."); |
822 | 7.71k | |
823 | 7.71k | placeDebugValues(); |
824 | 7.71k | |
825 | 7.71k | DEBUG({ |
826 | 7.71k | unsigned BBNum = begin()->getParent()->getNumber(); |
827 | 7.71k | dbgs() << "*** Final schedule for BB#" << BBNum << " ***\n"; |
828 | 7.71k | dumpSchedule(); |
829 | 7.71k | dbgs() << '\n'; |
830 | 7.71k | }); |
831 | 7.71k | } |
832 | | |
833 | | /// Apply each ScheduleDAGMutation step in order. |
834 | 4.08M | void ScheduleDAGMI::postprocessDAG() { |
835 | 4.08M | for (auto &m : Mutations) |
836 | 15.9M | m->apply(this); |
837 | 4.08M | } |
838 | | |
839 | | void ScheduleDAGMI:: |
840 | | findRootsAndBiasEdges(SmallVectorImpl<SUnit*> &TopRoots, |
841 | 4.08M | SmallVectorImpl<SUnit*> &BotRoots) { |
842 | 16.6M | for (SUnit &SU : SUnits) { |
843 | 16.6M | assert(!SU.isBoundaryNode() && "Boundary node should not be in SUnits"); |
844 | 16.6M | |
845 | 16.6M | // Order predecessors so DFSResult follows the critical path. |
846 | 16.6M | SU.biasCriticalPath(); |
847 | 16.6M | |
848 | 16.6M | // A SUnit is ready to top schedule if it has no predecessors. |
849 | 16.6M | if (!SU.NumPredsLeft) |
850 | 10.1M | TopRoots.push_back(&SU); |
851 | 16.6M | // A SUnit is ready to bottom schedule if it has no successors. |
852 | 16.6M | if (!SU.NumSuccsLeft) |
853 | 2.86M | BotRoots.push_back(&SU); |
854 | 16.6M | } |
855 | 4.08M | ExitSU.biasCriticalPath(); |
856 | 4.08M | } |
857 | | |
858 | | /// Identify DAG roots and setup scheduler queues. |
859 | | void ScheduleDAGMI::initQueues(ArrayRef<SUnit*> TopRoots, |
860 | 4.08M | ArrayRef<SUnit*> BotRoots) { |
861 | 4.08M | NextClusterSucc = nullptr; |
862 | 4.08M | NextClusterPred = nullptr; |
863 | 4.08M | |
864 | 4.08M | // Release all DAG roots for scheduling, not including EntrySU/ExitSU. |
865 | 4.08M | // |
866 | 4.08M | // Nodes with unreleased weak edges can still be roots. |
867 | 4.08M | // Release top roots in forward order. |
868 | 4.08M | for (SUnit *SU : TopRoots) |
869 | 10.1M | SchedImpl->releaseTopNode(SU); |
870 | 4.08M | |
871 | 4.08M | // Release bottom roots in reverse order so the higher priority nodes appear |
872 | 4.08M | // first. This is more natural and slightly more efficient. |
873 | 4.08M | for (SmallVectorImpl<SUnit*>::const_reverse_iterator |
874 | 6.95M | I = BotRoots.rbegin(), E = BotRoots.rend(); I != E6.95M ; ++I2.86M ) { |
875 | 2.86M | SchedImpl->releaseBottomNode(*I); |
876 | 2.86M | } |
877 | 4.08M | |
878 | 4.08M | releaseSuccessors(&EntrySU); |
879 | 4.08M | releasePredecessors(&ExitSU); |
880 | 4.08M | |
881 | 4.08M | SchedImpl->registerRoots(); |
882 | 4.08M | |
883 | 4.08M | // Advance past initial DebugValues. |
884 | 4.08M | CurrentTop = nextIfDebug(RegionBegin, RegionEnd); |
885 | 4.08M | CurrentBottom = RegionEnd; |
886 | 4.08M | } |
887 | | |
888 | | /// Update scheduler queues after scheduling an instruction. |
889 | 16.6M | void ScheduleDAGMI::updateQueues(SUnit *SU, bool IsTopNode) { |
890 | 16.6M | // Release dependent instructions for scheduling. |
891 | 16.6M | if (IsTopNode) |
892 | 1.38M | releaseSuccessors(SU); |
893 | 16.6M | else |
894 | 15.2M | releasePredecessors(SU); |
895 | 16.6M | |
896 | 16.6M | SU->isScheduled = true; |
897 | 16.6M | } |
898 | | |
899 | | /// Reinsert any remaining debug_values, just like the PostRA scheduler. |
900 | 4.08M | void ScheduleDAGMI::placeDebugValues() { |
901 | 4.08M | // If first instruction was a DBG_VALUE then put it back. |
902 | 4.08M | if (FirstDbgValue4.08M ) { |
903 | 123 | BB->splice(RegionBegin, BB, FirstDbgValue); |
904 | 123 | RegionBegin = FirstDbgValue; |
905 | 123 | } |
906 | 4.08M | |
907 | 4.08M | for (std::vector<std::pair<MachineInstr *, MachineInstr *>>::iterator |
908 | 4.08M | DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE4.08M ; --DI362 ) { |
909 | 362 | std::pair<MachineInstr *, MachineInstr *> P = *std::prev(DI); |
910 | 362 | MachineInstr *DbgValue = P.first; |
911 | 362 | MachineBasicBlock::iterator OrigPrevMI = P.second; |
912 | 362 | if (&*RegionBegin == DbgValue) |
913 | 0 | ++RegionBegin; |
914 | 362 | BB->splice(++OrigPrevMI, BB, DbgValue); |
915 | 362 | if (OrigPrevMI == std::prev(RegionEnd)) |
916 | 55 | RegionEnd = DbgValue; |
917 | 362 | } |
918 | 4.08M | DbgValues.clear(); |
919 | 4.08M | FirstDbgValue = nullptr; |
920 | 4.08M | } |
921 | | |
922 | | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
923 | | LLVM_DUMP_METHOD void ScheduleDAGMI::dumpSchedule() const { |
924 | | for (MachineBasicBlock::iterator MI = begin(), ME = end(); MI != ME; ++MI) { |
925 | | if (SUnit *SU = getSUnit(&(*MI))) |
926 | | SU->dump(this); |
927 | | else |
928 | | dbgs() << "Missing SUnit\n"; |
929 | | } |
930 | | } |
931 | | #endif |
932 | | |
933 | | //===----------------------------------------------------------------------===// |
934 | | // ScheduleDAGMILive - Base class for MachineInstr scheduling with LiveIntervals |
935 | | // preservation. |
936 | | //===----------------------------------------------------------------------===// |
937 | | |
938 | 552k | ScheduleDAGMILive::~ScheduleDAGMILive() { |
939 | 552k | delete DFSResult; |
940 | 552k | } |
941 | | |
942 | 2.86M | void ScheduleDAGMILive::collectVRegUses(SUnit &SU) { |
943 | 2.86M | const MachineInstr &MI = *SU.getInstr(); |
944 | 10.2M | for (const MachineOperand &MO : MI.operands()) { |
945 | 10.2M | if (!MO.isReg()) |
946 | 3.47M | continue; |
947 | 6.78M | if (6.78M !MO.readsReg()6.78M ) |
948 | 2.32M | continue; |
949 | 4.45M | if (4.45M TrackLaneMasks && 4.45M !MO.isUse()570k ) |
950 | 92.1k | continue; |
951 | 4.36M | |
952 | 4.36M | unsigned Reg = MO.getReg(); |
953 | 4.36M | if (!TargetRegisterInfo::isVirtualRegister(Reg)) |
954 | 860k | continue; |
955 | 3.50M | |
956 | 3.50M | // Ignore re-defs. |
957 | 3.50M | if (3.50M TrackLaneMasks3.50M ) { |
958 | 286k | bool FoundDef = false; |
959 | 1.46M | for (const MachineOperand &MO2 : MI.operands()) { |
960 | 1.46M | if (MO2.isReg() && 1.46M MO2.isDef()964k && MO2.getReg() == Reg268k && !MO2.isDead()6.73k ) { |
961 | 6.73k | FoundDef = true; |
962 | 6.73k | break; |
963 | 6.73k | } |
964 | 286k | } |
965 | 286k | if (FoundDef) |
966 | 6.73k | continue; |
967 | 3.49M | } |
968 | 3.49M | |
969 | 3.49M | // Record this local VReg use. |
970 | 3.49M | VReg2SUnitMultiMap::iterator UI = VRegUses.find(Reg); |
971 | 39.9M | for (; UI != VRegUses.end()39.9M ; ++UI36.4M ) { |
972 | 36.4M | if (UI->SU == &SU) |
973 | 65.6k | break; |
974 | 36.4M | } |
975 | 3.49M | if (UI == VRegUses.end()) |
976 | 3.43M | VRegUses.insert(VReg2SUnit(Reg, LaneBitmask::getNone(), &SU)); |
977 | 10.2M | } |
978 | 2.86M | } |
979 | | |
980 | | /// enterRegion - Called back from MachineScheduler::runOnMachineFunction after |
981 | | /// crossing a scheduling boundary. [begin, end) includes all instructions in |
982 | | /// the region, including the boundary itself and single-instruction regions |
983 | | /// that don't get scheduled. |
984 | | void ScheduleDAGMILive::enterRegion(MachineBasicBlock *bb, |
985 | | MachineBasicBlock::iterator begin, |
986 | | MachineBasicBlock::iterator end, |
987 | | unsigned regioninstrs) |
988 | 11.7M | { |
989 | 11.7M | // ScheduleDAGMI initializes SchedImpl's per-region policy. |
990 | 11.7M | ScheduleDAGMI::enterRegion(bb, begin, end, regioninstrs); |
991 | 11.7M | |
992 | 11.7M | // For convenience remember the end of the liveness region. |
993 | 11.7M | LiveRegionEnd = (RegionEnd == bb->end()) ? RegionEnd497k : std::next(RegionEnd)11.2M ; |
994 | 11.7M | |
995 | 11.7M | SUPressureDiffs.clear(); |
996 | 11.7M | |
997 | 11.7M | ShouldTrackPressure = SchedImpl->shouldTrackPressure(); |
998 | 11.7M | ShouldTrackLaneMasks = SchedImpl->shouldTrackLaneMasks(); |
999 | 11.7M | |
1000 | 11.7M | assert((!ShouldTrackLaneMasks || ShouldTrackPressure) && |
1001 | 11.7M | "ShouldTrackLaneMasks requires ShouldTrackPressure"); |
1002 | 11.7M | } |
1003 | | |
1004 | | // Setup the register pressure trackers for the top scheduled and bottom
1005 | | // scheduled regions. |
1006 | 112k | void ScheduleDAGMILive::initRegPressure() { |
1007 | 112k | VRegUses.clear(); |
1008 | 112k | VRegUses.setUniverse(MRI.getNumVirtRegs()); |
1009 | 112k | for (SUnit &SU : SUnits) |
1010 | 2.86M | collectVRegUses(SU); |
1011 | 112k | |
1012 | 112k | TopRPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin, |
1013 | 112k | ShouldTrackLaneMasks, false); |
1014 | 112k | BotRPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd, |
1015 | 112k | ShouldTrackLaneMasks, false); |
1016 | 112k | |
1017 | 112k | // Close the RPTracker to finalize live ins. |
1018 | 112k | RPTracker.closeRegion(); |
1019 | 112k | |
1020 | 112k | DEBUG(RPTracker.dump()); |
1021 | 112k | |
1022 | 112k | // Initialize the live ins and live outs. |
1023 | 112k | TopRPTracker.addLiveRegs(RPTracker.getPressure().LiveInRegs); |
1024 | 112k | BotRPTracker.addLiveRegs(RPTracker.getPressure().LiveOutRegs); |
1025 | 112k | |
1026 | 112k | // Close one end of the tracker so we can call |
1027 | 112k | // getMaxUpward/DownwardPressureDelta before advancing across any |
1028 | 112k | // instructions. This converts currently live regs into live ins/outs. |
1029 | 112k | TopRPTracker.closeTop(); |
1030 | 112k | BotRPTracker.closeBottom(); |
1031 | 112k | |
1032 | 112k | BotRPTracker.initLiveThru(RPTracker); |
1033 | 112k | if (!BotRPTracker.getLiveThru().empty()112k ) { |
1034 | 112k | TopRPTracker.initLiveThru(BotRPTracker.getLiveThru()); |
1035 | 112k | DEBUG(dbgs() << "Live Thru: "; |
1036 | 112k | dumpRegSetPressure(BotRPTracker.getLiveThru(), TRI)); |
1037 | 112k | }; |
1038 | 112k | |
1039 | 112k | // For each live out vreg reduce the pressure change associated with other |
1040 | 112k | // uses of the same vreg below the live-out reaching def. |
1041 | 112k | updatePressureDiffs(RPTracker.getPressure().LiveOutRegs); |
1042 | 112k | |
1043 | 112k | // Account for liveness generated by the region boundary. |
1044 | 112k | if (LiveRegionEnd != RegionEnd112k ) { |
1045 | 100k | SmallVector<RegisterMaskPair, 8> LiveUses; |
1046 | 100k | BotRPTracker.recede(&LiveUses); |
1047 | 100k | updatePressureDiffs(LiveUses); |
1048 | 100k | } |
1049 | 112k | |
1050 | 112k | DEBUG( |
1051 | 112k | dbgs() << "Top Pressure:\n"; |
1052 | 112k | dumpRegSetPressure(TopRPTracker.getRegSetPressureAtPos(), TRI); |
1053 | 112k | dbgs() << "Bottom Pressure:\n"; |
1054 | 112k | dumpRegSetPressure(BotRPTracker.getRegSetPressureAtPos(), TRI); |
1055 | 112k | ); |
1056 | 112k | |
1057 | 112k | assert(BotRPTracker.getPos() == RegionEnd && "Can't find the region bottom"); |
1058 | 112k | |
1059 | 112k | // Cache the list of excess pressure sets in this region. This will also track |
1060 | 112k | // the max pressure in the scheduled code for these sets. |
1061 | 112k | RegionCriticalPSets.clear(); |
1062 | 112k | const std::vector<unsigned> &RegionPressure = |
1063 | 112k | RPTracker.getPressure().MaxSetPressure; |
1064 | 1.77M | for (unsigned i = 0, e = RegionPressure.size(); i < e1.77M ; ++i1.66M ) { |
1065 | 1.66M | unsigned Limit = RegClassInfo->getRegPressureSetLimit(i); |
1066 | 1.66M | if (RegionPressure[i] > Limit1.66M ) { |
1067 | 3.83k | DEBUG(dbgs() << TRI->getRegPressureSetName(i) |
1068 | 3.83k | << " Limit " << Limit |
1069 | 3.83k | << " Actual " << RegionPressure[i] << "\n"); |
1070 | 3.83k | RegionCriticalPSets.push_back(PressureChange(i)); |
1071 | 3.83k | } |
1072 | 1.66M | } |
1073 | 112k | DEBUG(dbgs() << "Excess PSets: "; |
1074 | 112k | for (const PressureChange &RCPS : RegionCriticalPSets) |
1075 | 112k | dbgs() << TRI->getRegPressureSetName( |
1076 | 112k | RCPS.getPSet()) << " "; |
1077 | 112k | dbgs() << "\n"); |
1078 | 112k | } |
1079 | | |
1080 | | void ScheduleDAGMILive:: |
1081 | | updateScheduledPressure(const SUnit *SU, |
1082 | 2.86M | const std::vector<unsigned> &NewMaxPressure) { |
1083 | 2.86M | const PressureDiff &PDiff = getPressureDiff(SU); |
1084 | 2.86M | unsigned CritIdx = 0, CritEnd = RegionCriticalPSets.size(); |
1085 | 5.81M | for (const PressureChange &PC : PDiff) { |
1086 | 5.81M | if (!PC.isValid()) |
1087 | 2.84M | break; |
1088 | 2.96M | unsigned ID = PC.getPSet(); |
1089 | 3.04M | while (CritIdx != CritEnd && 3.04M RegionCriticalPSets[CritIdx].getPSet() < ID345k ) |
1090 | 75.4k | ++CritIdx; |
1091 | 2.96M | if (CritIdx != CritEnd && 2.96M RegionCriticalPSets[CritIdx].getPSet() == ID270k ) { |
1092 | 176k | if ((int)NewMaxPressure[ID] > RegionCriticalPSets[CritIdx].getUnitInc() |
1093 | 24.2k | && NewMaxPressure[ID] <= (unsigned)std::numeric_limits<int16_t>::max()) |
1094 | 24.2k | RegionCriticalPSets[CritIdx].setUnitInc(NewMaxPressure[ID]); |
1095 | 176k | } |
1096 | 2.96M | unsigned Limit = RegClassInfo->getRegPressureSetLimit(ID); |
1097 | 2.96M | if (NewMaxPressure[ID] >= Limit - 22.96M ) { |
1098 | 220k | DEBUG(dbgs() << " " << TRI->getRegPressureSetName(ID) << ": " |
1099 | 220k | << NewMaxPressure[ID] |
1100 | 220k | << ((NewMaxPressure[ID] > Limit) ? " > " : " <= ") << Limit |
1101 | 220k | << "(+ " << BotRPTracker.getLiveThru()[ID] << " livethru)\n"); |
1102 | 220k | } |
1103 | 5.81M | } |
1104 | 2.86M | } |
1105 | | |
1106 | | /// Update the PressureDiff array for liveness after scheduling this |
1107 | | /// instruction. |
1108 | | void ScheduleDAGMILive::updatePressureDiffs( |
1109 | 2.95M | ArrayRef<RegisterMaskPair> LiveUses) { |
1110 | 2.55M | for (const RegisterMaskPair &P : LiveUses) { |
1111 | 2.55M | unsigned Reg = P.RegUnit; |
1112 | 2.55M | /// FIXME: Currently assuming single-use physregs. |
1113 | 2.55M | if (!TRI->isVirtualRegister(Reg)) |
1114 | 136k | continue; |
1115 | 2.41M | |
1116 | 2.41M | if (2.41M ShouldTrackLaneMasks2.41M ) { |
1117 | 299k | // If the register has just become live then other uses won't change |
1118 | 299k | // this fact anymore => decrement pressure. |
1119 | 299k | // If the register has just become dead then other uses make it come |
1120 | 299k | // back to life => increment pressure. |
1121 | 299k | bool Decrement = P.LaneMask.any(); |
1122 | 299k | |
1123 | 299k | for (const VReg2SUnit &V2SU |
1124 | 447k | : make_range(VRegUses.find(Reg), VRegUses.end())) { |
1125 | 447k | SUnit &SU = *V2SU.SU; |
1126 | 447k | if (SU.isScheduled || 447k &SU == &ExitSU261k ) |
1127 | 186k | continue; |
1128 | 261k | |
1129 | 261k | PressureDiff &PDiff = getPressureDiff(&SU); |
1130 | 261k | PDiff.addPressureChange(Reg, Decrement, &MRI); |
1131 | 261k | DEBUG( |
1132 | 447k | dbgs() << " UpdateRegP: SU(" << SU.NodeNum << ") " |
1133 | 447k | << PrintReg(Reg, TRI) << ':' << PrintLaneMask(P.LaneMask) |
1134 | 447k | << ' ' << *SU.getInstr(); |
1135 | 447k | dbgs() << " to "; |
1136 | 447k | PDiff.dump(*TRI); |
1137 | 447k | ); |
1138 | 447k | } |
1139 | 2.41M | } else { |
1140 | 2.11M | assert(P.LaneMask.any()); |
1141 | 2.11M | DEBUG(dbgs() << " LiveReg: " << PrintVRegOrUnit(Reg, TRI) << "\n"); |
1142 | 2.11M | // This may be called before CurrentBottom has been initialized. However, |
1143 | 2.11M | // BotRPTracker must have a valid position. We want the value live into the |
1144 | 2.11M | // instruction or live out of the block, so ask for the previous |
1145 | 2.11M | // instruction's live-out. |
1146 | 2.11M | const LiveInterval &LI = LIS->getInterval(Reg); |
1147 | 2.11M | VNInfo *VNI; |
1148 | 2.11M | MachineBasicBlock::const_iterator I = |
1149 | 2.11M | nextIfDebug(BotRPTracker.getPos(), BB->end()); |
1150 | 2.11M | if (I == BB->end()) |
1151 | 244k | VNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB)); |
1152 | 1.87M | else { |
1153 | 1.87M | LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(*I)); |
1154 | 1.87M | VNI = LRQ.valueIn(); |
1155 | 1.87M | } |
1156 | 2.11M | // RegisterPressureTracker guarantees that readsReg is true for LiveUses. |
1157 | 2.11M | assert(VNI && "No live value at use."); |
1158 | 2.11M | for (const VReg2SUnit &V2SU |
1159 | 3.89M | : make_range(VRegUses.find(Reg), VRegUses.end())) { |
1160 | 3.89M | SUnit *SU = V2SU.SU; |
1161 | 3.89M | // If this use comes before the reaching def, it cannot be a last use, |
1162 | 3.89M | // so decrease its pressure change. |
1163 | 3.89M | if (!SU->isScheduled && 3.89M SU != &ExitSU3.64M ) { |
1164 | 3.64M | LiveQueryResult LRQ = |
1165 | 3.64M | LI.Query(LIS->getInstructionIndex(*SU->getInstr())); |
1166 | 3.64M | if (LRQ.valueIn() == VNI3.64M ) { |
1167 | 3.13M | PressureDiff &PDiff = getPressureDiff(SU); |
1168 | 3.13M | PDiff.addPressureChange(Reg, true, &MRI); |
1169 | 3.13M | DEBUG( |
1170 | 3.13M | dbgs() << " UpdateRegP: SU(" << SU->NodeNum << ") " |
1171 | 3.13M | << *SU->getInstr(); |
1172 | 3.13M | dbgs() << " to "; |
1173 | 3.13M | PDiff.dump(*TRI); |
1174 | 3.13M | ); |
1175 | 3.13M | } |
1176 | 3.64M | } |
1177 | 3.89M | } |
1178 | 2.11M | } |
1179 | 2.55M | } |
1180 | 2.95M | } |
1181 | | |
1182 | | /// schedule - Called back from MachineScheduler::runOnMachineFunction |
1183 | | /// after setting up the current scheduling region. [RegionBegin, RegionEnd) |
1184 | | /// only includes instructions that have DAG nodes, not scheduling boundaries. |
1185 | | /// |
1186 | | /// This is a skeletal driver, with all the functionality pushed into helpers, |
1187 | | /// so that it can be easily extended by experimental schedulers. Generally, |
1188 | | /// implementing MachineSchedStrategy should be sufficient to implement a new |
1189 | | /// scheduling algorithm. However, if a scheduler further subclasses |
1190 | | /// ScheduleDAGMILive then it will want to override this virtual method in order |
1191 | | /// to update any specialized state. |
1192 | 4.07M | void ScheduleDAGMILive::schedule() { |
1193 | 4.07M | DEBUG(dbgs() << "ScheduleDAGMILive::schedule starting\n"); |
1194 | 4.07M | DEBUG(SchedImpl->dumpPolicy()); |
1195 | 4.07M | buildDAGWithRegPressure(); |
1196 | 4.07M | |
1197 | 4.07M | Topo.InitDAGTopologicalSorting(); |
1198 | 4.07M | |
1199 | 4.07M | postprocessDAG(); |
1200 | 4.07M | |
1201 | 4.07M | SmallVector<SUnit*, 8> TopRoots, BotRoots; |
1202 | 4.07M | findRootsAndBiasEdges(TopRoots, BotRoots); |
1203 | 4.07M | |
1204 | 4.07M | // Initialize the strategy before modifying the DAG. |
1205 | 4.07M | // This may initialize a DFSResult to be used for queue priority. |
1206 | 4.07M | SchedImpl->initialize(this); |
1207 | 4.07M | |
1208 | 4.07M | DEBUG( |
1209 | 4.07M | if (EntrySU.getInstr() != nullptr) |
1210 | 4.07M | EntrySU.dumpAll(this); |
1211 | 4.07M | for (const SUnit &SU : SUnits) { |
1212 | 4.07M | SU.dumpAll(this); |
1213 | 4.07M | if (ShouldTrackPressure) { |
1214 | 4.07M | dbgs() << " Pressure Diff : "; |
1215 | 4.07M | getPressureDiff(&SU).dump(*TRI); |
1216 | 4.07M | } |
1217 | 4.07M | dbgs() << " Single Issue : "; |
1218 | 4.07M | if (SchedModel.mustBeginGroup(SU.getInstr()) && |
1219 | 4.07M | SchedModel.mustEndGroup(SU.getInstr())) |
1220 | 4.07M | dbgs() << "true;"; |
1221 | 4.07M | else |
1222 | 4.07M | dbgs() << "false;"; |
1223 | 4.07M | dbgs() << '\n'; |
1224 | 4.07M | } |
1225 | 4.07M | if (ExitSU.getInstr() != nullptr) |
1226 | 4.07M | ExitSU.dumpAll(this); |
1227 | 4.07M | ); |
1228 | 4.07M | if (ViewMISchedDAGs4.07M ) viewGraph()0 ; |
1229 | 4.07M | |
1230 | 4.07M | // Initialize ready queues now that the DAG and priority data are finalized. |
1231 | 4.07M | initQueues(TopRoots, BotRoots); |
1232 | 4.07M | |
1233 | 4.07M | bool IsTopNode = false; |
1234 | 20.6M | while (true20.6M ) { |
1235 | 20.6M | DEBUG(dbgs() << "** ScheduleDAGMILive::schedule picking next node\n"); |
1236 | 20.6M | SUnit *SU = SchedImpl->pickNode(IsTopNode); |
1237 | 20.6M | if (!SU20.6M ) break4.07M ; |
1238 | 16.5M | |
1239 | 20.6M | assert(!SU->isScheduled && "Node already scheduled"); |
1240 | 16.5M | if (!checkSchedLimit()) |
1241 | 0 | break; |
1242 | 16.5M | |
1243 | 16.5M | scheduleMI(SU, IsTopNode); |
1244 | 16.5M | |
1245 | 16.5M | if (DFSResult16.5M ) { |
1246 | 176 | unsigned SubtreeID = DFSResult->getSubtreeID(SU); |
1247 | 176 | if (!ScheduledTrees.test(SubtreeID)176 ) { |
1248 | 38 | ScheduledTrees.set(SubtreeID); |
1249 | 38 | DFSResult->scheduleTree(SubtreeID); |
1250 | 38 | SchedImpl->scheduleTree(SubtreeID); |
1251 | 38 | } |
1252 | 176 | } |
1253 | 20.6M | |
1254 | 20.6M | // Notify the scheduling strategy after updating the DAG. |
1255 | 20.6M | SchedImpl->schedNode(SU, IsTopNode); |
1256 | 20.6M | |
1257 | 20.6M | updateQueues(SU, IsTopNode); |
1258 | 20.6M | } |
1259 | 4.07M | assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone."); |
1260 | 4.07M | |
1261 | 4.07M | placeDebugValues(); |
1262 | 4.07M | |
1263 | 4.07M | DEBUG({ |
1264 | 4.07M | unsigned BBNum = begin()->getParent()->getNumber(); |
1265 | 4.07M | dbgs() << "*** Final schedule for BB#" << BBNum << " ***\n"; |
1266 | 4.07M | dumpSchedule(); |
1267 | 4.07M | dbgs() << '\n'; |
1268 | 4.07M | }); |
1269 | 4.07M | } |
1270 | | |
1271 | | /// Build the DAG and setup three register pressure trackers. |
1272 | 4.07M | void ScheduleDAGMILive::buildDAGWithRegPressure() { |
1273 | 4.07M | if (!ShouldTrackPressure4.07M ) { |
1274 | 3.96M | RPTracker.reset(); |
1275 | 3.96M | RegionCriticalPSets.clear(); |
1276 | 3.96M | buildSchedGraph(AA); |
1277 | 3.96M | return; |
1278 | 3.96M | } |
1279 | 112k | |
1280 | 112k | // Initialize the register pressure tracker used by buildSchedGraph. |
1281 | 112k | RPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd, |
1282 | 112k | ShouldTrackLaneMasks, /*TrackUntiedDefs=*/true); |
1283 | 112k | |
1284 | 112k |   // Account for liveness generated by the region boundary.
1285 | 112k | if (LiveRegionEnd != RegionEnd) |
1286 | 100k | RPTracker.recede(); |
1287 | 4.07M | |
1288 | 4.07M | // Build the DAG, and compute current register pressure. |
1289 | 4.07M | buildSchedGraph(AA, &RPTracker, &SUPressureDiffs, LIS, ShouldTrackLaneMasks); |
1290 | 4.07M | |
1291 | 4.07M | // Initialize top/bottom trackers after computing region pressure. |
1292 | 4.07M | initRegPressure(); |
1293 | 4.07M | } |
1294 | | |
1295 | 10 | void ScheduleDAGMILive::computeDFSResult() { |
1296 | 10 | if (!DFSResult) |
1297 | 8 | DFSResult = new SchedDFSResult(/*BottomU*/true, MinSubtreeSize); |
1298 | 10 | DFSResult->clear(); |
1299 | 10 | ScheduledTrees.clear(); |
1300 | 10 | DFSResult->resize(SUnits.size()); |
1301 | 10 | DFSResult->compute(SUnits); |
1302 | 10 | ScheduledTrees.resize(DFSResult->getNumSubtrees()); |
1303 | 10 | } |
1304 | | |
1305 | | /// Compute the max cyclic critical path through the DAG. The scheduling DAG |
1306 | | /// only provides the critical path for single block loops. To handle loops that |
1307 | | /// span blocks, we could use the vreg path latencies provided by |
1308 | | /// MachineTraceMetrics instead. However, MachineTraceMetrics is not currently |
1309 | | /// available for use in the scheduler. |
1310 | | /// |
1311 | | /// The cyclic path estimation identifies a def-use pair that crosses the back |
1312 | | /// edge and considers the depth and height of the nodes. For example, consider |
1313 | | /// the following instruction sequence where each instruction has unit latency |
1314 | | /// and defines an eponymous virtual register:
1315 | | /// |
1316 | | /// a->b(a,c)->c(b)->d(c)->exit |
1317 | | /// |
1318 | | /// The cyclic critical path is two cycles: b->c->b
1319 | | /// The acyclic critical path is four cycles: a->b->c->d->exit |
1320 | | /// LiveOutHeight = height(c) = len(c->d->exit) = 2 |
1321 | | /// LiveOutDepth = depth(c) + 1 = len(a->b->c) + 1 = 3 |
1322 | | /// LiveInHeight = height(b) + 1 = len(b->c->d->exit) + 1 = 4 |
1323 | | /// LiveInDepth = depth(b) = len(a->b) = 1 |
1324 | | /// |
1325 | | /// LiveOutDepth - LiveInDepth = 3 - 1 = 2 |
1326 | | /// LiveInHeight - LiveOutHeight = 4 - 2 = 2 |
1327 | | /// CyclicCriticalPath = min(2, 2) = 2 |
1328 | | /// |
1329 | | /// This could be relevant to PostRA scheduling, but is currently implemented |
1330 | | /// assuming LiveIntervals. |
/// Estimate the length of the cyclic critical path through a single-block
/// loop: the longest cross-iteration def/use latency chain implied by vregs
/// that are live out of the block and re-enter it through a PHI.
/// Returns 0 for any region that is not a single-block loop.
unsigned ScheduleDAGMILive::computeCyclicCriticalPath() {
  // This only applies to single block loop.
  if (!BB->isSuccessor(BB))
    return 0;

  unsigned MaxCyclicLatency = 0;
  // Visit each live out vreg def to find def/use pairs that cross iterations.
  for (const RegisterMaskPair &P : RPTracker.getPressure().LiveOutRegs) {
    unsigned Reg = P.RegUnit;
    if (!TRI->isVirtualRegister(Reg))
      continue;
    const LiveInterval &LI = LIS->getInterval(Reg);
    // The value reaching the block's back-edge is the live-out def.
    const VNInfo *DefVNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
    if (!DefVNI)
      continue;

    MachineInstr *DefMI = LIS->getInstructionFromIndex(DefVNI->def);
    const SUnit *DefSU = getSUnit(DefMI);
    if (!DefSU)
      continue;

    unsigned LiveOutHeight = DefSU->getHeight();
    unsigned LiveOutDepth = DefSU->getDepth() + DefSU->Latency;
    // Visit all local users of the vreg def.
    for (const VReg2SUnit &V2SU
         : make_range(VRegUses.find(Reg), VRegUses.end())) {
      SUnit *SU = V2SU.SU;
      if (SU == &ExitSU)
        continue;

      // Only consider uses of the phi.
      LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(*SU->getInstr()));
      if (!LRQ.valueIn()->isPHIDef())
        continue;

      // Assume that a path spanning two iterations is a cycle, which could
      // overestimate in strange cases. This allows cyclic latency to be
      // estimated as the minimum slack of the vreg's depth or height.
      unsigned CyclicLatency = 0;
      if (LiveOutDepth > SU->getDepth())
        CyclicLatency = LiveOutDepth - SU->getDepth();

      unsigned LiveInHeight = SU->getHeight() + DefSU->Latency;
      if (LiveInHeight > LiveOutHeight) {
        // Take the smaller of the depth-based and height-based slack.
        if (LiveInHeight - LiveOutHeight < CyclicLatency)
          CyclicLatency = LiveInHeight - LiveOutHeight;
      } else
        CyclicLatency = 0;

      DEBUG(dbgs() << "Cyclic Path: SU(" << DefSU->NodeNum << ") -> SU("
            << SU->NodeNum << ") = " << CyclicLatency << "c\n");
      if (CyclicLatency > MaxCyclicLatency)
        MaxCyclicLatency = CyclicLatency;
    }
  }
  DEBUG(dbgs() << "Cyclic Critical Path: " << MaxCyclicLatency << "c\n");
  return MaxCyclicLatency;
}
1389 | | |
1390 | | /// Release ExitSU predecessors and setup scheduler queues. Re-position |
1391 | | /// the Top RP tracker in case the region beginning has changed. |
1392 | | void ScheduleDAGMILive::initQueues(ArrayRef<SUnit*> TopRoots, |
1393 | 4.07M | ArrayRef<SUnit*> BotRoots) { |
1394 | 4.07M | ScheduleDAGMI::initQueues(TopRoots, BotRoots); |
1395 | 4.07M | if (ShouldTrackPressure4.07M ) { |
1396 | 112k | assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker"); |
1397 | 112k | TopRPTracker.setPos(CurrentTop); |
1398 | 112k | } |
1399 | 4.07M | } |
1400 | | |
/// Move an instruction and update register pressure.
/// Places MI at the appropriate end of the scheduled zone (top or bottom),
/// then, when pressure tracking is enabled, advances/recedes the matching
/// register-pressure tracker over the instruction.
void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) {
  // Move the instruction to its new location in the instruction stream.
  MachineInstr *MI = SU->getInstr();

  if (IsTopNode) {
    assert(SU->isTopReady() && "node still has unscheduled dependencies");
    if (&*CurrentTop == MI)
      // Already in place; just advance past it (skipping debug values).
      CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
    else {
      moveInstruction(MI, CurrentTop);
      TopRPTracker.setPos(MI);
    }

    if (ShouldTrackPressure) {
      // Update top scheduled pressure.
      RegisterOperands RegOpers;
      RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false);
      if (ShouldTrackLaneMasks) {
        // Adjust liveness and add missing dead+read-undef flags.
        SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
        RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
      } else {
        // Adjust for missing dead-def flags.
        RegOpers.detectDeadDefs(*MI, *LIS);
      }

      TopRPTracker.advance(RegOpers);
      assert(TopRPTracker.getPos() == CurrentTop && "out of sync");
      DEBUG(
        dbgs() << "Top Pressure:\n";
        dumpRegSetPressure(TopRPTracker.getRegSetPressureAtPos(), TRI);
      );

      updateScheduledPressure(SU, TopRPTracker.getPressure().MaxSetPressure);
    }
  } else {
    assert(SU->isBottomReady() && "node still has unscheduled dependencies");
    MachineBasicBlock::iterator priorII =
      priorNonDebug(CurrentBottom, CurrentTop);
    if (&*priorII == MI)
      // MI is already immediately above the bottom boundary; just adopt it.
      CurrentBottom = priorII;
    else {
      if (&*CurrentTop == MI) {
        // Moving the top instruction down: advance the top boundary first so
        // it does not point at the relocated instruction.
        CurrentTop = nextIfDebug(++CurrentTop, priorII);
        TopRPTracker.setPos(CurrentTop);
      }
      moveInstruction(MI, CurrentBottom);
      CurrentBottom = MI;
    }
    if (ShouldTrackPressure) {
      RegisterOperands RegOpers;
      RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false);
      if (ShouldTrackLaneMasks) {
        // Adjust liveness and add missing dead+read-undef flags.
        SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
        RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
      } else {
        // Adjust for missing dead-def flags.
        RegOpers.detectDeadDefs(*MI, *LIS);
      }

      BotRPTracker.recedeSkipDebugValues();
      SmallVector<RegisterMaskPair, 8> LiveUses;
      BotRPTracker.recede(RegOpers, &LiveUses);
      assert(BotRPTracker.getPos() == CurrentBottom && "out of sync");
      DEBUG(
        dbgs() << "Bottom Pressure:\n";
        dumpRegSetPressure(BotRPTracker.getRegSetPressureAtPos(), TRI);
      );

      updateScheduledPressure(SU, BotRPTracker.getPressure().MaxSetPressure);
      updatePressureDiffs(LiveUses);
    }
  }
}
1477 | | |
1478 | | //===----------------------------------------------------------------------===// |
1479 | | // BaseMemOpClusterMutation - DAG post-processing to cluster loads or stores. |
1480 | | //===----------------------------------------------------------------------===// |
1481 | | |
namespace {

/// \brief Post-process the DAG to create cluster edges between neighboring
/// loads or between neighboring stores.
class BaseMemOpClusterMutation : public ScheduleDAGMutation {
  /// A memory operation together with its decoded base register and
  /// immediate offset, used as the sort key when grouping neighbors.
  struct MemOpInfo {
    SUnit *SU;
    unsigned BaseReg;
    int64_t Offset;

    MemOpInfo(SUnit *su, unsigned reg, int64_t ofs)
        : SU(su), BaseReg(reg), Offset(ofs) {}

    // Order by (base, offset), with NodeNum as a deterministic tie-breaker.
    bool operator<(const MemOpInfo&RHS) const {
      return std::tie(BaseReg, Offset, SU->NodeNum) <
             std::tie(RHS.BaseReg, RHS.Offset, RHS.SU->NodeNum);
    }
  };

  const TargetInstrInfo *TII;
  const TargetRegisterInfo *TRI;
  bool IsLoad; // true: cluster loads; false: cluster stores.

public:
  BaseMemOpClusterMutation(const TargetInstrInfo *tii,
                           const TargetRegisterInfo *tri, bool IsLoad)
      : TII(tii), TRI(tri), IsLoad(IsLoad) {}

  void apply(ScheduleDAGInstrs *DAGInstrs) override;

protected:
  void clusterNeighboringMemOps(ArrayRef<SUnit *> MemOps, ScheduleDAGMI *DAG);
};

/// Convenience subclass that clusters stores.
class StoreClusterMutation : public BaseMemOpClusterMutation {
public:
  StoreClusterMutation(const TargetInstrInfo *tii,
                       const TargetRegisterInfo *tri)
      : BaseMemOpClusterMutation(tii, tri, false) {}
};

/// Convenience subclass that clusters loads.
class LoadClusterMutation : public BaseMemOpClusterMutation {
public:
  LoadClusterMutation(const TargetInstrInfo *tii, const TargetRegisterInfo *tri)
      : BaseMemOpClusterMutation(tii, tri, true) {}
};

} // end anonymous namespace
1530 | | |
1531 | | namespace llvm { |
1532 | | |
1533 | | std::unique_ptr<ScheduleDAGMutation> |
1534 | | createLoadClusterDAGMutation(const TargetInstrInfo *TII, |
1535 | 470k | const TargetRegisterInfo *TRI) { |
1536 | 470k | return EnableMemOpCluster ? llvm::make_unique<LoadClusterMutation>(TII, TRI) |
1537 | 0 | : nullptr; |
1538 | 470k | } |
1539 | | |
1540 | | std::unique_ptr<ScheduleDAGMutation> |
1541 | | createStoreClusterDAGMutation(const TargetInstrInfo *TII, |
1542 | 470k | const TargetRegisterInfo *TRI) { |
1543 | 470k | return EnableMemOpCluster ? llvm::make_unique<StoreClusterMutation>(TII, TRI) |
1544 | 0 | : nullptr; |
1545 | 470k | } |
1546 | | |
1547 | | } // end namespace llvm |
1548 | | |
/// Sort the given memory ops by (base register, offset) and add Cluster edges
/// between runs of neighbors the target agrees to cluster. Successor edges of
/// the earlier op are copied to the later one to keep the pair adjacent.
void BaseMemOpClusterMutation::clusterNeighboringMemOps(
    ArrayRef<SUnit *> MemOps, ScheduleDAGMI *DAG) {
  SmallVector<MemOpInfo, 32> MemOpRecords;
  for (SUnit *SU : MemOps) {
    unsigned BaseReg;
    int64_t Offset;
    // Only ops whose base+offset the target can decode are candidates.
    if (TII->getMemOpBaseRegImmOfs(*SU->getInstr(), BaseReg, Offset, TRI))
      MemOpRecords.push_back(MemOpInfo(SU, BaseReg, Offset));
  }
  if (MemOpRecords.size() < 2)
    return;

  std::sort(MemOpRecords.begin(), MemOpRecords.end());
  unsigned ClusterLength = 1;
  for (unsigned Idx = 0, End = MemOpRecords.size(); Idx < (End - 1); ++Idx) {
    SUnit *SUa = MemOpRecords[Idx].SU;
    SUnit *SUb = MemOpRecords[Idx+1].SU;
    // The target vetoes over-long clusters via ClusterLength; addEdge can
    // also fail (e.g. if it would create a cycle).
    if (TII->shouldClusterMemOps(*SUa->getInstr(), MemOpRecords[Idx].BaseReg,
                                 *SUb->getInstr(), MemOpRecords[Idx+1].BaseReg,
                                 ClusterLength) &&
        DAG->addEdge(SUb, SDep(SUa, SDep::Cluster))) {
      DEBUG(dbgs() << "Cluster ld/st SU(" << SUa->NodeNum << ") - SU("
            << SUb->NodeNum << ")\n");
      // Copy successor edges from SUa to SUb. Interleaving computation
      // dependent on SUa can prevent load combining due to register reuse.
      // Predecessor edges do not need to be copied from SUb to SUa since nearby
      // loads should have effectively the same inputs.
      for (const SDep &Succ : SUa->Succs) {
        if (Succ.getSUnit() == SUb)
          continue;
        DEBUG(dbgs() << "  Copy Succ SU(" << Succ.getSUnit()->NodeNum << ")\n");
        DAG->addEdge(Succ.getSUnit(), SDep(SUb, SDep::Artificial));
      }
      ++ClusterLength;
    } else
      ClusterLength = 1;
  }
}
1587 | | |
/// \brief Callback from DAG postProcessing to create cluster edges for loads.
/// Partitions the region's loads (or stores) by the first control-dependence
/// predecessor ("chain"), then clusters neighbors within each chain.
void BaseMemOpClusterMutation::apply(ScheduleDAGInstrs *DAGInstrs) {
  ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);

  // Map DAG NodeNum to store chain ID.
  DenseMap<unsigned, unsigned> StoreChainIDs;
  // Map each store chain to a set of dependent MemOps.
  SmallVector<SmallVector<SUnit*,4>, 32> StoreChainDependents;
  for (SUnit &SU : DAG->SUnits) {
    if ((IsLoad && !SU.getInstr()->mayLoad()) ||
        (!IsLoad && !SU.getInstr()->mayStore()))
      continue;

    // Default chain key: SUnits.size(), an ID no real node can have.
    unsigned ChainPredID = DAG->SUnits.size();
    for (const SDep &Pred : SU.Preds) {
      if (Pred.isCtrl()) {
        ChainPredID = Pred.getSUnit()->NodeNum;
        break;
      }
    }
    // Check if this chain-like pred has been seen
    // before. ChainPredID==MaxNodeID at the top of the schedule.
    unsigned NumChains = StoreChainDependents.size();
    std::pair<DenseMap<unsigned, unsigned>::iterator, bool> Result =
      StoreChainIDs.insert(std::make_pair(ChainPredID, NumChains));
    if (Result.second)
      StoreChainDependents.resize(NumChains + 1);
    StoreChainDependents[Result.first->second].push_back(&SU);
  }

  // Iterate over the store chains.
  for (auto &SCD : StoreChainDependents)
    clusterNeighboringMemOps(SCD, DAG);
}
1622 | | |
1623 | | //===----------------------------------------------------------------------===// |
1624 | | // CopyConstrain - DAG post-processing to encourage copy elimination. |
1625 | | //===----------------------------------------------------------------------===// |
1626 | | |
namespace {

/// \brief Post-process the DAG to create weak edges from all uses of a copy to
/// the one use that defines the copy's source vreg, most likely an induction
/// variable increment.
class CopyConstrain : public ScheduleDAGMutation {
  // Transient state. Slot index of the first non-debug instruction in the
  // current scheduling region; set by apply() before visiting copies.
  SlotIndex RegionBeginIdx;

  // RegionEndIdx is the slot index of the last non-debug instruction in the
  // scheduling region. So we may have RegionBeginIdx == RegionEndIdx.
  SlotIndex RegionEndIdx;

public:
  // The TII/TRI arguments are accepted for factory-signature symmetry but
  // are unused by this mutation.
  CopyConstrain(const TargetInstrInfo *, const TargetRegisterInfo *) {}

  void apply(ScheduleDAGInstrs *DAGInstrs) override;

protected:
  void constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG);
};

} // end anonymous namespace
1650 | | |
1651 | | namespace llvm { |
1652 | | |
1653 | | std::unique_ptr<ScheduleDAGMutation> |
1654 | | createCopyConstrainDAGMutation(const TargetInstrInfo *TII, |
1655 | 535k | const TargetRegisterInfo *TRI) { |
1656 | 535k | return llvm::make_unique<CopyConstrain>(TII, TRI); |
1657 | 535k | } |
1658 | | |
1659 | | } // end namespace llvm |
1660 | | |
/// constrainLocalCopy handles two possibilities:
/// 1) Local src:
///  I0:     = dst
///  I1: src = ...
///  I2:     = dst
///  I3: dst = src (copy)
///      (create pred->succ edges I0->I1, I2->I1)
///
/// 2) Local copy:
///  I0: dst = src (copy)
///  I1:     = dst
///  I2: src = ...
///  I3:     = dst
///      (create pred->succ edges I1->I2, I3->I2)
///
/// Although the MachineScheduler is currently constrained to single blocks,
/// this algorithm should handle extended blocks. An EBB is a set of
/// contiguously numbered blocks such that the previous block in the EBB is
/// always the single predecessor.
void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG) {
  LiveIntervals *LIS = DAG->getLIS();
  MachineInstr *Copy = CopySU->getInstr();

  // Check for pure vreg copies.
  const MachineOperand &SrcOp = Copy->getOperand(1);
  unsigned SrcReg = SrcOp.getReg();
  if (!TargetRegisterInfo::isVirtualRegister(SrcReg) || !SrcOp.readsReg())
    return;

  const MachineOperand &DstOp = Copy->getOperand(0);
  unsigned DstReg = DstOp.getReg();
  if (!TargetRegisterInfo::isVirtualRegister(DstReg) || DstOp.isDead())
    return;

  // Check if either the dest or source is local. If it's live across a back
  // edge, it's not local. Note that if both vregs are live across the back
  // edge, we cannot successfully contrain the copy without cyclic scheduling.
  // If both the copy's source and dest are local live intervals, then we
  // should treat the dest as the global for the purpose of adding
  // constraints. This adds edges from source's other uses to the copy.
  unsigned LocalReg = SrcReg;
  unsigned GlobalReg = DstReg;
  LiveInterval *LocalLI = &LIS->getInterval(LocalReg);
  if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx)) {
    LocalReg = DstReg;
    GlobalReg = SrcReg;
    LocalLI = &LIS->getInterval(LocalReg);
    if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx))
      return;
  }
  LiveInterval *GlobalLI = &LIS->getInterval(GlobalReg);

  // Find the global segment after the start of the local LI.
  LiveInterval::iterator GlobalSegment = GlobalLI->find(LocalLI->beginIndex());
  // If GlobalLI does not overlap LocalLI->start, then a copy directly feeds a
  // local live range. We could create edges from other global uses to the local
  // start, but the coalescer should have already eliminated these cases, so
  // don't bother dealing with it.
  if (GlobalSegment == GlobalLI->end())
    return;

  // If GlobalSegment is killed at the LocalLI->start, the call to find()
  // returned the next global segment. But if GlobalSegment overlaps with
  // LocalLI->start, then advance to the next segement. If a hole in GlobalLI
  // exists in LocalLI's vicinity, GlobalSegment will be the end of the hole.
  if (GlobalSegment->contains(LocalLI->beginIndex()))
    ++GlobalSegment;

  if (GlobalSegment == GlobalLI->end())
    return;

  // Check if GlobalLI contains a hole in the vicinity of LocalLI.
  if (GlobalSegment != GlobalLI->begin()) {
    // Two address defs have no hole.
    if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->end,
                               GlobalSegment->start)) {
      return;
    }
    // If the prior global segment may be defined by the same two-address
    // instruction that also defines LocalLI, then can't make a hole here.
    if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->start,
                               LocalLI->beginIndex())) {
      return;
    }
    // If GlobalLI has a prior segment, it must be live into the EBB. Otherwise
    // it would be a disconnected component in the live range.
    assert(std::prev(GlobalSegment)->start < LocalLI->beginIndex() &&
           "Disconnected LRG within the scheduling region.");
  }
  MachineInstr *GlobalDef = LIS->getInstructionFromIndex(GlobalSegment->start);
  if (!GlobalDef)
    return;

  SUnit *GlobalSU = DAG->getSUnit(GlobalDef);
  if (!GlobalSU)
    return;

  // GlobalDef is the bottom of the GlobalLI hole. Open the hole by
  // constraining the uses of the last local def to precede GlobalDef.
  SmallVector<SUnit*,8> LocalUses;
  const VNInfo *LastLocalVN = LocalLI->getVNInfoBefore(LocalLI->endIndex());
  MachineInstr *LastLocalDef = LIS->getInstructionFromIndex(LastLocalVN->def);
  SUnit *LastLocalSU = DAG->getSUnit(LastLocalDef);
  for (const SDep &Succ : LastLocalSU->Succs) {
    if (Succ.getKind() != SDep::Data || Succ.getReg() != LocalReg)
      continue;
    if (Succ.getSUnit() == GlobalSU)
      continue;
    // Bail out entirely if any required edge would create a cycle.
    if (!DAG->canAddEdge(GlobalSU, Succ.getSUnit()))
      return;
    LocalUses.push_back(Succ.getSUnit());
  }
  // Open the top of the GlobalLI hole by constraining any earlier global uses
  // to precede the start of LocalLI.
  SmallVector<SUnit*,8> GlobalUses;
  MachineInstr *FirstLocalDef =
    LIS->getInstructionFromIndex(LocalLI->beginIndex());
  SUnit *FirstLocalSU = DAG->getSUnit(FirstLocalDef);
  for (const SDep &Pred : GlobalSU->Preds) {
    if (Pred.getKind() != SDep::Anti || Pred.getReg() != GlobalReg)
      continue;
    if (Pred.getSUnit() == FirstLocalSU)
      continue;
    if (!DAG->canAddEdge(FirstLocalSU, Pred.getSUnit()))
      return;
    GlobalUses.push_back(Pred.getSUnit());
  }
  DEBUG(dbgs() << "Constraining copy SU(" << CopySU->NodeNum << ")\n");
  // Add the weak edges.
  for (SmallVectorImpl<SUnit*>::const_iterator
         I = LocalUses.begin(), E = LocalUses.end(); I != E; ++I) {
    DEBUG(dbgs() << "  Local use SU(" << (*I)->NodeNum << ") -> SU("
          << GlobalSU->NodeNum << ")\n");
    DAG->addEdge(GlobalSU, SDep(*I, SDep::Weak));
  }
  for (SmallVectorImpl<SUnit*>::const_iterator
         I = GlobalUses.begin(), E = GlobalUses.end(); I != E; ++I) {
    DEBUG(dbgs() << "  Global use SU(" << (*I)->NodeNum << ") -> SU("
          << FirstLocalSU->NodeNum << ")\n");
    DAG->addEdge(FirstLocalSU, SDep(*I, SDep::Weak));
  }
}
1803 | | |
1804 | | /// \brief Callback from DAG postProcessing to create weak edges to encourage |
1805 | | /// copy elimination. |
1806 | 4.05M | void CopyConstrain::apply(ScheduleDAGInstrs *DAGInstrs) { |
1807 | 4.05M | ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs); |
1808 | 4.05M | assert(DAG->hasVRegLiveness() && "Expect VRegs with LiveIntervals"); |
1809 | 4.05M | |
1810 | 4.05M | MachineBasicBlock::iterator FirstPos = nextIfDebug(DAG->begin(), DAG->end()); |
1811 | 4.05M | if (FirstPos == DAG->end()) |
1812 | 1 | return; |
1813 | 4.05M | RegionBeginIdx = DAG->getLIS()->getInstructionIndex(*FirstPos); |
1814 | 4.05M | RegionEndIdx = DAG->getLIS()->getInstructionIndex( |
1815 | 4.05M | *priorNonDebug(DAG->end(), DAG->begin())); |
1816 | 4.05M | |
1817 | 16.2M | for (SUnit &SU : DAG->SUnits) { |
1818 | 16.2M | if (!SU.getInstr()->isCopy()) |
1819 | 10.1M | continue; |
1820 | 6.14M | |
1821 | 6.14M | constrainLocalCopy(&SU, static_cast<ScheduleDAGMILive*>(DAG)); |
1822 | 6.14M | } |
1823 | 4.05M | } |
1824 | | |
1825 | | //===----------------------------------------------------------------------===// |
1826 | | // MachineSchedStrategy helpers used by GenericScheduler, GenericPostScheduler |
1827 | | // and possibly other custom schedulers. |
1828 | | //===----------------------------------------------------------------------===// |
1829 | | |
// Sentinel cycle value meaning "this resource has never been reserved".
static const unsigned InvalidCycle = ~0U;

// The boundary owns its hazard recognizer (see reset()); free it here.
SchedBoundary::~SchedBoundary() { delete HazardRec; }
1833 | | |
/// Reset all per-region scheduling state so this boundary can be reused for
/// the next DAG.
void SchedBoundary::reset() {
  // A new HazardRec is created for each DAG and owned by SchedBoundary.
  // Destroying and reconstructing it is very expensive though. So keep
  // invalid, placeholder HazardRecs.
  if (HazardRec && HazardRec->isEnabled()) {
    delete HazardRec;
    HazardRec = nullptr;
  }
  Available.clear();
  Pending.clear();
  CheckPending = false;
  CurrCycle = 0;
  CurrMOps = 0;
  MinReadyCycle = std::numeric_limits<unsigned>::max();
  ExpectedLatency = 0;
  DependentLatency = 0;
  RetiredMOps = 0;
  MaxExecutedResCount = 0;
  ZoneCritResIdx = 0;
  IsResourceLimited = false;
  ReservedCycles.clear();
#ifndef NDEBUG
  // Track the maximum number of stall cycles that could arise either from the
  // latency of a DAG edge or the number of cycles that a processor resource is
  // reserved (SchedBoundary::ReservedCycles).
  MaxObservedStall = 0;
#endif
  // Reserve a zero-count for invalid CritResIdx.
  ExecutedResCounts.resize(1);
  assert(!ExecutedResCounts[0] && "nonzero count for bad resource");
}
1865 | | |
/// Initialize the remaining-work counters for the whole region: total
/// micro-ops still to issue and the remaining demand on each processor
/// resource, both in the sched model's scaled "factor" units.
void SchedRemainder::
init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) {
  reset();
  // Without a machine model there are no resource counts to accumulate.
  if (!SchedModel->hasInstrSchedModel())
    return;
  RemainingCounts.resize(SchedModel->getNumProcResourceKinds());
  for (SUnit &SU : DAG->SUnits) {
    const MCSchedClassDesc *SC = DAG->getSchedClass(&SU);
    RemIssueCount += SchedModel->getNumMicroOps(SU.getInstr(), SC)
      * SchedModel->getMicroOpFactor();
    for (TargetSchedModel::ProcResIter
           PI = SchedModel->getWriteProcResBegin(SC),
           PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
      unsigned PIdx = PI->ProcResourceIdx;
      unsigned Factor = SchedModel->getResourceFactor(PIdx);
      RemainingCounts[PIdx] += (Factor * PI->Cycles);
    }
  }
}
1885 | | |
/// Attach this boundary to a DAG/model/remainder triple for a new region and
/// size the per-resource counters from the machine model.
void SchedBoundary::
init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) {
  reset();
  DAG = dag;
  SchedModel = smodel;
  Rem = rem;
  if (SchedModel->hasInstrSchedModel()) {
    ExecutedResCounts.resize(SchedModel->getNumProcResourceKinds());
    // InvalidCycle marks resources that have never been reserved.
    ReservedCycles.resize(SchedModel->getNumProcResourceKinds(), InvalidCycle);
  }
}
1897 | | |
1898 | | /// Compute the stall cycles based on this SUnit's ready time. Heuristics treat |
1899 | | /// these "soft stalls" differently than the hard stall cycles based on CPU |
1900 | | /// resources and computed by checkHazard(). A fully in-order model |
1901 | | /// (MicroOpBufferSize==0) will not make use of this since instructions are not |
1902 | | /// available for scheduling until they are ready. However, a weaker in-order |
1903 | | /// model may use this for heuristics. For example, if a processor has in-order |
1904 | | /// behavior when reading certain resources, this may come into play. |
1905 | 125M | unsigned SchedBoundary::getLatencyStallCycles(SUnit *SU) { |
1906 | 125M | if (!SU->isUnbuffered) |
1907 | 123M | return 0; |
1908 | 2.08M | |
1909 | 2.08M | unsigned ReadyCycle = (isTop() ? 2.08M SU->TopReadyCycle308k : SU->BotReadyCycle1.77M ); |
1910 | 2.08M | if (ReadyCycle > CurrCycle) |
1911 | 40.0k | return ReadyCycle - CurrCycle; |
1912 | 2.04M | return 0; |
1913 | 2.04M | } |
1914 | | |
1915 | | /// Compute the next cycle at which the given processor resource can be |
1916 | | /// scheduled. |
1917 | | unsigned SchedBoundary:: |
1918 | 14.0M | getNextResourceCycle(unsigned PIdx, unsigned Cycles) { |
1919 | 14.0M | unsigned NextUnreserved = ReservedCycles[PIdx]; |
1920 | 14.0M | // If this resource has never been used, always return cycle zero. |
1921 | 14.0M | if (NextUnreserved == InvalidCycle) |
1922 | 14.0M | return 0; |
1923 | 2.07k | // For bottom-up scheduling add the cycles needed for the current operation. |
1924 | 2.07k | if (2.07k !isTop()2.07k ) |
1925 | 927 | NextUnreserved += Cycles; |
1926 | 14.0M | return NextUnreserved; |
1927 | 14.0M | } |
1928 | | |
/// Does this SU have a hazard within the current instruction group.
///
/// The scheduler supports two modes of hazard recognition. The first is the
/// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
/// supports highly complicated in-order reservation tables
/// (ScoreboardHazardRecognizer) and arbitraty target-specific logic.
///
/// The second is a streamlined mechanism that checks for hazards based on
/// simple counters that the scheduler itself maintains. It explicitly checks
/// for instruction dispatch limitations, including the number of micro-ops that
/// can dispatch per cycle.
///
/// TODO: Also check whether the SU must start a new group.
bool SchedBoundary::checkHazard(SUnit *SU) {
  // First consult the target's own hazard recognizer, if it is active.
  if (HazardRec->isEnabled()
      && HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard) {
    return true;
  }

  // Issue-width check: would this SU overflow the current dispatch group?
  unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());
  if ((CurrMOps > 0) && (CurrMOps + uops > SchedModel->getIssueWidth())) {
    DEBUG(dbgs() << "  SU(" << SU->NodeNum << ") uops="
          << SchedModel->getNumMicroOps(SU->getInstr()) << '\n');
    return true;
  }

  // Group-boundary check: some instructions must begin (top-down) or end
  // (bottom-up) a dispatch group and cannot join a partially-filled one.
  if (CurrMOps > 0 &&
      ((isTop() && SchedModel->mustBeginGroup(SU->getInstr())) ||
       (!isTop() && SchedModel->mustEndGroup(SU->getInstr())))) {
    DEBUG(dbgs() << "  hazard: SU(" << SU->NodeNum << ") must "
          << (isTop()? "begin" : "end") << " group\n");
    return true;
  }

  // Reserved-resource check: a resource this SU needs may still be booked.
  if (SchedModel->hasInstrSchedModel() && SU->hasReservedResource) {
    const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
    for (TargetSchedModel::ProcResIter
           PI = SchedModel->getWriteProcResBegin(SC),
           PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
      unsigned NRCycle = getNextResourceCycle(PI->ProcResourceIdx, PI->Cycles);
      if (NRCycle > CurrCycle) {
#ifndef NDEBUG
        MaxObservedStall = std::max(PI->Cycles, MaxObservedStall);
#endif
        DEBUG(dbgs() << "  SU(" << SU->NodeNum << ") "
              << SchedModel->getResourceName(PI->ProcResourceIdx)
              << "=" << NRCycle << "c\n");
        return true;
      }
    }
  }
  return false;
}
1982 | | |
// Find the unscheduled node in ReadySUs with the highest latency.
unsigned SchedBoundary::
findMaxLatency(ArrayRef<SUnit*> ReadySUs) {
  SUnit *LateSU = nullptr;           // node with the largest remaining latency
  unsigned RemLatency = 0;           // that latency; 0 if ReadySUs is empty
  for (SUnit *SU : ReadySUs) {
    unsigned L = getUnscheduledLatency(SU);
    if (L > RemLatency) {
      RemLatency = L;
      LateSU = SU;
    }
  }
  if (LateSU) {
    DEBUG(dbgs() << Available.getName() << " RemLatency SU("
          << LateSU->NodeNum << ") " << RemLatency << "c\n");
  }
  return RemLatency;
}
2001 | | |
// Count resources in this zone and the remaining unscheduled
// instruction. Return the max count, scaled. Set OtherCritIdx to the critical
// resource index, or zero if the zone is issue limited.
unsigned SchedBoundary::
getOtherResourceCount(unsigned &OtherCritIdx) {
  OtherCritIdx = 0;
  if (!SchedModel->hasInstrSchedModel())
    return 0;

  // Start from the issue (micro-op) count; any resource that exceeds it makes
  // the zone resource limited rather than issue limited.
  unsigned OtherCritCount = Rem->RemIssueCount
    + (RetiredMOps * SchedModel->getMicroOpFactor());
  DEBUG(dbgs() << "  " << Available.getName() << " + Remain MOps: "
        << OtherCritCount / SchedModel->getMicroOpFactor() << '\n');
  // Index 0 is reserved for the invalid resource, so start at 1.
  for (unsigned PIdx = 1, PEnd = SchedModel->getNumProcResourceKinds();
       PIdx != PEnd; ++PIdx) {
    unsigned OtherCount = getResourceCount(PIdx) + Rem->RemainingCounts[PIdx];
    if (OtherCount > OtherCritCount) {
      OtherCritCount = OtherCount;
      OtherCritIdx = PIdx;
    }
  }
  if (OtherCritIdx) {
    DEBUG(dbgs() << "  " << Available.getName() << " + Remain CritRes: "
          << OtherCritCount / SchedModel->getResourceFactor(OtherCritIdx)
          << " " << SchedModel->getResourceName(OtherCritIdx) << "\n");
  }
  return OtherCritCount;
}
2030 | | |
2031 | 27.0M | void SchedBoundary::releaseNode(SUnit *SU, unsigned ReadyCycle) { |
2032 | 27.0M | assert(SU->getInstr() && "Scheduled SUnit must have instr"); |
2033 | 27.0M | |
2034 | | #ifndef NDEBUG |
2035 | | // ReadyCycle was been bumped up to the CurrCycle when this node was |
2036 | | // scheduled, but CurrCycle may have been eagerly advanced immediately after |
2037 | | // scheduling, so may now be greater than ReadyCycle. |
2038 | | if (ReadyCycle > CurrCycle) |
2039 | | MaxObservedStall = std::max(ReadyCycle - CurrCycle, MaxObservedStall); |
2040 | | #endif |
2041 | | |
2042 | 27.0M | if (ReadyCycle < MinReadyCycle) |
2043 | 8.53M | MinReadyCycle = ReadyCycle; |
2044 | 27.0M | |
2045 | 27.0M | // Check for interlocks first. For the purpose of other heuristics, an |
2046 | 27.0M | // instruction that cannot issue appears as if it's not in the ReadyQueue. |
2047 | 27.0M | bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0; |
2048 | 27.0M | if ((!IsBuffered && 27.0M ReadyCycle > CurrCycle257k ) || checkHazard(SU)27.0M || |
2049 | 26.9M | Available.size() >= ReadyListLimit) |
2050 | 173k | Pending.push(SU); |
2051 | 27.0M | else |
2052 | 26.9M | Available.push(SU); |
2053 | 27.0M | } |
2054 | | |
2055 | | /// Move the boundary of scheduled code by one cycle. |
2056 | 1.34M | void SchedBoundary::bumpCycle(unsigned NextCycle) { |
2057 | 1.34M | if (SchedModel->getMicroOpBufferSize() == 01.34M ) { |
2058 | 165k | assert(MinReadyCycle < std::numeric_limits<unsigned>::max() && |
2059 | 165k | "MinReadyCycle uninitialized"); |
2060 | 165k | if (MinReadyCycle > NextCycle) |
2061 | 16.8k | NextCycle = MinReadyCycle; |
2062 | 165k | } |
2063 | 1.34M | // Update the current micro-ops, which will issue in the next cycle. |
2064 | 1.34M | unsigned DecMOps = SchedModel->getIssueWidth() * (NextCycle - CurrCycle); |
2065 | 1.34M | CurrMOps = (CurrMOps <= DecMOps) ? 01.32M : CurrMOps - DecMOps14.6k ; |
2066 | 1.34M | |
2067 | 1.34M | // Decrement DependentLatency based on the next cycle. |
2068 | 1.34M | if ((NextCycle - CurrCycle) > DependentLatency) |
2069 | 169k | DependentLatency = 0; |
2070 | 1.34M | else |
2071 | 1.17M | DependentLatency -= (NextCycle - CurrCycle); |
2072 | 1.34M | |
2073 | 1.34M | if (!HazardRec->isEnabled()1.34M ) { |
2074 | 1.31M | // Bypass HazardRec virtual calls. |
2075 | 1.31M | CurrCycle = NextCycle; |
2076 | 1.34M | } else { |
2077 | 24.4k | // Bypass getHazardType calls in case of long latency. |
2078 | 63.5k | for (; CurrCycle != NextCycle63.5k ; ++CurrCycle39.1k ) { |
2079 | 39.1k | if (isTop()) |
2080 | 985 | HazardRec->AdvanceCycle(); |
2081 | 39.1k | else |
2082 | 38.1k | HazardRec->RecedeCycle(); |
2083 | 39.1k | } |
2084 | 24.4k | } |
2085 | 1.34M | CheckPending = true; |
2086 | 1.34M | unsigned LFactor = SchedModel->getLatencyFactor(); |
2087 | 1.34M | IsResourceLimited = |
2088 | 1.34M | (int)(getCriticalCount() - (getScheduledLatency() * LFactor)) |
2089 | 1.34M | > (int)LFactor; |
2090 | 1.34M | |
2091 | 1.34M | DEBUG(dbgs() << "Cycle: " << CurrCycle << ' ' << Available.getName() << '\n'); |
2092 | 1.34M | } |
2093 | | |
2094 | 14.0M | void SchedBoundary::incExecutedResources(unsigned PIdx, unsigned Count) { |
2095 | 14.0M | ExecutedResCounts[PIdx] += Count; |
2096 | 14.0M | if (ExecutedResCounts[PIdx] > MaxExecutedResCount) |
2097 | 9.95M | MaxExecutedResCount = ExecutedResCounts[PIdx]; |
2098 | 14.0M | } |
2099 | | |
2100 | | /// Add the given processor resource to this scheduled zone. |
2101 | | /// |
2102 | | /// \param Cycles indicates the number of consecutive (non-pipelined) cycles |
2103 | | /// during which this resource is consumed. |
2104 | | /// |
2105 | | /// \return the next cycle at which the instruction may execute without |
2106 | | /// oversubscribing resources. |
2107 | | unsigned SchedBoundary:: |
2108 | 14.0M | countResource(unsigned PIdx, unsigned Cycles, unsigned NextCycle) { |
2109 | 14.0M | unsigned Factor = SchedModel->getResourceFactor(PIdx); |
2110 | 14.0M | unsigned Count = Factor * Cycles; |
2111 | 14.0M | DEBUG(dbgs() << " " << SchedModel->getResourceName(PIdx) |
2112 | 14.0M | << " +" << Cycles << "x" << Factor << "u\n"); |
2113 | 14.0M | |
2114 | 14.0M | // Update Executed resources counts. |
2115 | 14.0M | incExecutedResources(PIdx, Count); |
2116 | 14.0M | assert(Rem->RemainingCounts[PIdx] >= Count && "resource double counted"); |
2117 | 14.0M | Rem->RemainingCounts[PIdx] -= Count; |
2118 | 14.0M | |
2119 | 14.0M | // Check if this resource exceeds the current critical resource. If so, it |
2120 | 14.0M | // becomes the critical resource. |
2121 | 14.0M | if (ZoneCritResIdx != PIdx && 14.0M (getResourceCount(PIdx) > getCriticalCount())9.66M ) { |
2122 | 3.96M | ZoneCritResIdx = PIdx; |
2123 | 3.96M | DEBUG(dbgs() << " *** Critical resource " |
2124 | 3.96M | << SchedModel->getResourceName(PIdx) << ": " |
2125 | 3.96M | << getResourceCount(PIdx) / SchedModel->getLatencyFactor() << "c\n"); |
2126 | 3.96M | } |
2127 | 14.0M | // For reserved resources, record the highest cycle using the resource. |
2128 | 14.0M | unsigned NextAvailable = getNextResourceCycle(PIdx, Cycles); |
2129 | 14.0M | if (NextAvailable > CurrCycle14.0M ) { |
2130 | 4 | DEBUG(dbgs() << " Resource conflict: " |
2131 | 4 | << SchedModel->getProcResource(PIdx)->Name << " reserved until @" |
2132 | 4 | << NextAvailable << "\n"); |
2133 | 4 | } |
2134 | 14.0M | return NextAvailable; |
2135 | 14.0M | } |
2136 | | |
2137 | | /// Move the boundary of scheduled code by one SUnit. |
2138 | 16.5M | void SchedBoundary::bumpNode(SUnit *SU) { |
2139 | 16.5M | // Update the reservation table. |
2140 | 16.5M | if (HazardRec->isEnabled()16.5M ) { |
2141 | 33.2k | if (!isTop() && 33.2k SU->isCall27.3k ) { |
2142 | 0 | // Calls are scheduled with their preceding instructions. For bottom-up |
2143 | 0 | // scheduling, clear the pipeline state before emitting. |
2144 | 0 | HazardRec->Reset(); |
2145 | 0 | } |
2146 | 33.2k | HazardRec->EmitInstruction(SU); |
2147 | 33.2k | } |
2148 | 16.5M | // checkHazard should prevent scheduling multiple instructions per cycle that |
2149 | 16.5M | // exceed the issue width. |
2150 | 16.5M | const MCSchedClassDesc *SC = DAG->getSchedClass(SU); |
2151 | 16.5M | unsigned IncMOps = SchedModel->getNumMicroOps(SU->getInstr()); |
2152 | 16.5M | assert( |
2153 | 16.5M | (CurrMOps == 0 || (CurrMOps + IncMOps) <= SchedModel->getIssueWidth()) && |
2154 | 16.5M | "Cannot schedule this instruction's MicroOps in the current cycle."); |
2155 | 16.5M | |
2156 | 16.5M | unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle1.37M : SU->BotReadyCycle15.1M ); |
2157 | 16.5M | DEBUG(dbgs() << " Ready @" << ReadyCycle << "c\n"); |
2158 | 16.5M | |
2159 | 16.5M | unsigned NextCycle = CurrCycle; |
2160 | 16.5M | switch (SchedModel->getMicroOpBufferSize()) { |
2161 | 197k | case 0: |
2162 | 197k | assert(ReadyCycle <= CurrCycle && "Broken PendingQueue"); |
2163 | 197k | break; |
2164 | 202k | case 1: |
2165 | 202k | if (ReadyCycle > NextCycle202k ) { |
2166 | 17.8k | NextCycle = ReadyCycle; |
2167 | 17.8k | DEBUG(dbgs() << " *** Stall until: " << ReadyCycle << "\n"); |
2168 | 17.8k | } |
2169 | 202k | break; |
2170 | 16.1M | default: |
2171 | 16.1M | // We don't currently model the OOO reorder buffer, so consider all |
2172 | 16.1M | // scheduled MOps to be "retired". We do loosely model in-order resource |
2173 | 16.1M | // latency. If this instruction uses an in-order resource, account for any |
2174 | 16.1M | // likely stall cycles. |
2175 | 16.1M | if (SU->isUnbuffered && 16.1M ReadyCycle > NextCycle485 ) |
2176 | 314 | NextCycle = ReadyCycle; |
2177 | 16.1M | break; |
2178 | 16.5M | } |
2179 | 16.5M | RetiredMOps += IncMOps; |
2180 | 16.5M | |
2181 | 16.5M | // Update resource counts and critical resource. |
2182 | 16.5M | if (SchedModel->hasInstrSchedModel()16.5M ) { |
2183 | 15.9M | unsigned DecRemIssue = IncMOps * SchedModel->getMicroOpFactor(); |
2184 | 15.9M | assert(Rem->RemIssueCount >= DecRemIssue && "MOps double counted"); |
2185 | 15.9M | Rem->RemIssueCount -= DecRemIssue; |
2186 | 15.9M | if (ZoneCritResIdx15.9M ) { |
2187 | 7.02M | // Scale scheduled micro-ops for comparing with the critical resource. |
2188 | 7.02M | unsigned ScaledMOps = |
2189 | 7.02M | RetiredMOps * SchedModel->getMicroOpFactor(); |
2190 | 7.02M | |
2191 | 7.02M | // If scaled micro-ops are now more than the previous critical resource by |
2192 | 7.02M | // a full cycle, then micro-ops issue becomes critical. |
2193 | 7.02M | if ((int)(ScaledMOps - getResourceCount(ZoneCritResIdx)) |
2194 | 7.02M | >= (int)SchedModel->getLatencyFactor()) { |
2195 | 3.67k | ZoneCritResIdx = 0; |
2196 | 3.67k | DEBUG(dbgs() << " *** Critical resource NumMicroOps: " |
2197 | 3.67k | << ScaledMOps / SchedModel->getLatencyFactor() << "c\n"); |
2198 | 3.67k | } |
2199 | 7.02M | } |
2200 | 15.9M | for (TargetSchedModel::ProcResIter |
2201 | 15.9M | PI = SchedModel->getWriteProcResBegin(SC), |
2202 | 30.0M | PE = SchedModel->getWriteProcResEnd(SC); PI != PE30.0M ; ++PI14.0M ) { |
2203 | 14.0M | unsigned RCycle = |
2204 | 14.0M | countResource(PI->ProcResourceIdx, PI->Cycles, NextCycle); |
2205 | 14.0M | if (RCycle > NextCycle) |
2206 | 4 | NextCycle = RCycle; |
2207 | 14.0M | } |
2208 | 15.9M | if (SU->hasReservedResource15.9M ) { |
2209 | 589 | // For reserved resources, record the highest cycle using the resource. |
2210 | 589 | // For top-down scheduling, this is the cycle in which we schedule this |
2211 | 589 | // instruction plus the number of cycles the operations reserves the |
2212 | 589 | // resource. For bottom-up is it simply the instruction's cycle. |
2213 | 589 | for (TargetSchedModel::ProcResIter |
2214 | 589 | PI = SchedModel->getWriteProcResBegin(SC), |
2215 | 1.32k | PE = SchedModel->getWriteProcResEnd(SC); PI != PE1.32k ; ++PI737 ) { |
2216 | 737 | unsigned PIdx = PI->ProcResourceIdx; |
2217 | 737 | if (SchedModel->getProcResource(PIdx)->BufferSize == 0737 ) { |
2218 | 589 | if (isTop()589 ) { |
2219 | 303 | ReservedCycles[PIdx] = |
2220 | 303 | std::max(getNextResourceCycle(PIdx, 0), NextCycle + PI->Cycles); |
2221 | 303 | } |
2222 | 589 | else |
2223 | 286 | ReservedCycles[PIdx] = NextCycle; |
2224 | 589 | } |
2225 | 737 | } |
2226 | 589 | } |
2227 | 15.9M | } |
2228 | 16.5M | // Update ExpectedLatency and DependentLatency. |
2229 | 16.5M | unsigned &TopLatency = isTop() ? ExpectedLatency1.37M : DependentLatency15.1M ; |
2230 | 16.5M | unsigned &BotLatency = isTop() ? DependentLatency1.37M : ExpectedLatency15.1M ; |
2231 | 16.5M | if (SU->getDepth() > TopLatency16.5M ) { |
2232 | 2.77M | TopLatency = SU->getDepth(); |
2233 | 2.77M | DEBUG(dbgs() << " " << Available.getName() |
2234 | 2.77M | << " TopLatency SU(" << SU->NodeNum << ") " << TopLatency << "c\n"); |
2235 | 2.77M | } |
2236 | 16.5M | if (SU->getHeight() > BotLatency16.5M ) { |
2237 | 5.57M | BotLatency = SU->getHeight(); |
2238 | 5.57M | DEBUG(dbgs() << " " << Available.getName() |
2239 | 5.57M | << " BotLatency SU(" << SU->NodeNum << ") " << BotLatency << "c\n"); |
2240 | 5.57M | } |
2241 | 16.5M | // If we stall for any reason, bump the cycle. |
2242 | 16.5M | if (NextCycle > CurrCycle16.5M ) { |
2243 | 18.1k | bumpCycle(NextCycle); |
2244 | 16.5M | } else { |
2245 | 16.5M | // After updating ZoneCritResIdx and ExpectedLatency, check if we're |
2246 | 16.5M | // resource limited. If a stall occurred, bumpCycle does this. |
2247 | 16.5M | unsigned LFactor = SchedModel->getLatencyFactor(); |
2248 | 16.5M | IsResourceLimited = |
2249 | 16.5M | (int)(getCriticalCount() - (getScheduledLatency() * LFactor)) |
2250 | 16.5M | > (int)LFactor; |
2251 | 16.5M | } |
2252 | 16.5M | // Update CurrMOps after calling bumpCycle to handle stalls, since bumpCycle |
2253 | 16.5M | // resets CurrMOps. Loop to handle instructions with more MOps than issue in |
2254 | 16.5M | // one cycle. Since we commonly reach the max MOps here, opportunistically |
2255 | 16.5M | // bump the cycle to avoid uselessly checking everything in the readyQ. |
2256 | 16.5M | CurrMOps += IncMOps; |
2257 | 16.5M | |
2258 | 16.5M | // Bump the cycle count for issue group constraints. |
2259 | 16.5M | // This must be done after NextCycle has been adjust for all other stalls. |
2260 | 16.5M | // Calling bumpCycle(X) will reduce CurrMOps by one issue group and set |
2261 | 16.5M | // currCycle to X. |
2262 | 16.5M | if ((isTop() && 16.5M SchedModel->mustEndGroup(SU->getInstr())1.37M ) || |
2263 | 16.5M | (!isTop() && 16.5M SchedModel->mustBeginGroup(SU->getInstr())15.1M )) { |
2264 | 0 | DEBUG(dbgs() << " Bump cycle to " |
2265 | 0 | << (isTop() ? "end" : "begin") << " group\n"); |
2266 | 0 | bumpCycle(++NextCycle); |
2267 | 0 | } |
2268 | 16.5M | |
2269 | 17.7M | while (CurrMOps >= SchedModel->getIssueWidth()17.7M ) { |
2270 | 1.16M | DEBUG(dbgs() << " *** Max MOps " << CurrMOps |
2271 | 1.16M | << " at cycle " << CurrCycle << '\n'); |
2272 | 1.16M | bumpCycle(++NextCycle); |
2273 | 1.16M | } |
2274 | 16.5M | DEBUG(dumpScheduledState()); |
2275 | 16.5M | } |
2276 | | |
2277 | | /// Release pending ready nodes in to the available queue. This makes them |
2278 | | /// visible to heuristics. |
2279 | 1.13M | void SchedBoundary::releasePending() { |
2280 | 1.13M | // If the available queue is empty, it is safe to reset MinReadyCycle. |
2281 | 1.13M | if (Available.empty()) |
2282 | 192k | MinReadyCycle = std::numeric_limits<unsigned>::max(); |
2283 | 1.13M | |
2284 | 1.13M | // Check to see if any of the pending instructions are ready to issue. If |
2285 | 1.13M | // so, add them to the available queue. |
2286 | 1.13M | bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0; |
2287 | 2.07M | for (unsigned i = 0, e = Pending.size(); i != e2.07M ; ++i936k ) { |
2288 | 936k | SUnit *SU = *(Pending.begin()+i); |
2289 | 936k | unsigned ReadyCycle = isTop() ? SU->TopReadyCycle35.9k : SU->BotReadyCycle900k ; |
2290 | 936k | |
2291 | 936k | if (ReadyCycle < MinReadyCycle) |
2292 | 203k | MinReadyCycle = ReadyCycle; |
2293 | 936k | |
2294 | 936k | if (!IsBuffered && 936k ReadyCycle > CurrCycle124k ) |
2295 | 54.2k | continue; |
2296 | 882k | |
2297 | 882k | if (882k checkHazard(SU)882k ) |
2298 | 244 | continue; |
2299 | 881k | |
2300 | 881k | if (881k Available.size() >= ReadyListLimit881k ) |
2301 | 284 | break; |
2302 | 881k | |
2303 | 881k | Available.push(SU); |
2304 | 881k | Pending.remove(Pending.begin()+i); |
2305 | 881k | --i; --e; |
2306 | 881k | } |
2307 | 1.13M | CheckPending = false; |
2308 | 1.13M | } |
2309 | | |
2310 | | /// Remove SU from the ready set for this boundary. |
2311 | 27.0M | void SchedBoundary::removeReady(SUnit *SU) { |
2312 | 27.0M | if (Available.isInQueue(SU)) |
2313 | 27.0M | Available.remove(Available.find(SU)); |
2314 | 20.4k | else { |
2315 | 20.4k | assert(Pending.isInQueue(SU) && "bad ready count"); |
2316 | 20.4k | Pending.remove(Pending.find(SU)); |
2317 | 20.4k | } |
2318 | 27.0M | } |
2319 | | |
2320 | | /// If this queue only has one ready candidate, return it. As a side effect, |
2321 | | /// defer any nodes that now hit a hazard, and advance the cycle until at least |
2322 | | /// one node is ready. If multiple instructions are ready, return NULL. |
2323 | 25.5M | SUnit *SchedBoundary::pickOnlyChoice() { |
2324 | 25.5M | if (CheckPending) |
2325 | 981k | releasePending(); |
2326 | 25.5M | |
2327 | 25.5M | if (CurrMOps > 025.5M ) { |
2328 | 11.9M | // Defer any ready instrs that now have a hazard. |
2329 | 63.3M | for (ReadyQueue::iterator I = Available.begin(); I != Available.end()63.3M ;) { |
2330 | 51.3M | if (checkHazard(*I)51.3M ) { |
2331 | 728k | Pending.push(*I); |
2332 | 728k | I = Available.remove(I); |
2333 | 728k | continue; |
2334 | 728k | } |
2335 | 50.6M | ++I; |
2336 | 50.6M | } |
2337 | 11.9M | } |
2338 | 25.7M | for (unsigned i = 0; Available.empty()25.7M ; ++i157k ) { |
2339 | 157k | // FIXME: Re-enable assert once PR20057 is resolved. |
2340 | 157k | // assert(i <= (HazardRec->getMaxLookAhead() + MaxObservedStall) && |
2341 | 157k | // "permanent hazard"); |
2342 | 157k | (void)i; |
2343 | 157k | bumpCycle(CurrCycle + 1); |
2344 | 157k | releasePending(); |
2345 | 157k | } |
2346 | 25.5M | |
2347 | 25.5M | DEBUG(Pending.dump()); |
2348 | 25.5M | DEBUG(Available.dump()); |
2349 | 25.5M | |
2350 | 25.5M | if (Available.size() == 1) |
2351 | 7.37M | return *Available.begin(); |
2352 | 18.1M | return nullptr; |
2353 | 18.1M | } |
2354 | | |
2355 | | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
2356 | | // This is useful information to dump after bumpNode. |
2357 | | // Note that the Queue contents are more useful before pickNodeFromQueue. |
2358 | | LLVM_DUMP_METHOD void SchedBoundary::dumpScheduledState() const { |
2359 | | unsigned ResFactor; |
2360 | | unsigned ResCount; |
2361 | | if (ZoneCritResIdx) { |
2362 | | ResFactor = SchedModel->getResourceFactor(ZoneCritResIdx); |
2363 | | ResCount = getResourceCount(ZoneCritResIdx); |
2364 | | } else { |
2365 | | ResFactor = SchedModel->getMicroOpFactor(); |
2366 | | ResCount = RetiredMOps * ResFactor; |
2367 | | } |
2368 | | unsigned LFactor = SchedModel->getLatencyFactor(); |
2369 | | dbgs() << Available.getName() << " @" << CurrCycle << "c\n" |
2370 | | << " Retired: " << RetiredMOps; |
2371 | | dbgs() << "\n Executed: " << getExecutedCount() / LFactor << "c"; |
2372 | | dbgs() << "\n Critical: " << ResCount / LFactor << "c, " |
2373 | | << ResCount / ResFactor << " " |
2374 | | << SchedModel->getResourceName(ZoneCritResIdx) |
2375 | | << "\n ExpectedLatency: " << ExpectedLatency << "c\n" |
2376 | | << (IsResourceLimited ? " - Resource" : " - Latency") |
2377 | | << " limited.\n"; |
2378 | | } |
2379 | | #endif |
2380 | | |
2381 | | //===----------------------------------------------------------------------===// |
2382 | | // GenericScheduler - Generic implementation of MachineSchedStrategy. |
2383 | | //===----------------------------------------------------------------------===// |
2384 | | |
2385 | | void GenericSchedulerBase::SchedCandidate:: |
2386 | | initResourceDelta(const ScheduleDAGMI *DAG, |
2387 | 82.0M | const TargetSchedModel *SchedModel) { |
2388 | 82.0M | if (!Policy.ReduceResIdx && 82.0M !Policy.DemandResIdx79.2M ) |
2389 | 58.6M | return; |
2390 | 23.3M | |
2391 | 23.3M | const MCSchedClassDesc *SC = DAG->getSchedClass(SU); |
2392 | 23.3M | for (TargetSchedModel::ProcResIter |
2393 | 23.3M | PI = SchedModel->getWriteProcResBegin(SC), |
2394 | 48.3M | PE = SchedModel->getWriteProcResEnd(SC); PI != PE48.3M ; ++PI25.0M ) { |
2395 | 25.0M | if (PI->ProcResourceIdx == Policy.ReduceResIdx) |
2396 | 2.05M | ResDelta.CritResources += PI->Cycles; |
2397 | 25.0M | if (PI->ProcResourceIdx == Policy.DemandResIdx) |
2398 | 16.1M | ResDelta.DemandedResources += PI->Cycles; |
2399 | 25.0M | } |
2400 | 82.0M | } |
2401 | | |
2402 | | /// Set the CandPolicy given a scheduling zone given the current resources and |
2403 | | /// latencies inside and outside the zone. |
2404 | | void GenericSchedulerBase::setPolicy(CandPolicy &Policy, bool IsPostRA, |
2405 | | SchedBoundary &CurrZone, |
2406 | 17.6M | SchedBoundary *OtherZone) { |
2407 | 17.6M | // Apply preemptive heuristics based on the total latency and resources |
2408 | 17.6M | // inside and outside this zone. Potential stalls should be considered before |
2409 | 17.6M | // following this policy. |
2410 | 17.6M | |
2411 | 17.6M | // Compute remaining latency. We need this both to determine whether the |
2412 | 17.6M | // overall schedule has become latency-limited and whether the instructions |
2413 | 17.6M | // outside this zone are resource or latency limited. |
2414 | 17.6M | // |
2415 | 17.6M | // The "dependent" latency is updated incrementally during scheduling as the |
2416 | 17.6M | // max height/depth of scheduled nodes minus the cycles since it was |
2417 | 17.6M | // scheduled: |
2418 | 17.6M | // DLat = max (N.depth - (CurrCycle - N.ReadyCycle) for N in Zone |
2419 | 17.6M | // |
2420 | 17.6M | // The "independent" latency is the max ready queue depth: |
2421 | 17.6M | // ILat = max N.depth for N in Available|Pending |
2422 | 17.6M | // |
2423 | 17.6M | // RemainingLatency is the greater of independent and dependent latency. |
2424 | 17.6M | unsigned RemLatency = CurrZone.getDependentLatency(); |
2425 | 17.6M | RemLatency = std::max(RemLatency, |
2426 | 17.6M | CurrZone.findMaxLatency(CurrZone.Available.elements())); |
2427 | 17.6M | RemLatency = std::max(RemLatency, |
2428 | 17.6M | CurrZone.findMaxLatency(CurrZone.Pending.elements())); |
2429 | 17.6M | |
2430 | 17.6M | // Compute the critical resource outside the zone. |
2431 | 17.6M | unsigned OtherCritIdx = 0; |
2432 | 17.6M | unsigned OtherCount = |
2433 | 17.6M | OtherZone ? OtherZone->getOtherResourceCount(OtherCritIdx)17.5M : 08.89k ; |
2434 | 17.6M | |
2435 | 17.6M | bool OtherResLimited = false; |
2436 | 17.6M | if (SchedModel->hasInstrSchedModel()17.6M ) { |
2437 | 17.4M | unsigned LFactor = SchedModel->getLatencyFactor(); |
2438 | 17.4M | OtherResLimited = (int)(OtherCount - (RemLatency * LFactor)) > (int)LFactor; |
2439 | 17.4M | } |
2440 | 17.6M | // Schedule aggressively for latency in PostRA mode. We don't check for |
2441 | 17.6M | // acyclic latency during PostRA, and highly out-of-order processors will |
2442 | 17.6M | // skip PostRA scheduling. |
2443 | 17.6M | if (!OtherResLimited17.6M ) { |
2444 | 15.2M | if (IsPostRA || 15.2M (RemLatency + CurrZone.getCurrCycle() > Rem.CriticalPath)15.2M ) { |
2445 | 790k | Policy.ReduceLatency |= true; |
2446 | 790k | DEBUG(dbgs() << " " << CurrZone.Available.getName() |
2447 | 790k | << " RemainingLatency " << RemLatency << " + " |
2448 | 790k | << CurrZone.getCurrCycle() << "c > CritPath " |
2449 | 790k | << Rem.CriticalPath << "\n"); |
2450 | 790k | } |
2451 | 15.2M | } |
2452 | 17.6M | // If the same resource is limiting inside and outside the zone, do nothing. |
2453 | 17.6M | if (CurrZone.getZoneCritResIdx() == OtherCritIdx) |
2454 | 10.5M | return; |
2455 | 7.10M | |
2456 | 7.10M | DEBUG7.10M ( |
2457 | 7.10M | if (CurrZone.isResourceLimited()) { |
2458 | 7.10M | dbgs() << " " << CurrZone.Available.getName() << " ResourceLimited: " |
2459 | 7.10M | << SchedModel->getResourceName(CurrZone.getZoneCritResIdx()) |
2460 | 7.10M | << "\n"; |
2461 | 7.10M | } |
2462 | 7.10M | if (OtherResLimited) |
2463 | 7.10M | dbgs() << " RemainingLimit: " |
2464 | 7.10M | << SchedModel->getResourceName(OtherCritIdx) << "\n"; |
2465 | 7.10M | if (!CurrZone.isResourceLimited() && !OtherResLimited) |
2466 | 7.10M | dbgs() << " Latency limited both directions.\n"); |
2467 | 7.10M | |
2468 | 7.10M | if (CurrZone.isResourceLimited() && 7.10M !Policy.ReduceResIdx158k ) |
2469 | 158k | Policy.ReduceResIdx = CurrZone.getZoneCritResIdx(); |
2470 | 7.10M | |
2471 | 7.10M | if (OtherResLimited) |
2472 | 1.30M | Policy.DemandResIdx = OtherCritIdx; |
2473 | 17.6M | } |
2474 | | |
2475 | | #ifndef NDEBUG |
2476 | | const char *GenericSchedulerBase::getReasonStr( |
2477 | | GenericSchedulerBase::CandReason Reason) { |
2478 | | switch (Reason) { |
2479 | | case NoCand: return "NOCAND "; |
2480 | | case Only1: return "ONLY1 "; |
2481 | | case PhysRegCopy: return "PREG-COPY "; |
2482 | | case RegExcess: return "REG-EXCESS"; |
2483 | | case RegCritical: return "REG-CRIT "; |
2484 | | case Stall: return "STALL "; |
2485 | | case Cluster: return "CLUSTER "; |
2486 | | case Weak: return "WEAK "; |
2487 | | case RegMax: return "REG-MAX "; |
2488 | | case ResourceReduce: return "RES-REDUCE"; |
2489 | | case ResourceDemand: return "RES-DEMAND"; |
2490 | | case TopDepthReduce: return "TOP-DEPTH "; |
2491 | | case TopPathReduce: return "TOP-PATH "; |
2492 | | case BotHeightReduce:return "BOT-HEIGHT"; |
2493 | | case BotPathReduce: return "BOT-PATH "; |
2494 | | case NextDefUse: return "DEF-USE "; |
2495 | | case NodeOrder: return "ORDER "; |
2496 | | }; |
2497 | | llvm_unreachable("Unknown reason!"); |
2498 | | } |
2499 | | |
2500 | | void GenericSchedulerBase::traceCandidate(const SchedCandidate &Cand) { |
2501 | | PressureChange P; |
2502 | | unsigned ResIdx = 0; |
2503 | | unsigned Latency = 0; |
2504 | | switch (Cand.Reason) { |
2505 | | default: |
2506 | | break; |
2507 | | case RegExcess: |
2508 | | P = Cand.RPDelta.Excess; |
2509 | | break; |
2510 | | case RegCritical: |
2511 | | P = Cand.RPDelta.CriticalMax; |
2512 | | break; |
2513 | | case RegMax: |
2514 | | P = Cand.RPDelta.CurrentMax; |
2515 | | break; |
2516 | | case ResourceReduce: |
2517 | | ResIdx = Cand.Policy.ReduceResIdx; |
2518 | | break; |
2519 | | case ResourceDemand: |
2520 | | ResIdx = Cand.Policy.DemandResIdx; |
2521 | | break; |
2522 | | case TopDepthReduce: |
2523 | | Latency = Cand.SU->getDepth(); |
2524 | | break; |
2525 | | case TopPathReduce: |
2526 | | Latency = Cand.SU->getHeight(); |
2527 | | break; |
2528 | | case BotHeightReduce: |
2529 | | Latency = Cand.SU->getHeight(); |
2530 | | break; |
2531 | | case BotPathReduce: |
2532 | | Latency = Cand.SU->getDepth(); |
2533 | | break; |
2534 | | } |
2535 | | dbgs() << " Cand SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason); |
2536 | | if (P.isValid()) |
2537 | | dbgs() << " " << TRI->getRegPressureSetName(P.getPSet()) |
2538 | | << ":" << P.getUnitInc() << " "; |
2539 | | else |
2540 | | dbgs() << " "; |
2541 | | if (ResIdx) |
2542 | | dbgs() << " " << SchedModel->getProcResource(ResIdx)->Name << " "; |
2543 | | else |
2544 | | dbgs() << " "; |
2545 | | if (Latency) |
2546 | | dbgs() << " " << Latency << " cycles "; |
2547 | | else |
2548 | | dbgs() << " "; |
2549 | | dbgs() << '\n'; |
2550 | | } |
2551 | | #endif |
2552 | | |
2553 | | /// Return true if this heuristic determines order. |
2554 | | static bool tryLess(int TryVal, int CandVal, |
2555 | | GenericSchedulerBase::SchedCandidate &TryCand, |
2556 | | GenericSchedulerBase::SchedCandidate &Cand, |
2557 | 323M | GenericSchedulerBase::CandReason Reason) { |
2558 | 323M | if (TryVal < CandVal323M ) { |
2559 | 506k | TryCand.Reason = Reason; |
2560 | 506k | return true; |
2561 | 506k | } |
2562 | 322M | if (322M TryVal > CandVal322M ) { |
2563 | 6.56M | if (Cand.Reason > Reason) |
2564 | 2.57M | Cand.Reason = Reason; |
2565 | 6.56M | return true; |
2566 | 6.56M | } |
2567 | 315M | return false; |
2568 | 315M | } |
2569 | | |
2570 | | static bool tryGreater(int TryVal, int CandVal, |
2571 | | GenericSchedulerBase::SchedCandidate &TryCand, |
2572 | | GenericSchedulerBase::SchedCandidate &Cand, |
2573 | 363M | GenericSchedulerBase::CandReason Reason) { |
2574 | 363M | if (TryVal > CandVal363M ) { |
2575 | 4.28M | TryCand.Reason = Reason; |
2576 | 4.28M | return true; |
2577 | 4.28M | } |
2578 | 359M | if (359M TryVal < CandVal359M ) { |
2579 | 12.9M | if (Cand.Reason > Reason) |
2580 | 4.11M | Cand.Reason = Reason; |
2581 | 12.9M | return true; |
2582 | 12.9M | } |
2583 | 346M | return false; |
2584 | 346M | } |
2585 | | |
2586 | | static bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand, |
2587 | | GenericSchedulerBase::SchedCandidate &Cand, |
2588 | 9.75M | SchedBoundary &Zone) { |
2589 | 9.75M | if (Zone.isTop()9.75M ) { |
2590 | 7.79M | if (Cand.SU->getDepth() > Zone.getScheduledLatency()7.79M ) { |
2591 | 7.55M | if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(), |
2592 | 7.55M | TryCand, Cand, GenericSchedulerBase::TopDepthReduce)) |
2593 | 127 | return true; |
2594 | 7.79M | } |
2595 | 7.79M | if (7.79M tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(), |
2596 | 7.79M | TryCand, Cand, GenericSchedulerBase::TopPathReduce)) |
2597 | 112k | return true; |
2598 | 1.96M | } else { |
2599 | 1.96M | if (Cand.SU->getHeight() > Zone.getScheduledLatency()1.96M ) { |
2600 | 11.7k | if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(), |
2601 | 11.7k | TryCand, Cand, GenericSchedulerBase::BotHeightReduce)) |
2602 | 5.54k | return true; |
2603 | 1.95M | } |
2604 | 1.95M | if (1.95M tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(), |
2605 | 1.95M | TryCand, Cand, GenericSchedulerBase::BotPathReduce)) |
2606 | 318k | return true; |
2607 | 9.32M | } |
2608 | 9.32M | return false; |
2609 | 9.32M | } |
2610 | | |
2611 | 15.9M | static void tracePick(GenericSchedulerBase::CandReason Reason, bool IsTop) { |
2612 | 15.9M | DEBUG(dbgs() << "Pick " << (IsTop ? "Top " : "Bot ") |
2613 | 15.9M | << GenericSchedulerBase::getReasonStr(Reason) << '\n'); |
2614 | 15.9M | } |
2615 | | |
2616 | 8.97M | static void tracePick(const GenericSchedulerBase::SchedCandidate &Cand) { |
2617 | 8.97M | tracePick(Cand.Reason, Cand.AtTop); |
2618 | 8.97M | } |
2619 | | |
2620 | 4.07M | void GenericScheduler::initialize(ScheduleDAGMI *dag) { |
2621 | 4.07M | assert(dag->hasVRegLiveness() && |
2622 | 4.07M | "(PreRA)GenericScheduler needs vreg liveness"); |
2623 | 4.07M | DAG = static_cast<ScheduleDAGMILive*>(dag); |
2624 | 4.07M | SchedModel = DAG->getSchedModel(); |
2625 | 4.07M | TRI = DAG->TRI; |
2626 | 4.07M | |
2627 | 4.07M | Rem.init(DAG, SchedModel); |
2628 | 4.07M | Top.init(DAG, SchedModel, &Rem); |
2629 | 4.07M | Bot.init(DAG, SchedModel, &Rem); |
2630 | 4.07M | |
2631 | 4.07M | // Initialize resource counts. |
2632 | 4.07M | |
2633 | 4.07M | // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or |
2634 | 4.07M | // are disabled, then these HazardRecs will be disabled. |
2635 | 4.07M | const InstrItineraryData *Itin = SchedModel->getInstrItineraries(); |
2636 | 4.07M | if (!Top.HazardRec4.07M ) { |
2637 | 519k | Top.HazardRec = |
2638 | 519k | DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer( |
2639 | 519k | Itin, DAG); |
2640 | 519k | } |
2641 | 4.07M | if (!Bot.HazardRec4.07M ) { |
2642 | 519k | Bot.HazardRec = |
2643 | 519k | DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer( |
2644 | 519k | Itin, DAG); |
2645 | 519k | } |
2646 | 4.07M | TopCand.SU = nullptr; |
2647 | 4.07M | BotCand.SU = nullptr; |
2648 | 4.07M | } |
2649 | | |
2650 | | /// Initialize the per-region scheduling policy. |
2651 | | void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin, |
2652 | | MachineBasicBlock::iterator End, |
2653 | 11.7M | unsigned NumRegionInstrs) { |
2654 | 11.7M | const MachineFunction &MF = *Begin->getParent()->getParent(); |
2655 | 11.7M | const TargetLowering *TLI = MF.getSubtarget().getTargetLowering(); |
2656 | 11.7M | |
2657 | 11.7M | // Avoid setting up the register pressure tracker for small regions to save |
2658 | 11.7M | // compile time. As a rough heuristic, only track pressure when the number of |
2659 | 11.7M | // schedulable instructions exceeds half the integer register file. |
2660 | 11.7M | RegionPolicy.ShouldTrackPressure = true; |
2661 | 47.1M | for (unsigned VT = MVT::i32; VT > (unsigned)MVT::i147.1M ; --VT35.3M ) { |
2662 | 35.3M | MVT::SimpleValueType LegalIntVT = (MVT::SimpleValueType)VT; |
2663 | 35.3M | if (TLI->isTypeLegal(LegalIntVT)35.3M ) { |
2664 | 12.3M | unsigned NIntRegs = Context->RegClassInfo->getNumAllocatableRegs( |
2665 | 12.3M | TLI->getRegClassFor(LegalIntVT)); |
2666 | 12.3M | RegionPolicy.ShouldTrackPressure = NumRegionInstrs > (NIntRegs / 2); |
2667 | 12.3M | } |
2668 | 35.3M | } |
2669 | 11.7M | |
2670 | 11.7M | // For generic targets, we default to bottom-up, because it's simpler and more |
2671 | 11.7M | // compile-time optimizations have been implemented in that direction. |
2672 | 11.7M | RegionPolicy.OnlyBottomUp = true; |
2673 | 11.7M | |
2674 | 11.7M | // Allow the subtarget to override default policy. |
2675 | 11.7M | MF.getSubtarget().overrideSchedPolicy(RegionPolicy, NumRegionInstrs); |
2676 | 11.7M | |
2677 | 11.7M | // After subtarget overrides, apply command line options. |
2678 | 11.7M | if (!EnableRegPressure) |
2679 | 0 | RegionPolicy.ShouldTrackPressure = false; |
2680 | 11.7M | |
2681 | 11.7M | // Check -misched-topdown/bottomup can force or unforce scheduling direction. |
2682 | 11.7M | // e.g. -misched-bottomup=false allows scheduling in both directions. |
2683 | 11.7M | assert((!ForceTopDown || !ForceBottomUp) && |
2684 | 11.7M | "-misched-topdown incompatible with -misched-bottomup"); |
2685 | 11.7M | if (ForceBottomUp.getNumOccurrences() > 011.7M ) { |
2686 | 0 | RegionPolicy.OnlyBottomUp = ForceBottomUp; |
2687 | 0 | if (RegionPolicy.OnlyBottomUp) |
2688 | 0 | RegionPolicy.OnlyTopDown = false; |
2689 | 0 | } |
2690 | 11.7M | if (ForceTopDown.getNumOccurrences() > 011.7M ) { |
2691 | 4 | RegionPolicy.OnlyTopDown = ForceTopDown; |
2692 | 4 | if (RegionPolicy.OnlyTopDown) |
2693 | 4 | RegionPolicy.OnlyBottomUp = false; |
2694 | 4 | } |
2695 | 11.7M | } |
2696 | | |
// Print the per-region policy flags to the debug stream. This stays an
// out-of-line definition even in release builds (see comment below).
void GenericScheduler::dumpPolicy() const {
  // Cannot completely remove virtual function even in release mode.
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  dbgs() << "GenericScheduler RegionPolicy: "
         << " ShouldTrackPressure=" << RegionPolicy.ShouldTrackPressure
         << " OnlyTopDown=" << RegionPolicy.OnlyTopDown
         << " OnlyBottomUp=" << RegionPolicy.OnlyBottomUp
         << "\n";
#endif
}
2707 | | |
2708 | | /// Set IsAcyclicLatencyLimited if the acyclic path is longer than the cyclic |
2709 | | /// critical path by more cycles than it takes to drain the instruction buffer. |
2710 | | /// We estimate an upper bounds on in-flight instructions as: |
2711 | | /// |
2712 | | /// CyclesPerIteration = max( CyclicPath, Loop-Resource-Height ) |
2713 | | /// InFlightIterations = AcyclicPath / CyclesPerIteration |
2714 | | /// InFlightResources = InFlightIterations * LoopResources |
2715 | | /// |
2716 | | /// TODO: Check execution resources in addition to IssueCount. |
void GenericScheduler::checkAcyclicLatency() {
  // Nothing to do unless we have a cyclic critical path that is shorter than
  // the acyclic one.
  if (Rem.CyclicCritPath == 0 || Rem.CyclicCritPath >= Rem.CriticalPath)
    return;

  // Scaled number of cycles per loop iteration (scaled by the latency factor
  // so it is comparable with RemIssueCount).
  unsigned IterCount =
    std::max(Rem.CyclicCritPath * SchedModel->getLatencyFactor(),
             Rem.RemIssueCount);
  // Scaled acyclic critical path.
  unsigned AcyclicCount = Rem.CriticalPath * SchedModel->getLatencyFactor();
  // InFlightCount = (AcyclicPath / IterCycles) * InstrPerLoop
  // (rounded up via the +IterCount-1 before dividing).
  unsigned InFlightCount =
    (AcyclicCount * Rem.RemIssueCount + IterCount-1) / IterCount;
  unsigned BufferLimit =
    SchedModel->getMicroOpBufferSize() * SchedModel->getMicroOpFactor();

  // Latency-limited if the estimated in-flight micro-ops overflow the buffer.
  Rem.IsAcyclicLatencyLimited = InFlightCount > BufferLimit;

  DEBUG(dbgs() << "IssueCycles="
        << Rem.RemIssueCount / SchedModel->getLatencyFactor() << "c "
        << "IterCycles=" << IterCount / SchedModel->getLatencyFactor()
        << "c NumIters=" << (AcyclicCount + IterCount-1) / IterCount
        << " InFlight=" << InFlightCount / SchedModel->getMicroOpFactor()
        << "m BufferLim=" << SchedModel->getMicroOpBufferSize() << "m\n";
        if (Rem.IsAcyclicLatencyLimited)
          dbgs() << "  ACYCLIC LATENCY LIMIT\n");
}
2744 | | |
2745 | 4.07M | void GenericScheduler::registerRoots() { |
2746 | 4.07M | Rem.CriticalPath = DAG->ExitSU.getDepth(); |
2747 | 4.07M | |
2748 | 4.07M | // Some roots may not feed into ExitSU. Check all of them in case. |
2749 | 9.65M | for (const SUnit *SU : Bot.Available) { |
2750 | 9.65M | if (SU->getDepth() > Rem.CriticalPath) |
2751 | 582k | Rem.CriticalPath = SU->getDepth(); |
2752 | 9.65M | } |
2753 | 4.07M | DEBUG(dbgs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << '\n'); |
2754 | 4.07M | if (DumpCriticalPathLength4.07M ) { |
2755 | 0 | errs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << " \n"; |
2756 | 0 | } |
2757 | 4.07M | |
2758 | 4.07M | if (EnableCyclicPath && 4.07M SchedModel->getMicroOpBufferSize() > 04.07M ) { |
2759 | 4.05M | Rem.CyclicCritPath = DAG->computeCyclicCriticalPath(); |
2760 | 4.05M | checkAcyclicLatency(); |
2761 | 4.05M | } |
2762 | 4.07M | } |
2763 | | |
/// Compare two candidates by a register-pressure change. Returns true when the
/// comparison decided the winner (either way); false means "tie, try the next
/// heuristic".
static bool tryPressure(const PressureChange &TryP,
                        const PressureChange &CandP,
                        GenericSchedulerBase::SchedCandidate &TryCand,
                        GenericSchedulerBase::SchedCandidate &Cand,
                        GenericSchedulerBase::CandReason Reason,
                        const TargetRegisterInfo *TRI,
                        const MachineFunction &MF) {
  // If one candidate decreases and the other increases, go with it.
  // Invalid candidates have UnitInc==0.
  if (tryGreater(TryP.getUnitInc() < 0, CandP.getUnitInc() < 0, TryCand, Cand,
                 Reason)) {
    return true;
  }
  // Do not compare the magnitude of pressure changes between top and bottom
  // boundary.
  if (Cand.AtTop != TryCand.AtTop)
    return false;

  // If both candidates affect the same set in the same boundary, go with the
  // smallest increase.
  unsigned TryPSet = TryP.getPSetOrMax();
  unsigned CandPSet = CandP.getPSetOrMax();
  if (TryPSet == CandPSet) {
    return tryLess(TryP.getUnitInc(), CandP.getUnitInc(), TryCand, Cand,
                   Reason);
  }

  // Different pressure sets: rank by the target's per-set score; an invalid
  // change ranks highest (no pressure impact).
  int TryRank = TryP.isValid() ? TRI->getRegPressureSetScore(MF, TryPSet) :
                std::numeric_limits<int>::max();

  int CandRank = CandP.isValid() ? TRI->getRegPressureSetScore(MF, CandPSet) :
                 std::numeric_limits<int>::max();

  // If the candidates are decreasing pressure, reverse priority.
  if (TryP.getUnitInc() < 0)
    std::swap(TryRank, CandRank);
  return tryGreater(TryRank, CandRank, TryCand, Cand, Reason);
}
2802 | | |
2803 | 123M | static unsigned getWeakLeft(const SUnit *SU, bool isTop) { |
2804 | 123M | return (isTop) ? SU->WeakPredsLeft50.7M : SU->WeakSuccsLeft72.5M ; |
2805 | 123M | } |
2806 | | |
2807 | | /// Minimize physical register live ranges. Regalloc wants them adjacent to |
2808 | | /// their physreg def/use. |
2809 | | /// |
2810 | | /// FIXME: This is an unnecessary check on the critical path. Most are root/leaf |
2811 | | /// copies which can be prescheduled. The rest (e.g. x86 MUL) could be bundled |
2812 | | /// with the operation that produces or consumes the physreg. We'll do this when |
2813 | | /// regalloc has support for parallel copies. |
2814 | 162M | static int biasPhysRegCopy(const SUnit *SU, bool isTop) { |
2815 | 162M | const MachineInstr *MI = SU->getInstr(); |
2816 | 162M | if (!MI->isCopy()) |
2817 | 132M | return 0; |
2818 | 30.5M | |
2819 | 30.5M | unsigned ScheduledOper = isTop ? 30.5M 112.2M : 018.3M ; |
2820 | 30.5M | unsigned UnscheduledOper = isTop ? 012.2M : 118.3M ; |
2821 | 30.5M | // If we have already scheduled the physreg produce/consumer, immediately |
2822 | 30.5M | // schedule the copy. |
2823 | 30.5M | if (TargetRegisterInfo::isPhysicalRegister( |
2824 | 30.5M | MI->getOperand(ScheduledOper).getReg())) |
2825 | 20.3M | return 1; |
2826 | 10.2M | // If the physreg is at the boundary, defer it. Otherwise schedule it |
2827 | 10.2M | // immediately to free the dependent. We can hoist the copy later. |
2828 | 10.2M | bool AtBoundary = isTop ? 10.2M !SU->NumSuccsLeft5.66M : !SU->NumPredsLeft4.57M ; |
2829 | 10.2M | if (TargetRegisterInfo::isPhysicalRegister( |
2830 | 10.2M | MI->getOperand(UnscheduledOper).getReg())) |
2831 | 8.34M | return AtBoundary ? 8.34M -18.30M : 137.6k ; |
2832 | 1.89M | return 0; |
2833 | 1.89M | } |
2834 | | |
/// Fill in Cand for SU, computing the register-pressure delta when pressure
/// tracking is enabled. Downward (top) deltas come from the temporary tracker;
/// upward (bottom) deltas use the cached PressureDiff, with a slower verified
/// path under -verify-misched.
void GenericScheduler::initCandidate(SchedCandidate &Cand, SUnit *SU,
                                     bool AtTop,
                                     const RegPressureTracker &RPTracker,
                                     RegPressureTracker &TempTracker) {
  Cand.SU = SU;
  Cand.AtTop = AtTop;
  if (DAG->isTrackingPressure()) {
    if (AtTop) {
      // Top-down: must simulate the instruction on the temporary tracker.
      TempTracker.getMaxDownwardPressureDelta(
        Cand.SU->getInstr(),
        Cand.RPDelta,
        DAG->getRegionCriticalPSets(),
        DAG->getRegPressure().MaxSetPressure);
    } else {
      if (VerifyScheduling) {
        // Verified path: cross-check the cached PressureDiff.
        TempTracker.getMaxUpwardPressureDelta(
          Cand.SU->getInstr(),
          &DAG->getPressureDiff(Cand.SU),
          Cand.RPDelta,
          DAG->getRegionCriticalPSets(),
          DAG->getRegPressure().MaxSetPressure);
      } else {
        // Fast path: use the precomputed PressureDiff for this SU.
        RPTracker.getUpwardPressureDelta(
          Cand.SU->getInstr(),
          DAG->getPressureDiff(Cand.SU),
          Cand.RPDelta,
          DAG->getRegionCriticalPSets(),
          DAG->getRegPressure().MaxSetPressure);
      }
    }
  }
  DEBUG(if (Cand.RPDelta.Excess.isValid())
          dbgs() << "  Try  SU(" << Cand.SU->NodeNum << ") "
                 << TRI->getRegPressureSetName(Cand.RPDelta.Excess.getPSet())
                 << ":" << Cand.RPDelta.Excess.getUnitInc() << "\n");
}
2871 | | |
/// Apply a set of heuristics to a new candidate. Heuristics are currently
2873 | | /// hierarchical. This may be more efficient than a graduated cost model because |
2874 | | /// we don't need to evaluate all aspects of the model for each node in the |
2875 | | /// queue. But it's really done to make the heuristics easier to debug and |
2876 | | /// statistically analyze. |
2877 | | /// |
2878 | | /// \param Cand provides the policy and current best candidate. |
2879 | | /// \param TryCand refers to the next SUnit candidate, otherwise uninitialized. |
2880 | | /// \param Zone describes the scheduled zone that we are extending, or nullptr |
2881 | | // if Cand is from a different zone than TryCand. |
void GenericScheduler::tryCandidate(SchedCandidate &Cand,
                                    SchedCandidate &TryCand,
                                    SchedBoundary *Zone) {
  // Initialize the candidate if needed.
  if (!Cand.isValid()) {
    TryCand.Reason = NodeOrder;
    return;
  }

  // Bias physical register copies toward their physreg def/use (highest
  // priority heuristic).
  if (tryGreater(biasPhysRegCopy(TryCand.SU, TryCand.AtTop),
                 biasPhysRegCopy(Cand.SU, Cand.AtTop),
                 TryCand, Cand, PhysRegCopy))
    return;

  // Avoid exceeding the target's limit.
  if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.Excess,
                                               Cand.RPDelta.Excess,
                                               TryCand, Cand, RegExcess, TRI,
                                               DAG->MF))
    return;

  // Avoid increasing the max critical pressure in the scheduled region.
  if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CriticalMax,
                                               Cand.RPDelta.CriticalMax,
                                               TryCand, Cand, RegCritical, TRI,
                                               DAG->MF))
    return;

  // We only compare a subset of features when comparing nodes between
  // Top and Bottom boundary. Some properties are simply incomparable, in many
  // other instances we should only override the other boundary if something
  // is a clear good pick on one boundary. Skip heuristics that are more
  // "tie-breaking" in nature.
  bool SameBoundary = Zone != nullptr;
  if (SameBoundary) {
    // For loops that are acyclic path limited, aggressively schedule for
    // latency. Within an single cycle, whenever CurrMOps > 0, allow normal
    // heuristics to take precedence.
    if (Rem.IsAcyclicLatencyLimited && !Zone->getCurrMOps() &&
        tryLatency(TryCand, Cand, *Zone))
      return;

    // Prioritize instructions that read unbuffered resources by stall cycles.
    if (tryLess(Zone->getLatencyStallCycles(TryCand.SU),
                Zone->getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
      return;
  }

  // Keep clustered nodes together to encourage downstream peephole
  // optimizations which may reduce resource requirements.
  //
  // This is a best effort to set things up for a post-RA pass. Optimizations
  // like generating loads of multiple registers should ideally be done within
  // the scheduler pass by combining the loads during DAG postprocessing.
  const SUnit *CandNextClusterSU =
    Cand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
  const SUnit *TryCandNextClusterSU =
    TryCand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
  if (tryGreater(TryCand.SU == TryCandNextClusterSU,
                 Cand.SU == CandNextClusterSU,
                 TryCand, Cand, Cluster))
    return;

  if (SameBoundary) {
    // Weak edges are for clustering and other constraints.
    if (tryLess(getWeakLeft(TryCand.SU, TryCand.AtTop),
                getWeakLeft(Cand.SU, Cand.AtTop),
                TryCand, Cand, Weak))
      return;
  }

  // Avoid increasing the max pressure of the entire region.
  if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CurrentMax,
                                               Cand.RPDelta.CurrentMax,
                                               TryCand, Cand, RegMax, TRI,
                                               DAG->MF))
    return;

  if (SameBoundary) {
    // Avoid critical resource consumption and balance the schedule.
    TryCand.initResourceDelta(DAG, SchedModel);
    if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
                TryCand, Cand, ResourceReduce))
      return;
    if (tryGreater(TryCand.ResDelta.DemandedResources,
                   Cand.ResDelta.DemandedResources,
                   TryCand, Cand, ResourceDemand))
      return;

    // Avoid serializing long latency dependence chains.
    // For acyclic path limited loops, latency was already checked above.
    if (!RegionPolicy.DisableLatencyHeuristic && TryCand.Policy.ReduceLatency &&
        !Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, *Zone))
      return;

    // Fall through to original instruction order.
    if ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum)
        || (!Zone->isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
      TryCand.Reason = NodeOrder;
    }
  }
}
2984 | | |
2985 | | /// Pick the best candidate from the queue. |
2986 | | /// |
2987 | | /// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during |
2988 | | /// DAG building. To adjust for the current scheduling location we need to |
2989 | | /// maintain the number of vreg uses remaining to be top-scheduled. |
2990 | | void GenericScheduler::pickNodeFromQueue(SchedBoundary &Zone, |
2991 | | const CandPolicy &ZonePolicy, |
2992 | | const RegPressureTracker &RPTracker, |
2993 | 13.1M | SchedCandidate &Cand) { |
2994 | 13.1M | // getMaxPressureDelta temporarily modifies the tracker. |
2995 | 13.1M | RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker); |
2996 | 13.1M | |
2997 | 13.1M | ReadyQueue &Q = Zone.Available; |
2998 | 83.2M | for (SUnit *SU : Q) { |
2999 | 83.2M | |
3000 | 83.2M | SchedCandidate TryCand(ZonePolicy); |
3001 | 83.2M | initCandidate(TryCand, SU, Zone.isTop(), RPTracker, TempTracker); |
3002 | 83.2M | // Pass SchedBoundary only when comparing nodes from the same boundary. |
3003 | 83.2M | SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone78.6M : nullptr4.55M ; |
3004 | 83.2M | tryCandidate(Cand, TryCand, ZoneArg); |
3005 | 83.2M | if (TryCand.Reason != NoCand83.2M ) { |
3006 | 28.5M | // Initialize resource delta if needed in case future heuristics query it. |
3007 | 28.5M | if (TryCand.ResDelta == SchedResourceDelta()) |
3008 | 27.0M | TryCand.initResourceDelta(DAG, SchedModel); |
3009 | 28.5M | Cand.setBest(TryCand); |
3010 | 28.5M | DEBUG(traceCandidate(Cand)); |
3011 | 28.5M | } |
3012 | 83.2M | } |
3013 | 13.1M | } |
3014 | | |
3015 | | /// Pick the best candidate node from either the top or bottom queue. |
SUnit *GenericScheduler::pickNodeBidirectional(bool &IsTopNode) {
  // Schedule as far as possible in the direction of no choice. This is most
  // efficient, but also provides the best heuristics for CriticalPSets.
  if (SUnit *SU = Bot.pickOnlyChoice()) {
    IsTopNode = false;
    tracePick(Only1, false);
    return SU;
  }
  if (SUnit *SU = Top.pickOnlyChoice()) {
    IsTopNode = true;
    tracePick(Only1, true);
    return SU;
  }
  // Set the bottom-up policy based on the state of the current bottom zone and
  // the instructions outside the zone, including the top zone.
  CandPolicy BotPolicy;
  setPolicy(BotPolicy, /*IsPostRA=*/false, Bot, &Top);
  // Set the top-down policy based on the state of the current top zone and
  // the instructions outside the zone, including the bottom zone.
  CandPolicy TopPolicy;
  setPolicy(TopPolicy, /*IsPostRA=*/false, Top, &Bot);

  // See if BotCand is still valid (because we previously scheduled from Top).
  // Re-pick only when the cached candidate was scheduled or its policy is
  // stale; otherwise reuse it.
  DEBUG(dbgs() << "Picking from Bot:\n");
  if (!BotCand.isValid() || BotCand.SU->isScheduled ||
      BotCand.Policy != BotPolicy) {
    BotCand.reset(CandPolicy());
    pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand);
    assert(BotCand.Reason != NoCand && "failed to find the first candidate");
  } else {
    DEBUG(traceCandidate(BotCand));
#ifndef NDEBUG
    if (VerifyScheduling) {
      SchedCandidate TCand;
      TCand.reset(CandPolicy());
      pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), TCand);
      assert(TCand.SU == BotCand.SU &&
             "Last pick result should correspond to re-picking right now");
    }
#endif
  }

  // Check if the top Q has a better candidate.
  DEBUG(dbgs() << "Picking from Top:\n");
  if (!TopCand.isValid() || TopCand.SU->isScheduled ||
      TopCand.Policy != TopPolicy) {
    TopCand.reset(CandPolicy());
    pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand);
    assert(TopCand.Reason != NoCand && "failed to find the first candidate");
  } else {
    DEBUG(traceCandidate(TopCand));
#ifndef NDEBUG
    if (VerifyScheduling) {
      SchedCandidate TCand;
      TCand.reset(CandPolicy());
      pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TCand);
      assert(TCand.SU == TopCand.SU &&
             "Last pick result should correspond to re-picking right now");
    }
#endif
  }

  // Pick best from BotCand and TopCand.
  assert(BotCand.isValid());
  assert(TopCand.isValid());
  SchedCandidate Cand = BotCand;
  TopCand.Reason = NoCand;
  tryCandidate(Cand, TopCand, nullptr);
  if (TopCand.Reason != NoCand) {
    Cand.setBest(TopCand);
    DEBUG(traceCandidate(Cand));
  }

  IsTopNode = Cand.AtTop;
  tracePick(Cand);
  return Cand.SU;
}
3093 | | |
3094 | | /// Pick the best node to balance the schedule. Implements MachineSchedStrategy. |
SUnit *GenericScheduler::pickNode(bool &IsTopNode) {
  // An empty region means everything is scheduled; the ready queues must be
  // drained by now.
  if (DAG->top() == DAG->bottom()) {
    assert(Top.Available.empty() && Top.Pending.empty() &&
           Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
    return nullptr;
  }
  SUnit *SU;
  // Loop until we pick a node that has not already been scheduled (a cached
  // candidate may have been scheduled from the other boundary).
  do {
    if (RegionPolicy.OnlyTopDown) {
      SU = Top.pickOnlyChoice();
      if (!SU) {
        CandPolicy NoPolicy;
        TopCand.reset(NoPolicy);
        pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand);
        assert(TopCand.Reason != NoCand && "failed to find a candidate");
        tracePick(TopCand);
        SU = TopCand.SU;
      }
      IsTopNode = true;
    } else if (RegionPolicy.OnlyBottomUp) {
      SU = Bot.pickOnlyChoice();
      if (!SU) {
        CandPolicy NoPolicy;
        BotCand.reset(NoPolicy);
        pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand);
        assert(BotCand.Reason != NoCand && "failed to find a candidate");
        tracePick(BotCand);
        SU = BotCand.SU;
      }
      IsTopNode = false;
    } else {
      SU = pickNodeBidirectional(IsTopNode);
    }
  } while (SU->isScheduled);

  // Remove the picked node from whichever ready queues still hold it.
  if (SU->isTopReady())
    Top.removeReady(SU);
  if (SU->isBottomReady())
    Bot.removeReady(SU);

  DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") " << *SU->getInstr());
  return SU;
}
3138 | | |
/// Move already-scheduled COPY instructions that have a single physreg
/// dependence on SU so they sit adjacent to SU (above it when scheduling
/// top-down, below it when scheduling bottom-up).
void GenericScheduler::reschedulePhysRegCopies(SUnit *SU, bool isTop) {
  MachineBasicBlock::iterator InsertPos = SU->getInstr();
  // When extending bottom-up, insert after SU rather than before it.
  if (!isTop)
    ++InsertPos;
  SmallVectorImpl<SDep> &Deps = isTop ? SU->Preds : SU->Succs;

  // Find already scheduled copies with a single physreg dependence and move
  // them just above the scheduled instruction.
  for (SDep &Dep : Deps) {
    // Only data dependences on physical registers are of interest.
    if (Dep.getKind() != SDep::Data || !TRI->isPhysicalRegister(Dep.getReg()))
      continue;
    SUnit *DepSU = Dep.getSUnit();
    // Skip copies with more than one dependence on the relevant side; moving
    // those could separate them from their other dependents.
    if (isTop ? DepSU->Succs.size() > 1 : DepSU->Preds.size() > 1)
      continue;
    MachineInstr *Copy = DepSU->getInstr();
    if (!Copy->isCopy())
      continue;
    DEBUG(dbgs() << "  Rescheduling physreg copy ";
          Dep.getSUnit()->dump(DAG));
    DAG->moveInstruction(Copy, InsertPos);
  }
}
3161 | | |
3162 | | /// Update the scheduler's state after scheduling a node. This is the same node |
3163 | | /// that was just returned by pickNode(). However, ScheduleDAGMILive needs to |
3164 | | /// update it's state based on the current cycle before MachineSchedStrategy |
3165 | | /// does. |
3166 | | /// |
3167 | | /// FIXME: Eventually, we may bundle physreg copies rather than rescheduling |
3168 | | /// them here. See comments in biasPhysRegCopy. |
3169 | 16.5M | void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) { |
3170 | 16.5M | if (IsTopNode16.5M ) { |
3171 | 1.34M | SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle()); |
3172 | 1.34M | Top.bumpNode(SU); |
3173 | 1.34M | if (SU->hasPhysRegUses) |
3174 | 1.06M | reschedulePhysRegCopies(SU, true); |
3175 | 16.5M | } else { |
3176 | 15.1M | SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.getCurrCycle()); |
3177 | 15.1M | Bot.bumpNode(SU); |
3178 | 15.1M | if (SU->hasPhysRegDefs) |
3179 | 279k | reschedulePhysRegCopies(SU, false); |
3180 | 15.1M | } |
3181 | 16.5M | } |
3182 | | |
3183 | | /// Create the standard converging machine scheduler. This will be used as the |
3184 | | /// default scheduler if the target does not set a default. |
3185 | 534k | ScheduleDAGMILive *llvm::createGenericSchedLive(MachineSchedContext *C) { |
3186 | 534k | ScheduleDAGMILive *DAG = |
3187 | 534k | new ScheduleDAGMILive(C, llvm::make_unique<GenericScheduler>(C)); |
3188 | 534k | // Register DAG post-processors. |
3189 | 534k | // |
3190 | 534k | // FIXME: extend the mutation API to allow earlier mutations to instantiate |
3191 | 534k | // data and pass it to later mutations. Have a single mutation that gathers |
3192 | 534k | // the interesting nodes in one pass. |
3193 | 534k | DAG->addMutation(createCopyConstrainDAGMutation(DAG->TII, DAG->TRI)); |
3194 | 534k | return DAG; |
3195 | 534k | } |
3196 | | |
// Registry trampoline for the standard converging scheduler. Note: the
// function name carries a historical misspelling of "converging"; it is an
// internal symbol, so renaming it would only churn the registration site.
static ScheduleDAGInstrs *createConveringSched(MachineSchedContext *C) {
  return createGenericSchedLive(C);
}
3200 | | |
// Make the converging scheduler selectable on the command line
// (-misched=converge).
static MachineSchedRegistry
GenericSchedRegistry("converge", "Standard converging scheduler.",
                     createConveringSched);
3204 | | |
3205 | | //===----------------------------------------------------------------------===// |
3206 | | // PostGenericScheduler - Generic PostRA implementation of MachineSchedStrategy. |
3207 | | //===----------------------------------------------------------------------===// |
3208 | | |
3209 | 6.12k | void PostGenericScheduler::initialize(ScheduleDAGMI *Dag) { |
3210 | 6.12k | DAG = Dag; |
3211 | 6.12k | SchedModel = DAG->getSchedModel(); |
3212 | 6.12k | TRI = DAG->TRI; |
3213 | 6.12k | |
3214 | 6.12k | Rem.init(DAG, SchedModel); |
3215 | 6.12k | Top.init(DAG, SchedModel, &Rem); |
3216 | 6.12k | BotRoots.clear(); |
3217 | 6.12k | |
3218 | 6.12k | // Initialize the HazardRecognizers. If itineraries don't exist, are empty, |
3219 | 6.12k | // or are disabled, then these HazardRecs will be disabled. |
3220 | 6.12k | const InstrItineraryData *Itin = SchedModel->getInstrItineraries(); |
3221 | 6.12k | if (!Top.HazardRec6.12k ) { |
3222 | 5.18k | Top.HazardRec = |
3223 | 5.18k | DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer( |
3224 | 5.18k | Itin, DAG); |
3225 | 5.18k | } |
3226 | 6.12k | } |
3227 | | |
3228 | 6.12k | void PostGenericScheduler::registerRoots() { |
3229 | 6.12k | Rem.CriticalPath = DAG->ExitSU.getDepth(); |
3230 | 6.12k | |
3231 | 6.12k | // Some roots may not feed into ExitSU. Check all of them in case. |
3232 | 8.07k | for (const SUnit *SU : BotRoots) { |
3233 | 8.07k | if (SU->getDepth() > Rem.CriticalPath) |
3234 | 986 | Rem.CriticalPath = SU->getDepth(); |
3235 | 8.07k | } |
3236 | 6.12k | DEBUG(dbgs() << "Critical Path: (PGS-RR) " << Rem.CriticalPath << '\n'); |
3237 | 6.12k | if (DumpCriticalPathLength6.12k ) { |
3238 | 0 | errs() << "Critical Path(PGS-RR ): " << Rem.CriticalPath << " \n"; |
3239 | 0 | } |
3240 | 6.12k | } |
3241 | | |
/// Apply a set of heuristics to a new candidate for PostRA scheduling.
3243 | | /// |
3244 | | /// \param Cand provides the policy and current best candidate. |
3245 | | /// \param TryCand refers to the next SUnit candidate, otherwise uninitialized. |
void PostGenericScheduler::tryCandidate(SchedCandidate &Cand,
                                        SchedCandidate &TryCand) {
  // Initialize the candidate if needed.
  if (!Cand.isValid()) {
    TryCand.Reason = NodeOrder;
    return;
  }

  // Prioritize instructions that read unbuffered resources by stall cycles.
  if (tryLess(Top.getLatencyStallCycles(TryCand.SU),
              Top.getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
    return;

  // Keep clustered nodes together.
  if (tryGreater(TryCand.SU == DAG->getNextClusterSucc(),
                 Cand.SU == DAG->getNextClusterSucc(),
                 TryCand, Cand, Cluster))
    return;

  // Avoid critical resource consumption and balance the schedule.
  if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
              TryCand, Cand, ResourceReduce))
    return;
  if (tryGreater(TryCand.ResDelta.DemandedResources,
                 Cand.ResDelta.DemandedResources,
                 TryCand, Cand, ResourceDemand))
    return;

  // Avoid serializing long latency dependence chains.
  if (Cand.Policy.ReduceLatency && tryLatency(TryCand, Cand, Top)) {
    return;
  }

  // Fall through to original instruction order (post-RA is top-down only).
  if (TryCand.SU->NodeNum < Cand.SU->NodeNum)
    TryCand.Reason = NodeOrder;
}
3283 | | |
3284 | 8.89k | void PostGenericScheduler::pickNodeFromQueue(SchedCandidate &Cand) { |
3285 | 8.89k | ReadyQueue &Q = Top.Available; |
3286 | 30.8k | for (SUnit *SU : Q) { |
3287 | 30.8k | SchedCandidate TryCand(Cand.Policy); |
3288 | 30.8k | TryCand.SU = SU; |
3289 | 30.8k | TryCand.AtTop = true; |
3290 | 30.8k | TryCand.initResourceDelta(DAG, SchedModel); |
3291 | 30.8k | tryCandidate(Cand, TryCand); |
3292 | 30.8k | if (TryCand.Reason != NoCand30.8k ) { |
3293 | 16.9k | Cand.setBest(TryCand); |
3294 | 16.9k | DEBUG(traceCandidate(Cand)); |
3295 | 16.9k | } |
3296 | 30.8k | } |
3297 | 8.89k | } |
3298 | | |
3299 | | /// Pick the next node to schedule. |
SUnit *PostGenericScheduler::pickNode(bool &IsTopNode) {
  // An empty region means everything is scheduled; the ready queues must be
  // drained by now.
  if (DAG->top() == DAG->bottom()) {
    assert(Top.Available.empty() && Top.Pending.empty() && "ReadyQ garbage");
    return nullptr;
  }
  SUnit *SU;
  // Loop until we pick a node that has not already been scheduled.
  do {
    SU = Top.pickOnlyChoice();
    if (SU) {
      tracePick(Only1, true);
    } else {
      CandPolicy NoPolicy;
      SchedCandidate TopCand(NoPolicy);
      // Set the top-down policy based on the state of the current top zone and
      // the instructions outside the zone, including the bottom zone.
      setPolicy(TopCand.Policy, /*IsPostRA=*/true, Top, nullptr);
      pickNodeFromQueue(TopCand);
      assert(TopCand.Reason != NoCand && "failed to find a candidate");
      tracePick(TopCand);
      SU = TopCand.SU;
    }
  } while (SU->isScheduled);

  // Post-RA scheduling is top-down only.
  IsTopNode = true;
  Top.removeReady(SU);

  DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") " << *SU->getInstr());
  return SU;
}
3329 | | |
3330 | | /// Called after ScheduleDAGMI has scheduled an instruction and updated |
3331 | | /// scheduled/remaining flags in the DAG nodes. |
3332 | 26.5k | void PostGenericScheduler::schedNode(SUnit *SU, bool IsTopNode) { |
3333 | 26.5k | SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle()); |
3334 | 26.5k | Top.bumpNode(SU); |
3335 | 26.5k | } |
3336 | | |
3337 | 8.60k | ScheduleDAGMI *llvm::createGenericSchedPostRA(MachineSchedContext *C) { |
3338 | 8.60k | return new ScheduleDAGMI(C, llvm::make_unique<PostGenericScheduler>(C), |
3339 | 8.60k | /*RemoveKillFlags=*/true); |
3340 | 8.60k | } |
3341 | | |
3342 | | //===----------------------------------------------------------------------===// |
3343 | | // ILP Scheduler. Currently for experimental analysis of heuristics. |
3344 | | //===----------------------------------------------------------------------===// |
3345 | | |
3346 | | namespace { |
3347 | | |
3348 | | /// \brief Order nodes by the ILP metric. |
3349 | | struct ILPOrder { |
3350 | | const SchedDFSResult *DFSResult = nullptr; |
3351 | | const BitVector *ScheduledTrees = nullptr; |
3352 | | bool MaximizeILP; |
3353 | | |
3354 | 8 | ILPOrder(bool MaxILP) : MaximizeILP(MaxILP) {} |
3355 | | |
3356 | | /// \brief Apply a less-than relation on node priority. |
3357 | | /// |
3358 | | /// (Return true if A comes after B in the Q.) |
3359 | 326 | bool operator()(const SUnit *A, const SUnit *B) const { |
3360 | 326 | unsigned SchedTreeA = DFSResult->getSubtreeID(A); |
3361 | 326 | unsigned SchedTreeB = DFSResult->getSubtreeID(B); |
3362 | 326 | if (SchedTreeA != SchedTreeB326 ) { |
3363 | 174 | // Unscheduled trees have lower priority. |
3364 | 174 | if (ScheduledTrees->test(SchedTreeA) != ScheduledTrees->test(SchedTreeB)) |
3365 | 113 | return ScheduledTrees->test(SchedTreeB); |
3366 | 61 | |
3367 | 61 | // Trees with shallower connections have have lower priority. |
3368 | 61 | if (61 DFSResult->getSubtreeLevel(SchedTreeA) |
3369 | 61 | != DFSResult->getSubtreeLevel(SchedTreeB)) { |
3370 | 0 | return DFSResult->getSubtreeLevel(SchedTreeA) |
3371 | 0 | < DFSResult->getSubtreeLevel(SchedTreeB); |
3372 | 0 | } |
3373 | 213 | } |
3374 | 213 | if (213 MaximizeILP213 ) |
3375 | 125 | return DFSResult->getILP(A) < DFSResult->getILP(B); |
3376 | 213 | else |
3377 | 88 | return DFSResult->getILP(A) > DFSResult->getILP(B); |
3378 | 0 | } |
3379 | | }; |
3380 | | |
3381 | | /// \brief Schedule based on the ILP metric. |
3382 | | class ILPScheduler : public MachineSchedStrategy { |
3383 | | ScheduleDAGMILive *DAG = nullptr; |
3384 | | ILPOrder Cmp; |
3385 | | |
3386 | | std::vector<SUnit*> ReadyQ; |
3387 | | |
3388 | | public: |
3389 | 8 | ILPScheduler(bool MaximizeILP) : Cmp(MaximizeILP) {} |
3390 | | |
3391 | 10 | void initialize(ScheduleDAGMI *dag) override { |
3392 | 10 | assert(dag->hasVRegLiveness() && "ILPScheduler needs vreg liveness"); |
3393 | 10 | DAG = static_cast<ScheduleDAGMILive*>(dag); |
3394 | 10 | DAG->computeDFSResult(); |
3395 | 10 | Cmp.DFSResult = DAG->getDFSResult(); |
3396 | 10 | Cmp.ScheduledTrees = &DAG->getScheduledTrees(); |
3397 | 10 | ReadyQ.clear(); |
3398 | 10 | } |
3399 | | |
3400 | 10 | void registerRoots() override { |
3401 | 10 | // Restore the heap in ReadyQ with the updated DFS results. |
3402 | 10 | std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp); |
3403 | 10 | } |
3404 | | |
3405 | | /// Implement MachineSchedStrategy interface. |
3406 | | /// ----------------------------------------- |
3407 | | |
3408 | | /// Callback to select the highest priority node from the ready Q. |
3409 | 186 | SUnit *pickNode(bool &IsTopNode) override { |
3410 | 186 | if (ReadyQ.empty()186 ) return nullptr10 ; |
3411 | 176 | std::pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp); |
3412 | 176 | SUnit *SU = ReadyQ.back(); |
3413 | 176 | ReadyQ.pop_back(); |
3414 | 176 | IsTopNode = false; |
3415 | 176 | DEBUG(dbgs() << "Pick node " << "SU(" << SU->NodeNum << ") " |
3416 | 186 | << " ILP: " << DAG->getDFSResult()->getILP(SU) |
3417 | 186 | << " Tree: " << DAG->getDFSResult()->getSubtreeID(SU) << " @" |
3418 | 186 | << DAG->getDFSResult()->getSubtreeLevel( |
3419 | 186 | DAG->getDFSResult()->getSubtreeID(SU)) << '\n' |
3420 | 186 | << "Scheduling " << *SU->getInstr()); |
3421 | 186 | return SU; |
3422 | 186 | } |
3423 | | |
3424 | | /// \brief Scheduler callback to notify that a new subtree is scheduled. |
3425 | 38 | void scheduleTree(unsigned SubtreeID) override { |
3426 | 38 | std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp); |
3427 | 38 | } |
3428 | | |
3429 | | /// Callback after a node is scheduled. Mark a newly scheduled tree, notify |
3430 | | /// DFSResults, and resort the priority Q. |
3431 | 176 | void schedNode(SUnit *SU, bool IsTopNode) override { |
3432 | 176 | assert(!IsTopNode && "SchedDFSResult needs bottom-up"); |
3433 | 176 | } |
3434 | | |
3435 | 64 | void releaseTopNode(SUnit *) override { /*only called for top roots*/ } |
3436 | | |
3437 | 176 | void releaseBottomNode(SUnit *SU) override { |
3438 | 176 | ReadyQ.push_back(SU); |
3439 | 176 | std::push_heap(ReadyQ.begin(), ReadyQ.end(), Cmp); |
3440 | 176 | } |
3441 | | }; |
3442 | | |
3443 | | } // end anonymous namespace |
3444 | | |
3445 | 6 | static ScheduleDAGInstrs *createILPMaxScheduler(MachineSchedContext *C) { |
3446 | 6 | return new ScheduleDAGMILive(C, llvm::make_unique<ILPScheduler>(true)); |
3447 | 6 | } |
3448 | 2 | static ScheduleDAGInstrs *createILPMinScheduler(MachineSchedContext *C) { |
3449 | 2 | return new ScheduleDAGMILive(C, llvm::make_unique<ILPScheduler>(false)); |
3450 | 2 | } |
3451 | | |
// Register the ILP strategies so they can be selected on the command line
// (-misched=ilpmax / -misched=ilpmin).
static MachineSchedRegistry ILPMaxRegistry(
  "ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler);
static MachineSchedRegistry ILPMinRegistry(
  "ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler);
3456 | | |
3457 | | //===----------------------------------------------------------------------===// |
3458 | | // Machine Instruction Shuffler for Correctness Testing |
3459 | | //===----------------------------------------------------------------------===// |
3460 | | |
3461 | | #ifndef NDEBUG |
3462 | | namespace { |
3463 | | |
3464 | | /// Apply a less-than relation on the node order, which corresponds to the |
3465 | | /// instruction order prior to scheduling. IsReverse implements greater-than. |
3466 | | template<bool IsReverse> |
3467 | | struct SUnitOrder { |
3468 | | bool operator()(SUnit *A, SUnit *B) const { |
3469 | | if (IsReverse) |
3470 | | return A->NodeNum > B->NodeNum; |
3471 | | else |
3472 | | return A->NodeNum < B->NodeNum; |
3473 | | } |
3474 | | }; |
3475 | | |
3476 | | /// Reorder instructions as much as possible. |
3477 | | class InstructionShuffler : public MachineSchedStrategy { |
3478 | | bool IsAlternating; |
3479 | | bool IsTopDown; |
3480 | | |
3481 | | // Using a less-than relation (SUnitOrder<false>) for the TopQ priority |
3482 | | // gives nodes with a higher number higher priority causing the latest |
3483 | | // instructions to be scheduled first. |
3484 | | PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<false>> |
3485 | | TopQ; |
3486 | | |
3487 | | // When scheduling bottom-up, use greater-than as the queue priority. |
3488 | | PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<true>> |
3489 | | BottomQ; |
3490 | | |
3491 | | public: |
3492 | | InstructionShuffler(bool alternate, bool topdown) |
3493 | | : IsAlternating(alternate), IsTopDown(topdown) {} |
3494 | | |
3495 | | void initialize(ScheduleDAGMI*) override { |
3496 | | TopQ.clear(); |
3497 | | BottomQ.clear(); |
3498 | | } |
3499 | | |
3500 | | /// Implement MachineSchedStrategy interface. |
3501 | | /// ----------------------------------------- |
3502 | | |
3503 | | SUnit *pickNode(bool &IsTopNode) override { |
3504 | | SUnit *SU; |
3505 | | if (IsTopDown) { |
3506 | | do { |
3507 | | if (TopQ.empty()) return nullptr; |
3508 | | SU = TopQ.top(); |
3509 | | TopQ.pop(); |
3510 | | } while (SU->isScheduled); |
3511 | | IsTopNode = true; |
3512 | | } else { |
3513 | | do { |
3514 | | if (BottomQ.empty()) return nullptr; |
3515 | | SU = BottomQ.top(); |
3516 | | BottomQ.pop(); |
3517 | | } while (SU->isScheduled); |
3518 | | IsTopNode = false; |
3519 | | } |
3520 | | if (IsAlternating) |
3521 | | IsTopDown = !IsTopDown; |
3522 | | return SU; |
3523 | | } |
3524 | | |
3525 | | void schedNode(SUnit *SU, bool IsTopNode) override {} |
3526 | | |
3527 | | void releaseTopNode(SUnit *SU) override { |
3528 | | TopQ.push(SU); |
3529 | | } |
3530 | | void releaseBottomNode(SUnit *SU) override { |
3531 | | BottomQ.push(SU); |
3532 | | } |
3533 | | }; |
3534 | | |
3535 | | } // end anonymous namespace |
3536 | | |
3537 | | static ScheduleDAGInstrs *createInstructionShuffler(MachineSchedContext *C) { |
3538 | | bool Alternate = !ForceTopDown && !ForceBottomUp; |
3539 | | bool TopDown = !ForceBottomUp; |
3540 | | assert((TopDown || !ForceTopDown) && |
3541 | | "-misched-topdown incompatible with -misched-bottomup"); |
3542 | | return new ScheduleDAGMILive( |
3543 | | C, llvm::make_unique<InstructionShuffler>(Alternate, TopDown)); |
3544 | | } |
3545 | | |
// Expose the shuffler on the command line (-misched=shuffle); debug-only,
// guarded by the enclosing #ifndef NDEBUG.
static MachineSchedRegistry ShufflerRegistry(
  "shuffle", "Shuffle machine instructions alternating directions",
  createInstructionShuffler);
3549 | | #endif // !NDEBUG |
3550 | | |
3551 | | //===----------------------------------------------------------------------===// |
3552 | | // GraphWriter support for ScheduleDAGMILive. |
3553 | | //===----------------------------------------------------------------------===// |
3554 | | |
3555 | | #ifndef NDEBUG |
3556 | | namespace llvm { |
3557 | | |
// Reuse the generic ScheduleDAG graph traits for ScheduleDAGMI so
// GraphWriter can traverse it.
template<> struct GraphTraits<
  ScheduleDAGMI*> : public GraphTraits<ScheduleDAG*> {};
3560 | | |
3561 | | template<> |
3562 | | struct DOTGraphTraits<ScheduleDAGMI*> : public DefaultDOTGraphTraits { |
3563 | | DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {} |
3564 | | |
3565 | | static std::string getGraphName(const ScheduleDAG *G) { |
3566 | | return G->MF.getName(); |
3567 | | } |
3568 | | |
3569 | | static bool renderGraphFromBottomUp() { |
3570 | | return true; |
3571 | | } |
3572 | | |
3573 | | static bool isNodeHidden(const SUnit *Node) { |
3574 | | if (ViewMISchedCutoff == 0) |
3575 | | return false; |
3576 | | return (Node->Preds.size() > ViewMISchedCutoff |
3577 | | || Node->Succs.size() > ViewMISchedCutoff); |
3578 | | } |
3579 | | |
3580 | | /// If you want to override the dot attributes printed for a particular |
3581 | | /// edge, override this method. |
3582 | | static std::string getEdgeAttributes(const SUnit *Node, |
3583 | | SUnitIterator EI, |
3584 | | const ScheduleDAG *Graph) { |
3585 | | if (EI.isArtificialDep()) |
3586 | | return "color=cyan,style=dashed"; |
3587 | | if (EI.isCtrlDep()) |
3588 | | return "color=blue,style=dashed"; |
3589 | | return ""; |
3590 | | } |
3591 | | |
3592 | | static std::string getNodeLabel(const SUnit *SU, const ScheduleDAG *G) { |
3593 | | std::string Str; |
3594 | | raw_string_ostream SS(Str); |
3595 | | const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G); |
3596 | | const SchedDFSResult *DFS = DAG->hasVRegLiveness() ? |
3597 | | static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr; |
3598 | | SS << "SU:" << SU->NodeNum; |
3599 | | if (DFS) |
3600 | | SS << " I:" << DFS->getNumInstrs(SU); |
3601 | | return SS.str(); |
3602 | | } |
3603 | | |
3604 | | static std::string getNodeDescription(const SUnit *SU, const ScheduleDAG *G) { |
3605 | | return G->getGraphNodeLabel(SU); |
3606 | | } |
3607 | | |
3608 | | static std::string getNodeAttributes(const SUnit *N, const ScheduleDAG *G) { |
3609 | | std::string Str("shape=Mrecord"); |
3610 | | const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G); |
3611 | | const SchedDFSResult *DFS = DAG->hasVRegLiveness() ? |
3612 | | static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr; |
3613 | | if (DFS) { |
3614 | | Str += ",style=filled,fillcolor=\"#"; |
3615 | | Str += DOT::getColorString(DFS->getSubtreeID(N)); |
3616 | | Str += '"'; |
3617 | | } |
3618 | | return Str; |
3619 | | } |
3620 | | }; |
3621 | | |
3622 | | } // end namespace llvm |
3623 | | #endif // NDEBUG |
3624 | | |
3625 | | /// viewGraph - Pop up a ghostview window with the reachable parts of the DAG |
3626 | | /// rendered using 'dot'. |
3627 | 0 | void ScheduleDAGMI::viewGraph(const Twine &Name, const Twine &Title) { |
3628 | | #ifndef NDEBUG |
3629 | | ViewGraph(this, Name, false, Title); |
3630 | | #else |
3631 | | errs() << "ScheduleDAGMI::viewGraph is only available in debug builds on " |
3632 | 0 | << "systems with Graphviz or gv!\n"; |
3633 | 0 | #endif // NDEBUG |
3634 | 0 | } |
3635 | | |
3636 | | /// Out-of-line implementation with no arguments is handy for gdb. |
3637 | 0 | void ScheduleDAGMI::viewGraph() { |
3638 | 0 | viewGraph(getDAGName(), "Scheduling-Units Graph for " + getDAGName()); |
3639 | 0 | } |