/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/lib/CodeGen/ScheduleDAG.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===- ScheduleDAG.cpp - Implement the ScheduleDAG class ------------------===// |
2 | | // |
3 | | // The LLVM Compiler Infrastructure |
4 | | // |
5 | | // This file is distributed under the University of Illinois Open Source |
6 | | // License. See LICENSE.TXT for details. |
7 | | // |
8 | | //===----------------------------------------------------------------------===// |
9 | | // |
10 | | /// \file Implements the ScheduleDAG class, which is a base class used by |
11 | | /// scheduling implementation classes. |
12 | | // |
13 | | //===----------------------------------------------------------------------===// |
14 | | |
15 | | #include "llvm/CodeGen/ScheduleDAG.h" |
16 | | #include "llvm/ADT/STLExtras.h" |
17 | | #include "llvm/ADT/SmallVector.h" |
18 | | #include "llvm/ADT/iterator_range.h" |
19 | | #include "llvm/CodeGen/MachineFunction.h" |
20 | | #include "llvm/CodeGen/ScheduleHazardRecognizer.h" |
21 | | #include "llvm/CodeGen/SelectionDAGNodes.h" |
22 | | #include "llvm/Support/CommandLine.h" |
23 | | #include "llvm/Support/Compiler.h" |
24 | | #include "llvm/Support/Debug.h" |
25 | | #include "llvm/Support/raw_ostream.h" |
26 | | #include "llvm/Target/TargetInstrInfo.h" |
27 | | #include "llvm/Target/TargetRegisterInfo.h" |
28 | | #include "llvm/Target/TargetSubtargetInfo.h" |
29 | | #include <algorithm> |
30 | | #include <cassert> |
31 | | #include <iterator> |
32 | | #include <limits> |
33 | | #include <utility> |
34 | | #include <vector> |
35 | | |
36 | | using namespace llvm; |
37 | | |
38 | | #define DEBUG_TYPE "pre-RA-sched" |
39 | | |
40 | | #ifndef NDEBUG |
41 | | static cl::opt<bool> StressSchedOpt( |
42 | | "stress-sched", cl::Hidden, cl::init(false), |
43 | | cl::desc("Stress test instruction scheduling")); |
44 | | #endif |
45 | | |
46 | 0 | void SchedulingPriorityQueue::anchor() {} |
47 | | |
48 | | ScheduleDAG::ScheduleDAG(MachineFunction &mf) |
49 | | : TM(mf.getTarget()), TII(mf.getSubtarget().getInstrInfo()), |
50 | | TRI(mf.getSubtarget().getRegisterInfo()), MF(mf), |
51 | 4.02M | MRI(mf.getRegInfo()) { |
52 | | #ifndef NDEBUG |
53 | | StressSched = StressSchedOpt; |
54 | | #endif |
55 | | } |
56 | | |
57 | 4.02M | ScheduleDAG::~ScheduleDAG() = default; |
58 | | |
59 | 7.70M | void ScheduleDAG::clearDAG() { |
60 | 7.70M | SUnits.clear(); |
61 | 7.70M | EntrySU = SUnit(); |
62 | 7.70M | ExitSU = SUnit(); |
63 | 7.70M | } |
64 | | |
65 | 730k | const MCInstrDesc *ScheduleDAG::getNodeDesc(const SDNode *Node) const { |
66 | 730k | if (!Node || 730k !Node->isMachineOpcode()730k ) return nullptr213k ; |
67 | 517k | return &TII->get(Node->getMachineOpcode()); |
68 | 517k | } |
69 | | |
70 | | LLVM_DUMP_METHOD |
71 | 0 | raw_ostream &SDep::print(raw_ostream &OS, const TargetRegisterInfo *TRI) const { |
72 | 0 | switch (getKind()) { |
73 | 0 | case Data: OS << "Data"; break; |
74 | 0 | case Anti: OS << "Anti"; break; |
75 | 0 | case Output: OS << "Out "; break; |
76 | 0 | case Order: OS << "Ord "; break; |
77 | 0 | } |
78 | 0 |
|
79 | 0 | switch (getKind()) { |
80 | 0 | case Data: |
81 | 0 | OS << " Latency=" << getLatency(); |
82 | 0 | if (TRI && 0 isAssignedRegDep()0 ) |
83 | 0 | OS << " Reg=" << PrintReg(getReg(), TRI); |
84 | 0 | break; |
85 | 0 | case Anti: |
86 | 0 | case Output: |
87 | 0 | OS << " Latency=" << getLatency(); |
88 | 0 | break; |
89 | 0 | case Order: |
90 | 0 | OS << " Latency=" << getLatency(); |
91 | 0 | switch(Contents.OrdKind) { |
92 | 0 | case Barrier: OS << " Barrier"; break; |
93 | 0 | case MayAliasMem: |
94 | 0 | case MustAliasMem: OS << " Memory"; break; |
95 | 0 | case Artificial: OS << " Artificial"; break; |
96 | 0 | case Weak: OS << " Weak"; break; |
97 | 0 | case Cluster: OS << " Cluster"; break; |
98 | 0 | } |
99 | 0 | break; |
100 | 0 | } |
101 | 0 |
|
102 | 0 | return OS; |
103 | 0 | } |
104 | | |
105 | 61.6M | bool SUnit::addPred(const SDep &D, bool Required) { |
106 | 61.6M | // If this node already has this dependence, don't add a redundant one. |
107 | 318M | for (SDep &PredDep : Preds) { |
108 | 318M | // Zero-latency weak edges may be added purely for heuristic ordering. Don't |
109 | 318M | // add them if another kind of edge already exists. |
110 | 318M | if (!Required && 318M PredDep.getSUnit() == D.getSUnit()7.30M ) |
111 | 265k | return false; |
112 | 318M | if (318M PredDep.overlaps(D)318M ) { |
113 | 957k | // Extend the latency if needed. Equivalent to |
114 | 957k | // removePred(PredDep) + addPred(D). |
115 | 957k | if (PredDep.getLatency() < D.getLatency()957k ) { |
116 | 2.74k | SUnit *PredSU = PredDep.getSUnit(); |
117 | 2.74k | // Find the corresponding successor in N. |
118 | 2.74k | SDep ForwardD = PredDep; |
119 | 2.74k | ForwardD.setSUnit(this); |
120 | 5.82k | for (SDep &SuccDep : PredSU->Succs) { |
121 | 5.82k | if (SuccDep == ForwardD5.82k ) { |
122 | 2.74k | SuccDep.setLatency(D.getLatency()); |
123 | 2.74k | break; |
124 | 2.74k | } |
125 | 2.74k | } |
126 | 2.74k | PredDep.setLatency(D.getLatency()); |
127 | 2.74k | } |
128 | 957k | return false; |
129 | 957k | } |
130 | 60.3M | } |
131 | 60.3M | // Now add a corresponding succ to N. |
132 | 60.3M | SDep P = D; |
133 | 60.3M | P.setSUnit(this); |
134 | 60.3M | SUnit *N = D.getSUnit(); |
135 | 60.3M | // Update the bookkeeping. |
136 | 60.3M | if (D.getKind() == SDep::Data60.3M ) { |
137 | 29.8M | assert(NumPreds < std::numeric_limits<unsigned>::max() && |
138 | 29.8M | "NumPreds will overflow!"); |
139 | 29.8M | assert(N->NumSuccs < std::numeric_limits<unsigned>::max() && |
140 | 29.8M | "NumSuccs will overflow!"); |
141 | 29.8M | ++NumPreds; |
142 | 29.8M | ++N->NumSuccs; |
143 | 29.8M | } |
144 | 60.3M | if (!N->isScheduled60.3M ) { |
145 | 60.3M | if (D.isWeak()60.3M ) { |
146 | 989k | ++WeakPredsLeft; |
147 | 989k | } |
148 | 59.3M | else { |
149 | 59.3M | assert(NumPredsLeft < std::numeric_limits<unsigned>::max() && |
150 | 59.3M | "NumPredsLeft will overflow!"); |
151 | 59.3M | ++NumPredsLeft; |
152 | 59.3M | } |
153 | 60.3M | } |
154 | 60.3M | if (!isScheduled60.3M ) { |
155 | 60.3M | if (D.isWeak()60.3M ) { |
156 | 989k | ++N->WeakSuccsLeft; |
157 | 989k | } |
158 | 59.3M | else { |
159 | 59.3M | assert(N->NumSuccsLeft < std::numeric_limits<unsigned>::max() && |
160 | 59.3M | "NumSuccsLeft will overflow!"); |
161 | 59.3M | ++N->NumSuccsLeft; |
162 | 59.3M | } |
163 | 60.3M | } |
164 | 60.3M | Preds.push_back(D); |
165 | 60.3M | N->Succs.push_back(P); |
166 | 60.3M | if (P.getLatency() != 060.3M ) { |
167 | 44.5M | this->setDepthDirty(); |
168 | 44.5M | N->setHeightDirty(); |
169 | 44.5M | } |
170 | 61.6M | return true; |
171 | 61.6M | } |
172 | | |
173 | 2.12k | void SUnit::removePred(const SDep &D) { |
174 | 2.12k | // Find the matching predecessor. |
175 | 2.12k | SmallVectorImpl<SDep>::iterator I = llvm::find(Preds, D); |
176 | 2.12k | if (I == Preds.end()) |
177 | 105 | return; |
178 | 2.01k | // Find the corresponding successor in N. |
179 | 2.01k | SDep P = D; |
180 | 2.01k | P.setSUnit(this); |
181 | 2.01k | SUnit *N = D.getSUnit(); |
182 | 2.01k | SmallVectorImpl<SDep>::iterator Succ = llvm::find(N->Succs, P); |
183 | 2.01k | assert(Succ != N->Succs.end() && "Mismatching preds / succs lists!"); |
184 | 2.01k | N->Succs.erase(Succ); |
185 | 2.01k | Preds.erase(I); |
186 | 2.01k | // Update the bookkeeping. |
187 | 2.01k | if (P.getKind() == SDep::Data2.01k ) { |
188 | 750 | assert(NumPreds > 0 && "NumPreds will underflow!"); |
189 | 750 | assert(N->NumSuccs > 0 && "NumSuccs will underflow!"); |
190 | 750 | --NumPreds; |
191 | 750 | --N->NumSuccs; |
192 | 750 | } |
193 | 2.01k | if (!N->isScheduled2.01k ) { |
194 | 2.01k | if (D.isWeak()) |
195 | 0 | --WeakPredsLeft; |
196 | 2.01k | else { |
197 | 2.01k | assert(NumPredsLeft > 0 && "NumPredsLeft will underflow!"); |
198 | 2.01k | --NumPredsLeft; |
199 | 2.01k | } |
200 | 2.01k | } |
201 | 2.01k | if (!isScheduled2.01k ) { |
202 | 1.43k | if (D.isWeak()) |
203 | 0 | --N->WeakSuccsLeft; |
204 | 1.43k | else { |
205 | 1.43k | assert(N->NumSuccsLeft > 0 && "NumSuccsLeft will underflow!"); |
206 | 1.43k | --N->NumSuccsLeft; |
207 | 1.43k | } |
208 | 1.43k | } |
209 | 2.01k | if (P.getLatency() != 02.01k ) { |
210 | 775 | this->setDepthDirty(); |
211 | 775 | N->setHeightDirty(); |
212 | 775 | } |
213 | 2.12k | } |
214 | | |
215 | 55.8M | void SUnit::setDepthDirty() { |
216 | 55.8M | if (!isDepthCurrent55.8M ) return55.6M ; |
217 | 139k | SmallVector<SUnit*, 8> WorkList; |
218 | 139k | WorkList.push_back(this); |
219 | 146k | do { |
220 | 146k | SUnit *SU = WorkList.pop_back_val(); |
221 | 146k | SU->isDepthCurrent = false; |
222 | 669k | for (SDep &SuccDep : SU->Succs) { |
223 | 669k | SUnit *SuccSU = SuccDep.getSUnit(); |
224 | 669k | if (SuccSU->isDepthCurrent) |
225 | 7.39k | WorkList.push_back(SuccSU); |
226 | 669k | } |
227 | 146k | } while (!WorkList.empty()); |
228 | 55.8M | } |
229 | | |
230 | 89.5M | void SUnit::setHeightDirty() { |
231 | 89.5M | if (!isHeightCurrent89.5M ) return79.3M ; |
232 | 10.1M | SmallVector<SUnit*, 8> WorkList; |
233 | 10.1M | WorkList.push_back(this); |
234 | 10.5M | do { |
235 | 10.5M | SUnit *SU = WorkList.pop_back_val(); |
236 | 10.5M | SU->isHeightCurrent = false; |
237 | 13.9M | for (SDep &PredDep : SU->Preds) { |
238 | 13.9M | SUnit *PredSU = PredDep.getSUnit(); |
239 | 13.9M | if (PredSU->isHeightCurrent) |
240 | 335k | WorkList.push_back(PredSU); |
241 | 13.9M | } |
242 | 10.5M | } while (!WorkList.empty()); |
243 | 89.5M | } |
244 | | |
245 | 417k | void SUnit::setDepthToAtLeast(unsigned NewDepth) { |
246 | 417k | if (NewDepth <= getDepth()) |
247 | 278k | return; |
248 | 138k | setDepthDirty(); |
249 | 138k | Depth = NewDepth; |
250 | 138k | isDepthCurrent = true; |
251 | 138k | } |
252 | | |
253 | 30.1M | void SUnit::setHeightToAtLeast(unsigned NewHeight) { |
254 | 30.1M | if (NewHeight <= getHeight()) |
255 | 20.0M | return; |
256 | 10.1M | setHeightDirty(); |
257 | 10.1M | Height = NewHeight; |
258 | 10.1M | isHeightCurrent = true; |
259 | 10.1M | } |
260 | | |
261 | | /// Calculates the maximal path from the node to the exit. |
262 | 11.9M | void SUnit::ComputeDepth() { |
263 | 11.9M | SmallVector<SUnit*, 8> WorkList; |
264 | 11.9M | WorkList.push_back(this); |
265 | 35.9M | do { |
266 | 35.9M | SUnit *Cur = WorkList.back(); |
267 | 35.9M | |
268 | 35.9M | bool Done = true; |
269 | 35.9M | unsigned MaxPredDepth = 0; |
270 | 47.8M | for (const SDep &PredDep : Cur->Preds) { |
271 | 47.8M | SUnit *PredSU = PredDep.getSUnit(); |
272 | 47.8M | if (PredSU->isDepthCurrent) |
273 | 32.9M | MaxPredDepth = std::max(MaxPredDepth, |
274 | 32.9M | PredSU->Depth + PredDep.getLatency()); |
275 | 14.9M | else { |
276 | 14.9M | Done = false; |
277 | 14.9M | WorkList.push_back(PredSU); |
278 | 14.9M | } |
279 | 47.8M | } |
280 | 35.9M | |
281 | 35.9M | if (Done35.9M ) { |
282 | 26.9M | WorkList.pop_back(); |
283 | 26.9M | if (MaxPredDepth != Cur->Depth26.9M ) { |
284 | 11.0M | Cur->setDepthDirty(); |
285 | 11.0M | Cur->Depth = MaxPredDepth; |
286 | 11.0M | } |
287 | 26.9M | Cur->isDepthCurrent = true; |
288 | 26.9M | } |
289 | 35.9M | } while (!WorkList.empty()); |
290 | 11.9M | } |
291 | | |
292 | | /// Calculates the maximal path from the node to the entry. |
293 | 42.2M | void SUnit::ComputeHeight() { |
294 | 42.2M | SmallVector<SUnit*, 8> WorkList; |
295 | 42.2M | WorkList.push_back(this); |
296 | 62.0M | do { |
297 | 62.0M | SUnit *Cur = WorkList.back(); |
298 | 62.0M | |
299 | 62.0M | bool Done = true; |
300 | 62.0M | unsigned MaxSuccHeight = 0; |
301 | 204M | for (const SDep &SuccDep : Cur->Succs) { |
302 | 204M | SUnit *SuccSU = SuccDep.getSUnit(); |
303 | 204M | if (SuccSU->isHeightCurrent) |
304 | 191M | MaxSuccHeight = std::max(MaxSuccHeight, |
305 | 191M | SuccSU->Height + SuccDep.getLatency()); |
306 | 12.7M | else { |
307 | 12.7M | Done = false; |
308 | 12.7M | WorkList.push_back(SuccSU); |
309 | 12.7M | } |
310 | 204M | } |
311 | 62.0M | |
312 | 62.0M | if (Done62.0M ) { |
313 | 55.0M | WorkList.pop_back(); |
314 | 55.0M | if (MaxSuccHeight != Cur->Height55.0M ) { |
315 | 34.8M | Cur->setHeightDirty(); |
316 | 34.8M | Cur->Height = MaxSuccHeight; |
317 | 34.8M | } |
318 | 55.0M | Cur->isHeightCurrent = true; |
319 | 55.0M | } |
320 | 62.0M | } while (!WorkList.empty()); |
321 | 42.2M | } |
322 | | |
323 | 20.7M | void SUnit::biasCriticalPath() { |
324 | 20.7M | if (NumPreds < 2) |
325 | 19.4M | return; |
326 | 1.26M | |
327 | 1.26M | SUnit::pred_iterator BestI = Preds.begin(); |
328 | 1.26M | unsigned MaxDepth = BestI->getSUnit()->getDepth(); |
329 | 6.49M | for (SUnit::pred_iterator I = std::next(BestI), E = Preds.end(); I != E; |
330 | 5.22M | ++I5.22M ) { |
331 | 5.22M | if (I->getKind() == SDep::Data && 5.22M I->getSUnit()->getDepth() > MaxDepth1.55M ) |
332 | 280k | BestI = I; |
333 | 5.22M | } |
334 | 1.26M | if (BestI != Preds.begin()) |
335 | 265k | std::swap(*Preds.begin(), *BestI); |
336 | 20.7M | } |
337 | | |
338 | | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
339 | | LLVM_DUMP_METHOD |
340 | | raw_ostream &SUnit::print(raw_ostream &OS, |
341 | | const SUnit *Entry, const SUnit *Exit) const { |
342 | | if (this == Entry) |
343 | | OS << "EntrySU"; |
344 | | else if (this == Exit) |
345 | | OS << "ExitSU"; |
346 | | else |
347 | | OS << "SU(" << NodeNum << ")"; |
348 | | return OS; |
349 | | } |
350 | | |
351 | | LLVM_DUMP_METHOD |
352 | | raw_ostream &SUnit::print(raw_ostream &OS, const ScheduleDAG *G) const { |
353 | | return print(OS, &G->EntrySU, &G->ExitSU); |
354 | | } |
355 | | |
356 | | LLVM_DUMP_METHOD |
357 | | void SUnit::dump(const ScheduleDAG *G) const { |
358 | | print(dbgs(), G); |
359 | | dbgs() << ": "; |
360 | | G->dumpNode(this); |
361 | | } |
362 | | |
363 | | LLVM_DUMP_METHOD void SUnit::dumpAll(const ScheduleDAG *G) const { |
364 | | dump(G); |
365 | | |
366 | | dbgs() << " # preds left : " << NumPredsLeft << "\n"; |
367 | | dbgs() << " # succs left : " << NumSuccsLeft << "\n"; |
368 | | if (WeakPredsLeft) |
369 | | dbgs() << " # weak preds left : " << WeakPredsLeft << "\n"; |
370 | | if (WeakSuccsLeft) |
371 | | dbgs() << " # weak succs left : " << WeakSuccsLeft << "\n"; |
372 | | dbgs() << " # rdefs left : " << NumRegDefsLeft << "\n"; |
373 | | dbgs() << " Latency : " << Latency << "\n"; |
374 | | dbgs() << " Depth : " << getDepth() << "\n"; |
375 | | dbgs() << " Height : " << getHeight() << "\n"; |
376 | | |
377 | | if (Preds.size() != 0) { |
378 | | dbgs() << " Predecessors:\n"; |
379 | | for (const SDep &Dep : Preds) { |
380 | | dbgs() << " "; |
381 | | Dep.getSUnit()->print(dbgs(), G); dbgs() << ": "; |
382 | | Dep.print(dbgs(), G->TRI); dbgs() << '\n'; |
383 | | } |
384 | | } |
385 | | if (Succs.size() != 0) { |
386 | | dbgs() << " Successors:\n"; |
387 | | for (const SDep &Dep : Succs) { |
388 | | dbgs() << " "; |
389 | | Dep.getSUnit()->print(dbgs(), G); dbgs() << ": "; |
390 | | Dep.print(dbgs(), G->TRI); dbgs() << '\n'; |
391 | | } |
392 | | } |
393 | | } |
394 | | #endif |
395 | | |
396 | | #ifndef NDEBUG |
397 | | unsigned ScheduleDAG::VerifyScheduledDAG(bool isBottomUp) { |
398 | | bool AnyNotSched = false; |
399 | | unsigned DeadNodes = 0; |
400 | | for (const SUnit &SUnit : SUnits) { |
401 | | if (!SUnit.isScheduled) { |
402 | | if (SUnit.NumPreds == 0 && SUnit.NumSuccs == 0) { |
403 | | ++DeadNodes; |
404 | | continue; |
405 | | } |
406 | | if (!AnyNotSched) |
407 | | dbgs() << "*** Scheduling failed! ***\n"; |
408 | | SUnit.dump(this); |
409 | | dbgs() << "has not been scheduled!\n"; |
410 | | AnyNotSched = true; |
411 | | } |
412 | | if (SUnit.isScheduled && |
413 | | (isBottomUp ? SUnit.getHeight() : SUnit.getDepth()) > |
414 | | unsigned(std::numeric_limits<int>::max())) { |
415 | | if (!AnyNotSched) |
416 | | dbgs() << "*** Scheduling failed! ***\n"; |
417 | | SUnit.dump(this); |
418 | | dbgs() << "has an unexpected " |
419 | | << (isBottomUp ? "Height" : "Depth") << " value!\n"; |
420 | | AnyNotSched = true; |
421 | | } |
422 | | if (isBottomUp) { |
423 | | if (SUnit.NumSuccsLeft != 0) { |
424 | | if (!AnyNotSched) |
425 | | dbgs() << "*** Scheduling failed! ***\n"; |
426 | | SUnit.dump(this); |
427 | | dbgs() << "has successors left!\n"; |
428 | | AnyNotSched = true; |
429 | | } |
430 | | } else { |
431 | | if (SUnit.NumPredsLeft != 0) { |
432 | | if (!AnyNotSched) |
433 | | dbgs() << "*** Scheduling failed! ***\n"; |
434 | | SUnit.dump(this); |
435 | | dbgs() << "has predecessors left!\n"; |
436 | | AnyNotSched = true; |
437 | | } |
438 | | } |
439 | | } |
440 | | assert(!AnyNotSched); |
441 | | return SUnits.size() - DeadNodes; |
442 | | } |
443 | | #endif |
444 | | |
445 | 7.51M | void ScheduleDAGTopologicalSort::InitDAGTopologicalSorting() { |
446 | 7.51M | // The idea of the algorithm is taken from |
447 | 7.51M | // "Online algorithms for managing the topological order of |
448 | 7.51M | // a directed acyclic graph" by David J. Pearce and Paul H.J. Kelly |
449 | 7.51M | // This is the MNR algorithm, which was first introduced by |
450 | 7.51M | // A. Marchetti-Spaccamela, U. Nanni and H. Rohnert in |
451 | 7.51M | // "Maintaining a topological order under edge insertions". |
452 | 7.51M | // |
453 | 7.51M | // Short description of the algorithm: |
454 | 7.51M | // |
455 | 7.51M | // Topological ordering, ord, of a DAG maps each node to a topological |
456 | 7.51M | // index so that for all edges X->Y it is the case that ord(X) < ord(Y). |
457 | 7.51M | // |
458 | 7.51M | // This means that if there is a path from the node X to the node Z, |
459 | 7.51M | // then ord(X) < ord(Z). |
460 | 7.51M | // |
461 | 7.51M | // This property can be used to check for reachability of nodes: |
462 | 7.51M | // if Z is reachable from X, then an insertion of the edge Z->X would |
463 | 7.51M | // create a cycle. |
464 | 7.51M | // |
465 | 7.51M | // The algorithm first computes a topological ordering for the DAG by |
466 | 7.51M | // initializing the Index2Node and Node2Index arrays and then tries to keep |
467 | 7.51M | // the ordering up-to-date after edge insertions by reordering the DAG. |
468 | 7.51M | // |
469 | 7.51M | // On insertion of the edge X->Y, the algorithm first marks by calling DFS |
470 | 7.51M | // the nodes reachable from Y, and then shifts them using Shift to lie |
471 | 7.51M | // immediately after X in Index2Node. |
472 | 7.51M | unsigned DAGSize = SUnits.size(); |
473 | 7.51M | std::vector<SUnit*> WorkList; |
474 | 7.51M | WorkList.reserve(DAGSize); |
475 | 7.51M | |
476 | 7.51M | Index2Node.resize(DAGSize); |
477 | 7.51M | Node2Index.resize(DAGSize); |
478 | 7.51M | |
479 | 7.51M | // Initialize the data structures. |
480 | 7.51M | if (ExitSU) |
481 | 4.08M | WorkList.push_back(ExitSU); |
482 | 46.2M | for (SUnit &SU : SUnits) { |
483 | 46.2M | int NodeNum = SU.NodeNum; |
484 | 46.2M | unsigned Degree = SU.Succs.size(); |
485 | 46.2M | // Temporarily use the Node2Index array as scratch space for degree counts. |
486 | 46.2M | Node2Index[NodeNum] = Degree; |
487 | 46.2M | |
488 | 46.2M | // Is it a node without dependencies? |
489 | 46.2M | if (Degree == 046.2M ) { |
490 | 6.20M | assert(SU.Succs.empty() && "SUnit should have no successors"); |
491 | 6.20M | // Collect leaf nodes. |
492 | 6.20M | WorkList.push_back(&SU); |
493 | 6.20M | } |
494 | 46.2M | } |
495 | 7.51M | |
496 | 7.51M | int Id = DAGSize; |
497 | 57.8M | while (!WorkList.empty()57.8M ) { |
498 | 50.3M | SUnit *SU = WorkList.back(); |
499 | 50.3M | WorkList.pop_back(); |
500 | 50.3M | if (SU->NodeNum < DAGSize) |
501 | 46.2M | Allocate(SU->NodeNum, --Id); |
502 | 57.5M | for (const SDep &PredDep : SU->Preds) { |
503 | 57.5M | SUnit *SU = PredDep.getSUnit(); |
504 | 57.5M | if (SU->NodeNum < DAGSize && 57.5M !--Node2Index[SU->NodeNum]57.5M ) |
505 | 57.5M | // If all dependencies of the node are processed already, |
506 | 57.5M | // then the node can be computed now. |
507 | 40.0M | WorkList.push_back(SU); |
508 | 57.5M | } |
509 | 50.3M | } |
510 | 7.51M | |
511 | 7.51M | Visited.resize(DAGSize); |
512 | 7.51M | |
513 | | #ifndef NDEBUG |
514 | | // Check correctness of the ordering |
515 | | for (SUnit &SU : SUnits) { |
516 | | for (const SDep &PD : SU.Preds) { |
517 | | assert(Node2Index[SU.NodeNum] > Node2Index[PD.getSUnit()->NodeNum] && |
518 | | "Wrong topological sorting"); |
519 | | } |
520 | | } |
521 | | #endif |
522 | | } |
523 | | |
524 | 663k | void ScheduleDAGTopologicalSort::AddPred(SUnit *Y, SUnit *X) { |
525 | 663k | int UpperBound, LowerBound; |
526 | 663k | LowerBound = Node2Index[Y->NodeNum]; |
527 | 663k | UpperBound = Node2Index[X->NodeNum]; |
528 | 663k | bool HasLoop = false; |
529 | 663k | // Is Ord(X) < Ord(Y) ? |
530 | 663k | if (LowerBound < UpperBound663k ) { |
531 | 224k | // Update the topological order. |
532 | 224k | Visited.reset(); |
533 | 224k | DFS(Y, UpperBound, HasLoop); |
534 | 224k | assert(!HasLoop && "Inserted edge creates a loop!"); |
535 | 224k | // Recompute topological indexes. |
536 | 224k | Shift(Visited, LowerBound, UpperBound); |
537 | 224k | } |
538 | 663k | } |
539 | | |
540 | 871 | void ScheduleDAGTopologicalSort::RemovePred(SUnit *M, SUnit *N) { |
541 | 871 | // InitDAGTopologicalSorting(); |
542 | 871 | } |
543 | | |
544 | | void ScheduleDAGTopologicalSort::DFS(const SUnit *SU, int UpperBound, |
545 | 478k | bool &HasLoop) { |
546 | 478k | std::vector<const SUnit*> WorkList; |
547 | 478k | WorkList.reserve(SUnits.size()); |
548 | 478k | |
549 | 478k | WorkList.push_back(SU); |
550 | 1.01M | do { |
551 | 1.01M | SU = WorkList.back(); |
552 | 1.01M | WorkList.pop_back(); |
553 | 1.01M | Visited.set(SU->NodeNum); |
554 | 1.01M | for (const SDep &SuccDep |
555 | 6.43M | : make_range(SU->Succs.rbegin(), SU->Succs.rend())) { |
556 | 6.43M | unsigned s = SuccDep.getSUnit()->NodeNum; |
557 | 6.43M | // Edges to non-SUnits are allowed but ignored (e.g. ExitSU). |
558 | 6.43M | if (s >= Node2Index.size()) |
559 | 144k | continue; |
560 | 6.28M | if (6.28M Node2Index[s] == UpperBound6.28M ) { |
561 | 22.3k | HasLoop = true; |
562 | 22.3k | return; |
563 | 22.3k | } |
564 | 6.26M | // Visit successors if not already and in affected region. |
565 | 6.26M | if (6.26M !Visited.test(s) && 6.26M Node2Index[s] < UpperBound1.58M ) { |
566 | 587k | WorkList.push_back(SuccDep.getSUnit()); |
567 | 587k | } |
568 | 6.43M | } |
569 | 997k | } while (!WorkList.empty()); |
570 | 478k | } |
571 | | |
572 | | std::vector<int> ScheduleDAGTopologicalSort::GetSubGraph(const SUnit &StartSU, |
573 | | const SUnit &TargetSU, |
574 | 0 | bool &Success) { |
575 | 0 | std::vector<const SUnit*> WorkList; |
576 | 0 | int LowerBound = Node2Index[StartSU.NodeNum]; |
577 | 0 | int UpperBound = Node2Index[TargetSU.NodeNum]; |
578 | 0 | bool Found = false; |
579 | 0 | BitVector VisitedBack; |
580 | 0 | std::vector<int> Nodes; |
581 | 0 |
|
582 | 0 | if (LowerBound > UpperBound0 ) { |
583 | 0 | Success = false; |
584 | 0 | return Nodes; |
585 | 0 | } |
586 | 0 |
|
587 | 0 | WorkList.reserve(SUnits.size()); |
588 | 0 | Visited.reset(); |
589 | 0 |
|
590 | 0 | // Starting from StartSU, visit all successors up |
591 | 0 | // to UpperBound. |
592 | 0 | WorkList.push_back(&StartSU); |
593 | 0 | do { |
594 | 0 | const SUnit *SU = WorkList.back(); |
595 | 0 | WorkList.pop_back(); |
596 | 0 | for (int I = SU->Succs.size()-1; I >= 00 ; --I0 ) { |
597 | 0 | const SUnit *Succ = SU->Succs[I].getSUnit(); |
598 | 0 | unsigned s = Succ->NodeNum; |
599 | 0 | // Edges to non-SUnits are allowed but ignored (e.g. ExitSU). |
600 | 0 | if (Succ->isBoundaryNode()) |
601 | 0 | continue; |
602 | 0 | if (0 Node2Index[s] == UpperBound0 ) { |
603 | 0 | Found = true; |
604 | 0 | continue; |
605 | 0 | } |
606 | 0 | // Visit successors if not already and in affected region. |
607 | 0 | if (0 !Visited.test(s) && 0 Node2Index[s] < UpperBound0 ) { |
608 | 0 | Visited.set(s); |
609 | 0 | WorkList.push_back(Succ); |
610 | 0 | } |
611 | 0 | } |
612 | 0 | } while (!WorkList.empty()); |
613 | 0 |
|
614 | 0 | if (!Found0 ) { |
615 | 0 | Success = false; |
616 | 0 | return Nodes; |
617 | 0 | } |
618 | 0 |
|
619 | 0 | WorkList.clear(); |
620 | 0 | VisitedBack.resize(SUnits.size()); |
621 | 0 | Found = false; |
622 | 0 |
|
623 | 0 | // Starting from TargetSU, visit all predecessors up |
624 | 0 | // to LowerBound. SUs that are visited by the two |
625 | 0 | // passes are added to Nodes. |
626 | 0 | WorkList.push_back(&TargetSU); |
627 | 0 | do { |
628 | 0 | const SUnit *SU = WorkList.back(); |
629 | 0 | WorkList.pop_back(); |
630 | 0 | for (int I = SU->Preds.size()-1; I >= 00 ; --I0 ) { |
631 | 0 | const SUnit *Pred = SU->Preds[I].getSUnit(); |
632 | 0 | unsigned s = Pred->NodeNum; |
633 | 0 | // Edges to non-SUnits are allowed but ignored (e.g. EntrySU). |
634 | 0 | if (Pred->isBoundaryNode()) |
635 | 0 | continue; |
636 | 0 | if (0 Node2Index[s] == LowerBound0 ) { |
637 | 0 | Found = true; |
638 | 0 | continue; |
639 | 0 | } |
640 | 0 | if (0 !VisitedBack.test(s) && 0 Visited.test(s)0 ) { |
641 | 0 | VisitedBack.set(s); |
642 | 0 | WorkList.push_back(Pred); |
643 | 0 | Nodes.push_back(s); |
644 | 0 | } |
645 | 0 | } |
646 | 0 | } while (!WorkList.empty()); |
647 | 0 |
|
648 | 0 | assert(Found && "Error in SUnit Graph!"); |
649 | 0 | Success = true; |
650 | 0 | return Nodes; |
651 | 0 | } |
652 | | |
653 | | void ScheduleDAGTopologicalSort::Shift(BitVector& Visited, int LowerBound, |
654 | 224k | int UpperBound) { |
655 | 224k | std::vector<int> L; |
656 | 224k | int shift = 0; |
657 | 224k | int i; |
658 | 224k | |
659 | 1.23M | for (i = LowerBound; i <= UpperBound1.23M ; ++i1.00M ) { |
660 | 1.00M | // w is node at topological index i. |
661 | 1.00M | int w = Index2Node[i]; |
662 | 1.00M | if (Visited.test(w)1.00M ) { |
663 | 382k | // Unmark. |
664 | 382k | Visited.reset(w); |
665 | 382k | L.push_back(w); |
666 | 382k | shift = shift + 1; |
667 | 1.00M | } else { |
668 | 626k | Allocate(w, i - shift); |
669 | 626k | } |
670 | 1.00M | } |
671 | 224k | |
672 | 382k | for (unsigned LI : L) { |
673 | 382k | Allocate(LI, i - shift); |
674 | 382k | i = i + 1; |
675 | 382k | } |
676 | 224k | } |
677 | | |
678 | 3.85k | bool ScheduleDAGTopologicalSort::WillCreateCycle(SUnit *TargetSU, SUnit *SU) { |
679 | 3.85k | // Is SU reachable from TargetSU via successor edges? |
680 | 3.85k | if (IsReachable(SU, TargetSU)) |
681 | 1.84k | return true; |
682 | 2.01k | for (const SDep &PredDep : TargetSU->Preds) |
683 | 8.75k | if (8.75k PredDep.isAssignedRegDep() && |
684 | 705 | IsReachable(SU, PredDep.getSUnit())) |
685 | 286 | return true; |
686 | 1.72k | return false; |
687 | 1.72k | } |
688 | | |
689 | | bool ScheduleDAGTopologicalSort::IsReachable(const SUnit *SU, |
690 | 716k | const SUnit *TargetSU) { |
691 | 716k | // If insertion of the edge SU->TargetSU would create a cycle |
692 | 716k | // then there is a path from TargetSU to SU. |
693 | 716k | int UpperBound, LowerBound; |
694 | 716k | LowerBound = Node2Index[TargetSU->NodeNum]; |
695 | 716k | UpperBound = Node2Index[SU->NodeNum]; |
696 | 716k | bool HasLoop = false; |
697 | 716k | // Is Ord(TargetSU) < Ord(SU) ? |
698 | 716k | if (LowerBound < UpperBound716k ) { |
699 | 254k | Visited.reset(); |
700 | 254k | // There may be a path from TargetSU to SU. Check for it. |
701 | 254k | DFS(TargetSU, UpperBound, HasLoop); |
702 | 254k | } |
703 | 716k | return HasLoop; |
704 | 716k | } |
705 | | |
706 | 47.3M | void ScheduleDAGTopologicalSort::Allocate(int n, int index) { |
707 | 47.3M | Node2Index[n] = index; |
708 | 47.3M | Index2Node[index] = n; |
709 | 47.3M | } |
710 | | |
711 | | ScheduleDAGTopologicalSort:: |
712 | | ScheduleDAGTopologicalSort(std::vector<SUnit> &sunits, SUnit *exitsu) |
713 | 3.99M | : SUnits(sunits), ExitSU(exitsu) {} |
714 | | |
715 | 4.52M | ScheduleHazardRecognizer::~ScheduleHazardRecognizer() = default; |