/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Target/AMDGPU/GCNMinRegStrategy.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===- GCNMinRegStrategy.cpp ----------------------------------------------===// |
2 | | // |
3 | | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | | // See https://llvm.org/LICENSE.txt for license information. |
5 | | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | | // |
7 | | //===----------------------------------------------------------------------===// |
8 | | |
9 | | #include "llvm/ADT/ArrayRef.h" |
10 | | #include "llvm/ADT/SmallPtrSet.h" |
11 | | #include "llvm/ADT/SmallVector.h" |
12 | | #include "llvm/ADT/ilist_node.h" |
13 | | #include "llvm/ADT/simple_ilist.h" |
14 | | #include "llvm/CodeGen/ScheduleDAG.h" |
15 | | #include "llvm/Support/Allocator.h" |
16 | | #include "llvm/Support/Debug.h" |
17 | | #include "llvm/Support/raw_ostream.h" |
18 | | #include <cassert> |
19 | | #include <cstdint> |
20 | | #include <limits> |
21 | | #include <vector> |
22 | | |
23 | | using namespace llvm; |
24 | | |
25 | | #define DEBUG_TYPE "machine-scheduler" |
26 | | |
27 | | namespace { |
28 | | |
29 | | class GCNMinRegScheduler { |
30 | | struct Candidate : ilist_node<Candidate> { |
31 | | const SUnit *SU; |
32 | | int Priority; |
33 | | |
34 | | Candidate(const SUnit *SU_, int Priority_ = 0) |
35 | 1.97k | : SU(SU_), Priority(Priority_) {} |
36 | | }; |
37 | | |
38 | | SpecificBumpPtrAllocator<Candidate> Alloc; |
39 | | using Queue = simple_ilist<Candidate>; |
40 | | Queue RQ; // Ready queue |
41 | | |
42 | | std::vector<unsigned> NumPreds; |
43 | | |
44 | 491k | bool isScheduled(const SUnit *SU) const { |
45 | 491k | assert(!SU->isBoundaryNode()); |
46 | 491k | return NumPreds[SU->NodeNum] == std::numeric_limits<unsigned>::max(); |
47 | 491k | } |
48 | | |
49 | 1.97k | void setIsScheduled(const SUnit *SU) { |
50 | 1.97k | assert(!SU->isBoundaryNode()); |
51 | 1.97k | NumPreds[SU->NodeNum] = std::numeric_limits<unsigned>::max(); |
52 | 1.97k | } |
53 | | |
54 | 0 | unsigned getNumPreds(const SUnit *SU) const { |
55 | 0 | assert(!SU->isBoundaryNode()); |
56 | 0 | assert(NumPreds[SU->NodeNum] != std::numeric_limits<unsigned>::max()); |
57 | 0 | return NumPreds[SU->NodeNum]; |
58 | 0 | } |
59 | | |
60 | 14.6k | unsigned decNumPreds(const SUnit *SU) { |
61 | 14.6k | assert(!SU->isBoundaryNode()); |
62 | 14.6k | assert(NumPreds[SU->NodeNum] != std::numeric_limits<unsigned>::max()); |
63 | 14.6k | return --NumPreds[SU->NodeNum]; |
64 | 14.6k | } |
65 | | |
66 | | void initNumPreds(const decltype(ScheduleDAG::SUnits) &SUnits); |
67 | | |
68 | | int getReadySuccessors(const SUnit *SU) const; |
69 | | int getNotReadySuccessors(const SUnit *SU) const; |
70 | | |
71 | | template <typename Calc> |
72 | | unsigned findMax(unsigned Num, Calc C); |
73 | | |
74 | | Candidate* pickCandidate(); |
75 | | |
76 | | void bumpPredsPriority(const SUnit *SchedSU, int Priority); |
77 | | void releaseSuccessors(const SUnit* SU, int Priority); |
78 | | |
79 | | public: |
80 | | std::vector<const SUnit*> schedule(ArrayRef<const SUnit*> TopRoots, |
81 | | const ScheduleDAG &DAG); |
82 | | }; |
83 | | |
84 | | } // end anonymous namespace |
85 | | |
86 | 6 | void GCNMinRegScheduler::initNumPreds(const decltype(ScheduleDAG::SUnits) &SUnits) { |
87 | 6 | NumPreds.resize(SUnits.size()); |
88 | 1.97k | for (unsigned I = 0; I < SUnits.size(); ++I1.97k ) |
89 | 1.97k | NumPreds[I] = SUnits[I].NumPredsLeft; |
90 | 6 | } |
91 | | |
92 | 22.3k | int GCNMinRegScheduler::getReadySuccessors(const SUnit *SU) const { |
93 | 22.3k | unsigned NumSchedSuccs = 0; |
94 | 190k | for (auto SDep : SU->Succs) { |
95 | 190k | bool wouldBeScheduled = true; |
96 | 249k | for (auto PDep : SDep.getSUnit()->Preds) { |
97 | 249k | auto PSU = PDep.getSUnit(); |
98 | 249k | assert(!PSU->isBoundaryNode()); |
99 | 249k | if (PSU != SU && !isScheduled(PSU)221k ) { |
100 | 175k | wouldBeScheduled = false; |
101 | 175k | break; |
102 | 175k | } |
103 | 249k | } |
104 | 190k | NumSchedSuccs += wouldBeScheduled ? 114.9k : 0175k ; |
105 | 190k | } |
106 | 22.3k | return NumSchedSuccs; |
107 | 22.3k | } |
108 | | |
109 | 12.4k | int GCNMinRegScheduler::getNotReadySuccessors(const SUnit *SU) const { |
110 | 12.4k | return SU->Succs.size() - getReadySuccessors(SU); |
111 | 12.4k | } |
112 | | |
113 | | template <typename Calc> |
114 | 2.86k | unsigned GCNMinRegScheduler::findMax(unsigned Num, Calc C) { |
115 | 2.86k | assert(!RQ.empty() && Num <= RQ.size()); |
116 | 2.86k | |
117 | 2.86k | using T = decltype(C(*RQ.begin())) ; |
118 | 2.86k | |
119 | 2.86k | T Max = std::numeric_limits<T>::min(); |
120 | 2.86k | unsigned NumMax = 0; |
121 | 106k | for (auto I = RQ.begin(); Num; --Num103k ) { |
122 | 103k | T Cur = C(*I); |
123 | 103k | if (Cur >= Max) { |
124 | 42.1k | if (Cur > Max) { |
125 | 10.2k | Max = Cur; |
126 | 10.2k | NumMax = 1; |
127 | 10.2k | } else |
128 | 31.8k | ++NumMax; |
129 | 42.1k | auto &Cand = *I++; |
130 | 42.1k | RQ.remove(Cand); |
131 | 42.1k | RQ.push_front(Cand); |
132 | 42.1k | continue; |
133 | 42.1k | } |
134 | 61.1k | ++I; |
135 | 61.1k | } |
136 | 2.86k | return NumMax; |
137 | 2.86k | } GCNMinRegStrategy.cpp:unsigned int (anonymous namespace)::GCNMinRegScheduler::findMax<(anonymous namespace)::GCNMinRegScheduler::pickCandidate()::$_0>(unsigned int, (anonymous namespace)::GCNMinRegScheduler::pickCandidate()::$_0) Line | Count | Source | 114 | 1.77k | unsigned GCNMinRegScheduler::findMax(unsigned Num, Calc C) { | 115 | 1.77k | assert(!RQ.empty() && Num <= RQ.size()); | 116 | 1.77k | | 117 | 1.77k | using T = decltype(C(*RQ.begin())) ; | 118 | 1.77k | | 119 | 1.77k | T Max = std::numeric_limits<T>::min(); | 120 | 1.77k | unsigned NumMax = 0; | 121 | 76.6k | for (auto I = RQ.begin(); Num; --Num74.9k ) { | 122 | 74.9k | T Cur = C(*I); | 123 | 74.9k | if (Cur >= Max) { | 124 | 17.5k | if (Cur > Max) { | 125 | 1.85k | Max = Cur; | 126 | 1.85k | NumMax = 1; | 127 | 1.85k | } else | 128 | 15.6k | ++NumMax; | 129 | 17.5k | auto &Cand = *I++; | 130 | 17.5k | RQ.remove(Cand); | 131 | 17.5k | RQ.push_front(Cand); | 132 | 17.5k | continue; | 133 | 17.5k | } | 134 | 57.3k | ++I; | 135 | 57.3k | } | 136 | 1.77k | return NumMax; | 137 | 1.77k | } |
GCNMinRegStrategy.cpp:unsigned int (anonymous namespace)::GCNMinRegScheduler::findMax<(anonymous namespace)::GCNMinRegScheduler::pickCandidate()::$_1>(unsigned int, (anonymous namespace)::GCNMinRegScheduler::pickCandidate()::$_1) Line | Count | Source | 114 | 472 | unsigned GCNMinRegScheduler::findMax(unsigned Num, Calc C) { | 115 | 472 | assert(!RQ.empty() && Num <= RQ.size()); | 116 | 472 | | 117 | 472 | using T = decltype(C(*RQ.begin())) ; | 118 | 472 | | 119 | 472 | T Max = std::numeric_limits<T>::min(); | 120 | 472 | unsigned NumMax = 0; | 121 | 12.9k | for (auto I = RQ.begin(); Num; --Num12.4k ) { | 122 | 12.4k | T Cur = C(*I); | 123 | 12.4k | if (Cur >= Max) { | 124 | 9.18k | if (Cur > Max) { | 125 | 694 | Max = Cur; | 126 | 694 | NumMax = 1; | 127 | 694 | } else | 128 | 8.49k | ++NumMax; | 129 | 9.18k | auto &Cand = *I++; | 130 | 9.18k | RQ.remove(Cand); | 131 | 9.18k | RQ.push_front(Cand); | 132 | 9.18k | continue; | 133 | 9.18k | } | 134 | 3.25k | ++I; | 135 | 3.25k | } | 136 | 472 | return NumMax; | 137 | 472 | } |
GCNMinRegStrategy.cpp:unsigned int (anonymous namespace)::GCNMinRegScheduler::findMax<(anonymous namespace)::GCNMinRegScheduler::pickCandidate()::$_2>(unsigned int, (anonymous namespace)::GCNMinRegScheduler::pickCandidate()::$_2) Line | Count | Source | 114 | 308 | unsigned GCNMinRegScheduler::findMax(unsigned Num, Calc C) { | 115 | 308 | assert(!RQ.empty() && Num <= RQ.size()); | 116 | 308 | | 117 | 308 | using T = decltype(C(*RQ.begin())) ; | 118 | 308 | | 119 | 308 | T Max = std::numeric_limits<T>::min(); | 120 | 308 | unsigned NumMax = 0; | 121 | 8.25k | for (auto I = RQ.begin(); Num; --Num7.94k ) { | 122 | 7.94k | T Cur = C(*I); | 123 | 7.94k | if (Cur >= Max) { | 124 | 7.94k | if (Cur > Max) { | 125 | 308 | Max = Cur; | 126 | 308 | NumMax = 1; | 127 | 308 | } else | 128 | 7.63k | ++NumMax; | 129 | 7.94k | auto &Cand = *I++; | 130 | 7.94k | RQ.remove(Cand); | 131 | 7.94k | RQ.push_front(Cand); | 132 | 7.94k | continue; | 133 | 7.94k | } | 134 | 0 | ++I; | 135 | 0 | } | 136 | 308 | return NumMax; | 137 | 308 | } |
GCNMinRegStrategy.cpp:unsigned int (anonymous namespace)::GCNMinRegScheduler::findMax<(anonymous namespace)::GCNMinRegScheduler::pickCandidate()::$_3>(unsigned int, (anonymous namespace)::GCNMinRegScheduler::pickCandidate()::$_3) Line | Count | Source | 114 | 308 | unsigned GCNMinRegScheduler::findMax(unsigned Num, Calc C) { | 115 | 308 | assert(!RQ.empty() && Num <= RQ.size()); | 116 | 308 | | 117 | 308 | using T = decltype(C(*RQ.begin())) ; | 118 | 308 | | 119 | 308 | T Max = std::numeric_limits<T>::min(); | 120 | 308 | unsigned NumMax = 0; | 121 | 8.25k | for (auto I = RQ.begin(); Num; --Num7.94k ) { | 122 | 7.94k | T Cur = C(*I); | 123 | 7.94k | if (Cur >= Max) { | 124 | 7.43k | if (Cur > Max) { | 125 | 7.43k | Max = Cur; | 126 | 7.43k | NumMax = 1; | 127 | 7.43k | } else | 128 | 0 | ++NumMax; | 129 | 7.43k | auto &Cand = *I++; | 130 | 7.43k | RQ.remove(Cand); | 131 | 7.43k | RQ.push_front(Cand); | 132 | 7.43k | continue; | 133 | 7.43k | } | 134 | 504 | ++I; | 135 | 504 | } | 136 | 308 | return NumMax; | 137 | 308 | } |
|
138 | | |
139 | 1.97k | GCNMinRegScheduler::Candidate* GCNMinRegScheduler::pickCandidate() { |
140 | 1.97k | do { |
141 | 1.97k | unsigned Num = RQ.size(); |
142 | 1.97k | if (Num == 1) break198 ; |
143 | 1.77k | |
144 | 1.77k | LLVM_DEBUG(dbgs() << "\nSelecting max priority candidates among " << Num |
145 | 1.77k | << '\n'); |
146 | 74.9k | Num = findMax(Num, [=](const Candidate &C) { return C.Priority; }); |
147 | 1.77k | if (Num == 1) break1.30k ; |
148 | 472 | |
149 | 472 | LLVM_DEBUG(dbgs() << "\nSelecting min non-ready producing candidate among " |
150 | 472 | << Num << '\n'); |
151 | 12.4k | Num = findMax(Num, [=](const Candidate &C) { |
152 | 12.4k | auto SU = C.SU; |
153 | 12.4k | int Res = getNotReadySuccessors(SU); |
154 | 12.4k | LLVM_DEBUG(dbgs() << "SU(" << SU->NodeNum << ") would left non-ready " |
155 | 12.4k | << Res << " successors, metric = " << -Res << '\n'); |
156 | 12.4k | return -Res; |
157 | 12.4k | }); |
158 | 472 | if (Num == 1) break164 ; |
159 | 308 | |
160 | 308 | LLVM_DEBUG(dbgs() << "\nSelecting most producing candidate among " << Num |
161 | 308 | << '\n'); |
162 | 7.94k | Num = findMax(Num, [=](const Candidate &C) { |
163 | 7.94k | auto SU = C.SU; |
164 | 7.94k | auto Res = getReadySuccessors(SU); |
165 | 7.94k | LLVM_DEBUG(dbgs() << "SU(" << SU->NodeNum << ") would make ready " << Res |
166 | 7.94k | << " successors, metric = " << Res << '\n'); |
167 | 7.94k | return Res; |
168 | 7.94k | }); |
169 | 308 | if (Num == 1) break0 ; |
170 | 308 | |
171 | 308 | Num = Num ? Num : RQ.size()0 ; |
172 | 308 | LLVM_DEBUG( |
173 | 308 | dbgs() |
174 | 308 | << "\nCan't find best candidate, selecting in program order among " |
175 | 308 | << Num << '\n'); |
176 | 7.94k | Num = findMax(Num, [=](const Candidate &C) { return -(int64_t)C.SU->NodeNum; }); |
177 | 308 | assert(Num == 1); |
178 | 308 | } while (false); |
179 | 1.97k | |
180 | 1.97k | return &RQ.front(); |
181 | 1.97k | } |
182 | | |
183 | 656 | void GCNMinRegScheduler::bumpPredsPriority(const SUnit *SchedSU, int Priority) { |
184 | 656 | SmallPtrSet<const SUnit*, 32> Set; |
185 | 7.30k | for (const auto &S : SchedSU->Succs) { |
186 | 7.30k | if (S.getSUnit()->isBoundaryNode() || isScheduled(S.getSUnit()) || |
187 | 7.30k | S.getKind() != SDep::Data) |
188 | 6.49k | continue; |
189 | 24.0k | for (const auto &P : S.getSUnit()->Preds)814 { |
190 | 24.0k | auto PSU = P.getSUnit(); |
191 | 24.0k | assert(!PSU->isBoundaryNode()); |
192 | 24.0k | if (PSU != SchedSU && !isScheduled(PSU)23.2k ) { |
193 | 13.8k | Set.insert(PSU); |
194 | 13.8k | } |
195 | 24.0k | } |
196 | 814 | } |
197 | 656 | SmallVector<const SUnit*, 32> Worklist(Set.begin(), Set.end()); |
198 | 24.8k | while (!Worklist.empty()) { |
199 | 24.1k | auto SU = Worklist.pop_back_val(); |
200 | 24.1k | assert(!SU->isBoundaryNode()); |
201 | 239k | for (const auto &P : SU->Preds) { |
202 | 239k | if (!P.getSUnit()->isBoundaryNode() && !isScheduled(P.getSUnit()) && |
203 | 239k | Set.insert(P.getSUnit()).second95.5k ) |
204 | 12.6k | Worklist.push_back(P.getSUnit()); |
205 | 239k | } |
206 | 24.1k | } |
207 | 656 | LLVM_DEBUG(dbgs() << "Make the predecessors of SU(" << SchedSU->NodeNum |
208 | 656 | << ")'s non-ready successors of " << Priority |
209 | 656 | << " priority in ready queue: "); |
210 | 656 | const auto SetEnd = Set.end(); |
211 | 26.5k | for (auto &C : RQ) { |
212 | 26.5k | if (Set.find(C.SU) != SetEnd) { |
213 | 9.60k | C.Priority = Priority; |
214 | 9.60k | LLVM_DEBUG(dbgs() << " SU(" << C.SU->NodeNum << ')'); |
215 | 9.60k | } |
216 | 26.5k | } |
217 | 656 | LLVM_DEBUG(dbgs() << '\n'); |
218 | 656 | } |
219 | | |
220 | 1.97k | void GCNMinRegScheduler::releaseSuccessors(const SUnit* SU, int Priority) { |
221 | 14.6k | for (const auto &S : SU->Succs) { |
222 | 14.6k | auto SuccSU = S.getSUnit(); |
223 | 14.6k | if (S.isWeak()) |
224 | 0 | continue; |
225 | 14.6k | assert(SuccSU->isBoundaryNode() || getNumPreds(SuccSU) > 0); |
226 | 14.6k | if (!SuccSU->isBoundaryNode() && decNumPreds(SuccSU) == 0) |
227 | 1.95k | RQ.push_front(*new (Alloc.Allocate()) Candidate(SuccSU, Priority)); |
228 | 14.6k | } |
229 | 1.97k | } |
230 | | |
231 | | std::vector<const SUnit*> |
232 | | GCNMinRegScheduler::schedule(ArrayRef<const SUnit*> TopRoots, |
233 | 6 | const ScheduleDAG &DAG) { |
234 | 6 | const auto &SUnits = DAG.SUnits; |
235 | 6 | std::vector<const SUnit*> Schedule; |
236 | 6 | Schedule.reserve(SUnits.size()); |
237 | 6 | |
238 | 6 | initNumPreds(SUnits); |
239 | 6 | |
240 | 6 | int StepNo = 0; |
241 | 6 | |
242 | 16 | for (auto SU : TopRoots) { |
243 | 16 | RQ.push_back(*new (Alloc.Allocate()) Candidate(SU, StepNo)); |
244 | 16 | } |
245 | 6 | releaseSuccessors(&DAG.EntrySU, StepNo); |
246 | 6 | |
247 | 1.97k | while (!RQ.empty()) { |
248 | 1.97k | LLVM_DEBUG(dbgs() << "\n=== Picking candidate, Step = " << StepNo |
249 | 1.97k | << "\n" |
250 | 1.97k | "Ready queue:"; |
251 | 1.97k | for (auto &C |
252 | 1.97k | : RQ) dbgs() |
253 | 1.97k | << ' ' << C.SU->NodeNum << "(P" << C.Priority << ')'; |
254 | 1.97k | dbgs() << '\n';); |
255 | 1.97k | |
256 | 1.97k | auto C = pickCandidate(); |
257 | 1.97k | assert(C); |
258 | 1.97k | RQ.remove(*C); |
259 | 1.97k | auto SU = C->SU; |
260 | 1.97k | LLVM_DEBUG(dbgs() << "Selected "; DAG.dumpNode(*SU)); |
261 | 1.97k | |
262 | 1.97k | releaseSuccessors(SU, StepNo); |
263 | 1.97k | Schedule.push_back(SU); |
264 | 1.97k | setIsScheduled(SU); |
265 | 1.97k | |
266 | 1.97k | if (getReadySuccessors(SU) == 0) |
267 | 656 | bumpPredsPriority(SU, StepNo); |
268 | 1.97k | |
269 | 1.97k | ++StepNo; |
270 | 1.97k | } |
271 | 6 | assert(SUnits.size() == Schedule.size()); |
272 | 6 | |
273 | 6 | return Schedule; |
274 | 6 | } |
275 | | |
276 | | namespace llvm { |
277 | | |
278 | | std::vector<const SUnit*> makeMinRegSchedule(ArrayRef<const SUnit*> TopRoots, |
279 | 6 | const ScheduleDAG &DAG) { |
280 | 6 | GCNMinRegScheduler S; |
281 | 6 | return S.schedule(TopRoots, DAG); |
282 | 6 | } |
283 | | |
284 | | } // end namespace llvm |