Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Target/AMDGPU/GCNILPSched.cpp
 Count|Source
      |//===---------------------------- GCNILPSched.cpp - -----------------------===//
      |//
      |// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
      |// See https://llvm.org/LICENSE.txt for license information.
      |// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
      |//
      |//===----------------------------------------------------------------------===//
      |//
      |/// \file
      |//
      |//===----------------------------------------------------------------------===//
      |
      |#include "llvm/CodeGen/ScheduleDAG.h"
      |
      |using namespace llvm;
      |
      |#define DEBUG_TYPE "machine-scheduler"
      |
      |namespace {
      |
      |class GCNILPScheduler {
      |  struct Candidate : ilist_node<Candidate> {
      |    SUnit *SU;
      |
      |    Candidate(SUnit *SU_)
   454|      : SU(SU_) {}
      |  };
      |
      |  SpecificBumpPtrAllocator<Candidate> Alloc;
      |  typedef simple_ilist<Candidate> Queue;
      |  Queue PendingQueue;
      |  Queue AvailQueue;
      |  unsigned CurQueueId = 0;
      |
      |  std::vector<unsigned> SUNumbers;
      |
      |  /// CurCycle - The current scheduler state corresponds to this cycle.
      |  unsigned CurCycle = 0;
      |
      |  unsigned getNodePriority(const SUnit *SU) const;
      |
      |  const SUnit *pickBest(const SUnit *left, const SUnit *right);
      |  Candidate* pickCandidate();
      |
      |  void releasePending();
      |  void advanceToCycle(unsigned NextCycle);
      |  void releasePredecessors(const SUnit* SU);
      |
      |public:
      |  std::vector<const SUnit*> schedule(ArrayRef<const SUnit*> TopRoots,
      |                                     const ScheduleDAG &DAG);
      |};
      |} // namespace
      |
      |/// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number.
      |/// Smaller number is the higher priority.
      |static unsigned
 1.41k|CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {
 1.41k|  unsigned &SethiUllmanNumber = SUNumbers[SU->NodeNum];
 1.41k|  if (SethiUllmanNumber != 0)
   958|    return SethiUllmanNumber;
   454|
   454|  unsigned Extra = 0;
 4.50k|  for (const SDep &Pred : SU->Preds) {
 4.50k|    if (Pred.isCtrl()) continue; // ignore chain preds
   958|    SUnit *PredSU = Pred.getSUnit();
   958|    unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU, SUNumbers);
   958|    if (PredSethiUllman > SethiUllmanNumber) {
   476|      SethiUllmanNumber = PredSethiUllman;
   476|      Extra = 0;
   476|    }
   482|    else if (PredSethiUllman == SethiUllmanNumber)
   446|      ++Extra;
   958|  }
   454|
   454|  SethiUllmanNumber += Extra;
   454|
   454|  if (SethiUllmanNumber == 0)
     4|    SethiUllmanNumber = 1;
   454|
   454|  return SethiUllmanNumber;
   454|}
      |
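The recursion above can be exercised in isolation. Below is a minimal standalone sketch of the same bottom-up computation on a toy dependency graph; the Node struct and the three-node example are hypothetical stand-ins for LLVM's SUnit/SDep, and chain (control) edges are simply omitted rather than filtered:

// Minimal standalone analogue of CalcNodeSethiUllmanNumber (hypothetical Node
// type; only data edges are modelled).
#include <cstdio>
#include <vector>

struct Node {
  std::vector<int> DataPreds; // indices of data predecessors
};

static unsigned calcSethiUllman(int N, const std::vector<Node> &G,
                                std::vector<unsigned> &Numbers) {
  unsigned &SU = Numbers[N];
  if (SU != 0)
    return SU;                 // already computed (memoized)
  unsigned Extra = 0;
  for (int P : G[N].DataPreds) {
    unsigned PredSU = calcSethiUllman(P, G, Numbers);
    if (PredSU > SU) {         // the deeper operand subtree dominates
      SU = PredSU;
      Extra = 0;
    } else if (PredSU == SU)   // equally deep subtrees need an extra register
      ++Extra;
  }
  SU += Extra;
  if (SU == 0)
    SU = 1;                    // leaves get the minimum number
  return SU;
}

int main() {
  // Node 2 consumes nodes 0 and 1 (two equally deep operands).
  std::vector<Node> G = { {}, {}, { {0, 1} } };
  std::vector<unsigned> Numbers(G.size(), 0);
  for (int N = 0; N != (int)G.size(); ++N)
    std::printf("node %d: %u\n", N, calcSethiUllman(N, G, Numbers));
  return 0;
}

Running it prints 1, 1 and 2: the consumer of two equally deep operands gets the larger number, mirroring the ++Extra path above.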
      |// Lower priority means schedule further down. For bottom-up scheduling, lower
      |// priority SUs are scheduled before higher priority SUs.
 19.4k|unsigned GCNILPScheduler::getNodePriority(const SUnit *SU) const {
 19.4k|  assert(SU->NodeNum < SUNumbers.size());
 19.4k|  if (SU->NumSuccs == 0 && SU->NumPreds != 0)
   106|    // If SU does not have a register use, i.e. it doesn't produce a value
   106|    // that would be consumed (e.g. store), then it terminates a chain of
   106|    // computation. Give it a large SethiUllman number so it is scheduled
   106|    // right before its predecessors and therefore doesn't lengthen their
   106|    // live ranges.
   106|    return 0xffff;
 19.3k|
 19.3k|  if (SU->NumPreds == 0 && SU->NumSuccs != 0)
     4|    // If SU does not have a register def, schedule it close to its uses
     4|    // because it does not lengthen any live ranges.
     4|    return 0;
 19.3k|
 19.3k|  return SUNumbers[SU->NodeNum];
 19.3k|}
      |
      |/// closestSucc - Returns the scheduled cycle of the successor which is
      |/// closest to the current cycle.
 17.6k|static unsigned closestSucc(const SUnit *SU) {
 17.6k|  unsigned MaxHeight = 0;
  290k|  for (const SDep &Succ : SU->Succs) {
  290k|    if (Succ.isCtrl()) continue; // ignore chain succs
 22.9k|    unsigned Height = Succ.getSUnit()->getHeight();
 22.9k|    // If there are a bunch of CopyToRegs stacked up, they should be considered
 22.9k|    // to be at the same position.
 22.9k|    if (Height > MaxHeight)
 16.1k|      MaxHeight = Height;
 22.9k|  }
 17.6k|  return MaxHeight;
 17.6k|}
      |
      |/// calcMaxScratches - Returns a cost estimate of the worst-case requirement
      |/// for scratch registers, i.e. the number of data dependencies.
 17.6k|static unsigned calcMaxScratches(const SUnit *SU) {
 17.6k|  unsigned Scratches = 0;
 36.7k|  for (const SDep &Pred : SU->Preds) {
 36.7k|    if (Pred.isCtrl()) continue; // ignore chain preds
 35.2k|    Scratches++;
 35.2k|  }
 17.6k|  return Scratches;
 17.6k|}
      |
      |// Return -1 if left has higher priority, 1 if right has higher priority.
      |// Return 0 if latency-based priority is equivalent.
 8.80k|static int BUCompareLatency(const SUnit *left, const SUnit *right) {
 8.80k|  // Scheduling an instruction that uses a VReg whose postincrement has not yet
 8.80k|  // been scheduled will induce a copy. Model this as an extra cycle of latency.
 8.80k|  int LHeight = (int)left->getHeight();
 8.80k|  int RHeight = (int)right->getHeight();
 8.80k|
 8.80k|  // If either node is scheduling for latency, sort them by height/depth
 8.80k|  // and latency.
 8.80k|
 8.80k|  // If neither instruction stalls (!LStall && !RStall) and HazardRecognizer
 8.80k|  // is enabled, grouping instructions by cycle, then its height is already
 8.80k|  // covered so only its depth matters. We also reach this point if both stall
 8.80k|  // but have the same height.
 8.80k|  if (LHeight != RHeight)
     0|    return LHeight > RHeight ? 1 : -1;
 8.80k|
 8.80k|  int LDepth = left->getDepth();
 8.80k|  int RDepth = right->getDepth();
 8.80k|  if (LDepth != RDepth) {
     0|    LLVM_DEBUG(dbgs() << "  Comparing latency of SU (" << left->NodeNum
     0|                      << ") depth " << LDepth << " vs SU (" << right->NodeNum
     0|                      << ") depth " << RDepth << "\n");
     0|    return LDepth < RDepth ? 1 : -1;
     0|  }
 8.80k|  if (left->Latency != right->Latency)
     0|    return left->Latency > right->Latency ? 1 : -1;
 8.80k|
 8.80k|  return 0;
 8.80k|}
      |
      |const SUnit *GCNILPScheduler::pickBest(const SUnit *left, const SUnit *right)
 9.78k|{
 9.78k|  // TODO: add register pressure lowering checks
 9.78k|
 9.78k|  bool const DisableSchedCriticalPath = false;
 9.78k|  int MaxReorderWindow = 6;
 9.78k|  if (!DisableSchedCriticalPath) {
 9.78k|    int spread = (int)left->getDepth() - (int)right->getDepth();
 9.78k|    if (std::abs(spread) > MaxReorderWindow) {
    76|      LLVM_DEBUG(dbgs() << "Depth of SU(" << left->NodeNum << "): "
    76|                        << left->getDepth() << " != SU(" << right->NodeNum
    76|                        << "): " << right->getDepth() << "\n");
    76|      return left->getDepth() < right->getDepth() ? right : left;
    76|    }
 9.71k|  }
 9.71k|
 9.71k|  bool const DisableSchedHeight = false;
 9.71k|  if (!DisableSchedHeight && left->getHeight() != right->getHeight()) {
   838|    int spread = (int)left->getHeight() - (int)right->getHeight();
   838|    if (std::abs(spread) > MaxReorderWindow)
     0|      return left->getHeight() > right->getHeight() ? right : left;
 9.71k|  }
 9.71k|
 9.71k|  // Prioritize by Sethi-Ullman number and push CopyToReg nodes down.
 9.71k|  unsigned LPriority = getNodePriority(left);
 9.71k|  unsigned RPriority = getNodePriority(right);
 9.71k|
 9.71k|  if (LPriority != RPriority)
   900|    return LPriority > RPriority ? right : left;
 8.81k|
 8.81k|  // Try to schedule def + use closer when Sethi-Ullman numbers are the same.
 8.81k|  // e.g.
 8.81k|  // t1 = op t2, c1
 8.81k|  // t3 = op t4, c2
 8.81k|  //
 8.81k|  // and the following instructions are both ready.
 8.81k|  // t2 = op c3
 8.81k|  // t4 = op c4
 8.81k|  //
 8.81k|  // Then schedule t2 = op first.
 8.81k|  // i.e.
 8.81k|  // t4 = op c4
 8.81k|  // t2 = op c3
 8.81k|  // t1 = op t2, c1
 8.81k|  // t3 = op t4, c2
 8.81k|  //
 8.81k|  // This creates more short live intervals.
 8.81k|  unsigned LDist = closestSucc(left);
 8.81k|  unsigned RDist = closestSucc(right);
 8.81k|  if (LDist != RDist)
     6|    return LDist < RDist ? right : left;
 8.80k|
 8.80k|  // How many registers become live when the node is scheduled.
 8.80k|  unsigned LScratch = calcMaxScratches(left);
 8.80k|  unsigned RScratch = calcMaxScratches(right);
 8.80k|  if (LScratch != RScratch)
     0|    return LScratch > RScratch ? right : left;
 8.80k|
 8.80k|  bool const DisableSchedCycles = false;
 8.80k|  if (!DisableSchedCycles) {
 8.80k|    int result = BUCompareLatency(left, right);
 8.80k|    if (result != 0)
     0|      return result > 0 ? right : left;
 8.80k|    return left;
 8.80k|  }
     0|  else {
     0|    if (left->getHeight() != right->getHeight())
     0|      return (left->getHeight() > right->getHeight()) ? right : left;
     0|
     0|    if (left->getDepth() != right->getDepth())
     0|      return (left->getDepth() < right->getDepth()) ? right : left;
     0|  }
     0|
     0|  assert(left->NodeQueueId && right->NodeQueueId &&
     0|        "NodeQueueId cannot be zero");
     0|  return (left->NodeQueueId > right->NodeQueueId) ? right : left;
     0|}
      |
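The function above applies its tie-breakers in a fixed order: critical-path depth window, height window, Sethi-Ullman priority, closest-successor distance, scratch count, and finally latency. A compact standalone sketch of that ordering, using a hypothetical Cand struct in place of SUnit and folding the latency comparison into the final fallback, might look like:

// Hypothetical, simplified mirror of the pickBest() tie-breaker order above.
#include <cstdlib>

struct Cand {
  int Depth, Height;
  unsigned Priority;    // Sethi-Ullman number (smaller wins)
  unsigned ClosestSucc; // height of the closest non-chain successor
  unsigned Scratches;   // number of data predecessors
};

static const Cand *pickBestToy(const Cand *L, const Cand *R) {
  const int MaxReorderWindow = 6;
  if (std::abs(L->Depth - R->Depth) > MaxReorderWindow)    // critical path
    return L->Depth < R->Depth ? R : L;
  if (L->Height != R->Height &&
      std::abs(L->Height - R->Height) > MaxReorderWindow)  // height window
    return L->Height > R->Height ? R : L;
  if (L->Priority != R->Priority)                          // Sethi-Ullman
    return L->Priority > R->Priority ? R : L;
  if (L->ClosestSucc != R->ClosestSucc)                    // def/use distance
    return L->ClosestSucc < R->ClosestSucc ? R : L;
  if (L->Scratches != R->Scratches)                        // registers made live
    return L->Scratches > R->Scratches ? R : L;
  return L;                                                // latency tie: keep left
}

int main() {
  Cand A{0, 3, 5, 2, 1}, B{0, 3, 7, 2, 1};
  return pickBestToy(&A, &B) == &A ? 0 : 1; // A wins on the smaller SU number
}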
   454|GCNILPScheduler::Candidate* GCNILPScheduler::pickCandidate() {
   454|  if (AvailQueue.empty())
     0|    return nullptr;
   454|  auto Best = AvailQueue.begin();
 10.2k|  for (auto I = std::next(AvailQueue.begin()), E = AvailQueue.end(); I != E; ++I) {
 9.78k|    auto NewBestSU = pickBest(Best->SU, I->SU);
 9.78k|    if (NewBestSU != Best->SU) {
   322|      assert(NewBestSU == I->SU);
   322|      Best = I;
   322|    }
 9.78k|  }
   454|  return &*Best;
   454|}
      |
    66|void GCNILPScheduler::releasePending() {
    66|  // Check to see if any of the pending instructions are ready to issue.  If
    66|  // so, add them to the available queue.
   518|  for(auto I = PendingQueue.begin(), E = PendingQueue.end(); I != E;) {
   452|    auto &C = *I++;
   452|    if (C.SU->getHeight() <= CurCycle) {
   452|      PendingQueue.remove(C);
   452|      AvailQueue.push_back(C);
   452|      C.SU->NodeQueueId = CurQueueId++;
   452|    }
   452|  }
    66|}
      |
      |/// Move the scheduler state forward by the specified number of Cycles.
   520|void GCNILPScheduler::advanceToCycle(unsigned NextCycle) {
   520|  if (NextCycle <= CurCycle)
   454|    return;
    66|  CurCycle = NextCycle;
    66|  releasePending();
    66|}
      |
   456|void GCNILPScheduler::releasePredecessors(const SUnit* SU) {
 4.50k|  for (const auto &PredEdge : SU->Preds) {
 4.50k|    auto PredSU = PredEdge.getSUnit();
 4.50k|    if (PredEdge.isWeak())
     0|      continue;
 4.50k|    assert(PredSU->isBoundaryNode() || PredSU->NumSuccsLeft > 0);
 4.50k|
 4.50k|    PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge.getLatency());
 4.50k|
 4.50k|    if (!PredSU->isBoundaryNode() && --PredSU->NumSuccsLeft == 0)
   452|      PendingQueue.push_front(*new (Alloc.Allocate()) Candidate(PredSU));
 4.50k|  }
   456|}
      |
      |std::vector<const SUnit*>
      |GCNILPScheduler::schedule(ArrayRef<const SUnit*> BotRoots,
     2|                          const ScheduleDAG &DAG) {
     2|  auto &SUnits = const_cast<ScheduleDAG&>(DAG).SUnits;
     2|
     2|  std::vector<SUnit> SUSavedCopy;
     2|  SUSavedCopy.resize(SUnits.size());
     2|
     2|  // We cannot save only those fields we touch: some of them are private,
     2|  // so save the units verbatim: this assumes SUnit has value semantics.
     2|  for (const SUnit &SU : SUnits)
   454|    SUSavedCopy[SU.NodeNum] = SU;
     2|
     2|  SUNumbers.assign(SUnits.size(), 0);
     2|  for (const SUnit &SU : SUnits)
   454|    CalcNodeSethiUllmanNumber(&SU, SUNumbers);
     2|
     2|  for (auto SU : BotRoots) {
     2|    AvailQueue.push_back(
     2|      *new (Alloc.Allocate()) Candidate(const_cast<SUnit*>(SU)));
     2|  }
     2|  releasePredecessors(&DAG.ExitSU);
     2|
     2|  std::vector<const SUnit*> Schedule;
     2|  Schedule.reserve(SUnits.size());
   456|  while (true) {
   456|    if (AvailQueue.empty() && !PendingQueue.empty()) {
    66|      auto EarliestSU = std::min_element(
    66|        PendingQueue.begin(), PendingQueue.end(),
   386|        [=](const Candidate& C1, const Candidate& C2) {
   386|        return C1.SU->getHeight() < C2.SU->getHeight();
   386|      })->SU;
    66|      advanceToCycle(std::max(CurCycle + 1, EarliestSU->getHeight()));
    66|    }
   456|    if (AvailQueue.empty())
     2|      break;
   454|
   454|    LLVM_DEBUG(dbgs() << "\n=== Picking candidate\n"
   454|                         "Ready queue:";
   454|               for (auto &C
   454|                    : AvailQueue) dbgs()
   454|               << ' ' << C.SU->NodeNum;
   454|               dbgs() << '\n';);
   454|
   454|    auto C = pickCandidate();
   454|    assert(C);
   454|    AvailQueue.remove(*C);
   454|    auto SU = C->SU;
   454|    LLVM_DEBUG(dbgs() << "Selected "; DAG.dumpNode(*SU));
   454|
   454|    advanceToCycle(SU->getHeight());
   454|
   454|    releasePredecessors(SU);
   454|    Schedule.push_back(SU);
   454|    SU->isScheduled = true;
   454|  }
     2|  assert(SUnits.size() == Schedule.size());
     2|
     2|  std::reverse(Schedule.begin(), Schedule.end());
     2|
     2|  // restore units
     2|  for (auto &SU : SUnits)
   454|    SU = SUSavedCopy[SU.NodeNum];
     2|
     2|  return Schedule;
     2|}
      |
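The loop above maintains two queues: nodes whose successors have all been scheduled sit in PendingQueue until CurCycle reaches their height, at which point they move to AvailQueue and become candidates. A minimal standalone sketch of that mechanism on a toy four-node graph is shown below; ToyNode, the unit latencies, and the FIFO pick (Avail.front() standing in for pickCandidate()) are assumptions for illustration only:

// Hypothetical standalone mirror of the pending/available queue mechanics in
// schedule(); a toy four-node graph replaces ScheduleDAG, latencies are 1.
#include <algorithm>
#include <cstdio>
#include <deque>
#include <vector>

struct ToyNode {
  std::vector<int> Preds;    // data predecessors (node indices)
  unsigned NumSuccsLeft = 0; // successors not yet scheduled
  unsigned Height = 0;       // earliest cycle this node may issue at
};

int main() {
  std::vector<ToyNode> G(4); // nodes 0 and 1 feed 2; 2 feeds 3 (bottom root)
  G[2].Preds = {0, 1};
  G[3].Preds = {2};
  for (auto &N : G)
    for (int P : N.Preds)
      ++G[P].NumSuccsLeft;

  std::deque<int> Pending, Avail = {3};
  unsigned CurCycle = 0;
  std::vector<int> Schedule;

  auto advanceToCycle = [&](unsigned NextCycle) {
    if (NextCycle <= CurCycle)
      return;
    CurCycle = NextCycle;
    for (size_t I = 0; I != Pending.size();) // releasePending()
      if (G[Pending[I]].Height <= CurCycle) {
        Avail.push_back(Pending[I]);
        Pending.erase(Pending.begin() + I);
      } else {
        ++I;
      }
  };

  while (true) {
    if (Avail.empty() && !Pending.empty()) { // jump to the earliest pending node
      unsigned Earliest = G[Pending.front()].Height;
      for (int N : Pending)
        Earliest = std::min(Earliest, G[N].Height);
      advanceToCycle(std::max(CurCycle + 1, Earliest));
    }
    if (Avail.empty())
      break;
    int SU = Avail.front(); // stand-in for pickCandidate()
    Avail.pop_front();
    advanceToCycle(G[SU].Height);
    for (int P : G[SU].Preds) { // releasePredecessors(), unit latency
      G[P].Height = std::max(G[P].Height, G[SU].Height + 1);
      if (--G[P].NumSuccsLeft == 0)
        Pending.push_back(P);
    }
    Schedule.push_back(SU);
  }

  std::reverse(Schedule.begin(), Schedule.end()); // bottom-up -> top-down order
  for (int N : Schedule)
    std::printf("%d ", N);
  std::printf("\n");
  return 0;
}

It prints "1 0 2 3": the bottom root is scheduled first, and the final std::reverse turns the bottom-up pick order into a top-down schedule, just as in the function above.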
      |namespace llvm {
      |std::vector<const SUnit*> makeGCNILPScheduler(ArrayRef<const SUnit*> BotRoots,
     2|                                              const ScheduleDAG &DAG) {
     2|  GCNILPScheduler S;
     2|  return S.schedule(BotRoots, DAG);
     2|}
      |}