Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
Line
Count
Source (jump to first uncovered line)
1
//===-- GCNSchedStrategy.cpp - GCN Scheduler Strategy ---------------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
/// \file
10
/// This contains a MachineSchedStrategy implementation for maximizing wave
11
/// occupancy on GCN hardware.
12
//===----------------------------------------------------------------------===//
13
14
#include "GCNSchedStrategy.h"
15
#include "AMDGPUSubtarget.h"
16
#include "SIInstrInfo.h"
17
#include "SIMachineFunctionInfo.h"
18
#include "SIRegisterInfo.h"
19
#include "llvm/CodeGen/RegisterClassInfo.h"
20
#include "llvm/Support/MathExtras.h"
21
22
#define DEBUG_TYPE "machine-scheduler"
23
24
using namespace llvm;
25
26
// Construct the strategy with no occupancy target and no machine function
// bound yet; both are established later in initialize().
GCNMaxOccupancySchedStrategy::GCNMaxOccupancySchedStrategy(
    const MachineSchedContext *C)
    : GenericScheduler(C), TargetOccupancy(0), MF(nullptr) {}
29
30
27.3k
// Set up the per-region scheduling limits.  The "excess" limits are the hard
// allocatable-register bounds; the "critical" limits are the bounds beyond
// which wave occupancy would drop.  Both are shrunk by a small error margin.
void GCNMaxOccupancySchedStrategy::initialize(ScheduleDAGMI *DAG) {
  GenericScheduler::initialize(DAG);

  MF = &DAG->MF;
  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo *>(TRI);

  // FIXME: This is also necessary, because some passes that run after
  // scheduling and before regalloc increase register pressure.
  const int ErrorMargin = 3;

  const RegisterClassInfo *RCI = Context->RegClassInfo;
  SGPRExcessLimit =
      RCI->getNumAllocatableRegs(&AMDGPU::SGPR_32RegClass) - ErrorMargin;
  VGPRExcessLimit =
      RCI->getNumAllocatableRegs(&AMDGPU::VGPR_32RegClass) - ErrorMargin;

  if (TargetOccupancy) {
    // A target occupancy was requested (second scheduling stage); derive the
    // critical limits from the register budget of that occupancy.
    SGPRCriticalLimit = ST.getMaxNumSGPRs(TargetOccupancy, true);
    VGPRCriticalLimit = ST.getMaxNumVGPRs(TargetOccupancy);
  } else {
    // First stage: fall back to the generic pressure-set limits.
    SGPRCriticalLimit =
        SRI->getRegPressureSetLimit(DAG->MF, SRI->getSGPRPressureSet());
    VGPRCriticalLimit =
        SRI->getRegPressureSetLimit(DAG->MF, SRI->getVGPRPressureSet());
  }

  SGPRCriticalLimit -= ErrorMargin;
  VGPRCriticalLimit -= ErrorMargin;
}
60
61
/// Initialize candidate \p Cand for scheduling unit \p SU and record the
/// SGPR/VGPR pressure deltas (excess and critical) that scheduling \p SU
/// would cause.  \p SGPRPressure and \p VGPRPressure are the pressures at the
/// current position, computed once by the caller and shared by all candidates
/// of the same queue scan.
void GCNMaxOccupancySchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
                                     bool AtTop, const RegPressureTracker &RPTracker,
                                     const SIRegisterInfo *SRI,
                                     unsigned SGPRPressure,
                                     unsigned VGPRPressure) {

  Cand.SU = SU;
  Cand.AtTop = AtTop;

  // getDownwardPressure() and getUpwardPressure() make temporary changes to
  // the tracker, so we need to pass those functions a non-const copy.
  RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);

  std::vector<unsigned> Pressure;
  std::vector<unsigned> MaxPressure;

  if (AtTop)
    TempTracker.getDownwardPressure(SU->getInstr(), Pressure, MaxPressure);
  else {
    // FIXME: I think for bottom up scheduling, the register pressure is cached
    // and can be retrieved by DAG->getPressureDiff(SU).
    TempTracker.getUpwardPressure(SU->getInstr(), Pressure, MaxPressure);
  }

  // Pressure after hypothetically scheduling SU at the current position.
  unsigned NewSGPRPressure = Pressure[SRI->getSGPRPressureSet()];
  unsigned NewVGPRPressure = Pressure[SRI->getVGPRPressureSet()];

  // If two instructions increase the pressure of different register sets
  // by the same amount, the generic scheduler will prefer to schedule the
  // instruction that increases the set with the least amount of registers,
  // which in our case would be SGPRs.  This is rarely what we want, so
  // when we report excess/critical register pressure, we do it either
  // only for VGPRs or only for SGPRs.

  // FIXME: Better heuristics to determine whether to prefer SGPRs or VGPRs.
  const unsigned MaxVGPRPressureInc = 16;
  // VGPRs win the tie-break: SGPR excess is only tracked when VGPR pressure
  // is comfortably (MaxVGPRPressureInc) below its excess limit.
  bool ShouldTrackVGPRs = VGPRPressure + MaxVGPRPressureInc >= VGPRExcessLimit;
  bool ShouldTrackSGPRs = !ShouldTrackVGPRs && SGPRPressure >= SGPRExcessLimit;


  // FIXME: We have to enter REG-EXCESS before we reach the actual threshold
  // to increase the likelihood we don't go over the limits.  We should improve
  // the analysis to look through dependencies to find the path with the least
  // register pressure.

  // We only need to update the RPDelta for instructions that increase
  // register pressure.  Instructions that decrease or keep reg pressure
  // the same will be marked as RegExcess in tryCandidate() when they
  // are compared with instructions that increase the register pressure.
  if (ShouldTrackVGPRs && NewVGPRPressure >= VGPRExcessLimit) {
    Cand.RPDelta.Excess = PressureChange(SRI->getVGPRPressureSet());
    Cand.RPDelta.Excess.setUnitInc(NewVGPRPressure - VGPRExcessLimit);
  }

  if (ShouldTrackSGPRs && NewSGPRPressure >= SGPRExcessLimit) {
    Cand.RPDelta.Excess = PressureChange(SRI->getSGPRPressureSet());
    Cand.RPDelta.Excess.setUnitInc(NewSGPRPressure - SGPRExcessLimit);
  }

  // Register pressure is considered 'CRITICAL' if it is approaching a value
  // that would reduce the wave occupancy for the execution unit.  When
  // register pressure is 'CRITICAL', increasing SGPR and VGPR pressure both
  // has the same cost, so we don't need to prefer one over the other.

  int SGPRDelta = NewSGPRPressure - SGPRCriticalLimit;
  int VGPRDelta = NewVGPRPressure - VGPRCriticalLimit;

  if (SGPRDelta >= 0 || VGPRDelta >= 0) {
    // Report the larger of the two overshoots as the critical pressure
    // change for this candidate.
    if (SGPRDelta > VGPRDelta) {
      Cand.RPDelta.CriticalMax = PressureChange(SRI->getSGPRPressureSet());
      Cand.RPDelta.CriticalMax.setUnitInc(SGPRDelta);
    } else {
      Cand.RPDelta.CriticalMax = PressureChange(SRI->getVGPRPressureSet());
      Cand.RPDelta.CriticalMax.setUnitInc(VGPRDelta);
    }
  }
}
138
139
// This function is mostly cut and pasted from
140
// GenericScheduler::pickNodeFromQueue()
141
void GCNMaxOccupancySchedStrategy::pickNodeFromQueue(SchedBoundary &Zone,
142
                                         const CandPolicy &ZonePolicy,
143
                                         const RegPressureTracker &RPTracker,
144
370k
                                         SchedCandidate &Cand) {
145
370k
  const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo*>(TRI);
146
370k
  ArrayRef<unsigned> Pressure = RPTracker.getRegSetPressureAtPos();
147
370k
  unsigned SGPRPressure = Pressure[SRI->getSGPRPressureSet()];
148
370k
  unsigned VGPRPressure = Pressure[SRI->getVGPRPressureSet()];
149
370k
  ReadyQueue &Q = Zone.Available;
150
3.68M
  for (SUnit *SU : Q) {
151
3.68M
152
3.68M
    SchedCandidate TryCand(ZonePolicy);
153
3.68M
    initCandidate(TryCand, SU, Zone.isTop(), RPTracker, SRI,
154
3.68M
                  SGPRPressure, VGPRPressure);
155
3.68M
    // Pass SchedBoundary only when comparing nodes from the same boundary.
156
3.68M
    SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? 
&Zone3.55M
:
nullptr124k
;
157
3.68M
    GenericScheduler::tryCandidate(Cand, TryCand, ZoneArg);
158
3.68M
    if (TryCand.Reason != NoCand) {
159
1.33M
      // Initialize resource delta if needed in case future heuristics query it.
160
1.33M
      if (TryCand.ResDelta == SchedResourceDelta())
161
1.33M
        TryCand.initResourceDelta(Zone.DAG, SchedModel);
162
1.33M
      Cand.setBest(TryCand);
163
1.33M
    }
164
3.68M
  }
165
370k
}
166
167
// This function is mostly cut and pasted from
// GenericScheduler::pickNodeBidirectional()
//
// Unlike the generic version, when the top and bottom candidates were chosen
// for different reasons, this one first prefers whichever candidate was
// picked for a register-pressure reason (RegExcess/RegCritical) with a
// non-positive pressure increase, before falling back to the plain reason
// comparison.
SUnit *GCNMaxOccupancySchedStrategy::pickNodeBidirectional(bool &IsTopNode) {
  // Schedule as far as possible in the direction of no choice. This is most
  // efficient, but also provides the best heuristics for CriticalPSets.
  if (SUnit *SU = Bot.pickOnlyChoice()) {
    IsTopNode = false;
    return SU;
  }
  if (SUnit *SU = Top.pickOnlyChoice()) {
    IsTopNode = true;
    return SU;
  }
  // Set the bottom-up policy based on the state of the current bottom zone and
  // the instructions outside the zone, including the top zone.
  CandPolicy BotPolicy;
  setPolicy(BotPolicy, /*IsPostRA=*/false, Bot, &Top);
  // Set the top-down policy based on the state of the current top zone and
  // the instructions outside the zone, including the bottom zone.
  CandPolicy TopPolicy;
  setPolicy(TopPolicy, /*IsPostRA=*/false, Top, &Bot);

  // See if BotCand is still valid (because we previously scheduled from Top).
  LLVM_DEBUG(dbgs() << "Picking from Bot:\n");
  if (!BotCand.isValid() || BotCand.SU->isScheduled ||
      BotCand.Policy != BotPolicy) {
    BotCand.reset(CandPolicy());
    pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand);
    assert(BotCand.Reason != NoCand && "failed to find the first candidate");
  } else {
    LLVM_DEBUG(traceCandidate(BotCand));
  }

  // Check if the top Q has a better candidate.
  LLVM_DEBUG(dbgs() << "Picking from Top:\n");
  if (!TopCand.isValid() || TopCand.SU->isScheduled ||
      TopCand.Policy != TopPolicy) {
    TopCand.reset(CandPolicy());
    pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand);
    assert(TopCand.Reason != NoCand && "failed to find a candidate");
  } else {
    LLVM_DEBUG(traceCandidate(TopCand));
  }

  // Pick best from BotCand and TopCand.
  LLVM_DEBUG(dbgs() << "Top Cand: "; traceCandidate(TopCand);
             dbgs() << "Bot Cand: "; traceCandidate(BotCand););
  SchedCandidate Cand;
  if (TopCand.Reason == BotCand.Reason) {
    // Same reason: let tryCandidate() break the tie directly.  TopCand's
    // reason is temporarily cleared so tryCandidate() re-evaluates it, and
    // restored afterwards if TopCand loses so its cached state stays valid.
    Cand = BotCand;
    GenericSchedulerBase::CandReason TopReason = TopCand.Reason;
    TopCand.Reason = NoCand;
    GenericScheduler::tryCandidate(Cand, TopCand, nullptr);
    if (TopCand.Reason != NoCand) {
      Cand.setBest(TopCand);
    } else {
      TopCand.Reason = TopReason;
    }
  } else {
    // Different reasons: prefer a candidate chosen for register pressure that
    // does not actually increase pressure (unit increment <= 0).
    if (TopCand.Reason == RegExcess && TopCand.RPDelta.Excess.getUnitInc() <= 0) {
      Cand = TopCand;
    } else if (BotCand.Reason == RegExcess &&
               BotCand.RPDelta.Excess.getUnitInc() <= 0) {
      Cand = BotCand;
    } else if (TopCand.Reason == RegCritical &&
               TopCand.RPDelta.CriticalMax.getUnitInc() <= 0) {
      Cand = TopCand;
    } else if (BotCand.Reason == RegCritical &&
               BotCand.RPDelta.CriticalMax.getUnitInc() <= 0) {
      Cand = BotCand;
    } else {
      // Otherwise the lower (more important) CandReason enum value wins.
      if (BotCand.Reason > TopCand.Reason) {
        Cand = TopCand;
      } else {
        Cand = BotCand;
      }
    }
  }
  LLVM_DEBUG(dbgs() << "Picking: "; traceCandidate(Cand););

  IsTopNode = Cand.AtTop;
  return Cand.SU;
}
247
248
// This function is mostly cut and pasted from
// GenericScheduler::pickNode()
//
// Returns the next SUnit to schedule, or nullptr when the region is done.
// Sets IsTopNode to indicate which boundary the node was taken from.
SUnit *GCNMaxOccupancySchedStrategy::pickNode(bool &IsTopNode) {
  // All instructions between top() and bottom() have been scheduled.
  if (DAG->top() == DAG->bottom()) {
    assert(Top.Available.empty() && Top.Pending.empty() &&
           Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
    return nullptr;
  }
  SUnit *SU;
  // Loop until we pick a node that has not already been scheduled from the
  // opposite boundary.
  do {
    if (RegionPolicy.OnlyTopDown) {
      SU = Top.pickOnlyChoice();
      if (!SU) {
        CandPolicy NoPolicy;
        TopCand.reset(NoPolicy);
        pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand);
        assert(TopCand.Reason != NoCand && "failed to find a candidate");
        SU = TopCand.SU;
      }
      IsTopNode = true;
    } else if (RegionPolicy.OnlyBottomUp) {
      SU = Bot.pickOnlyChoice();
      if (!SU) {
        CandPolicy NoPolicy;
        BotCand.reset(NoPolicy);
        pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand);
        assert(BotCand.Reason != NoCand && "failed to find a candidate");
        SU = BotCand.SU;
      }
      IsTopNode = false;
    } else {
      // Default for this strategy: consider both boundaries.
      SU = pickNodeBidirectional(IsTopNode);
    }
  } while (SU->isScheduled);

  // A node can sit in both ready queues; remove it from each one it is in.
  if (SU->isTopReady())
    Top.removeReady(SU);
  if (SU->isBottomReady())
    Bot.removeReady(SU);

  LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
                    << *SU->getInstr());
  return SU;
}
292
293
// Construct the GCN scheduling DAG.  The starting occupancy is taken from the
// machine function info; MinOccupancy tracks the lowest occupancy any region
// forces during scheduling and starts out equal to StartingOccupancy.  Stage 0
// is the region-recording pass (see schedule()/finalizeSchedule()).
GCNScheduleDAGMILive::GCNScheduleDAGMILive(MachineSchedContext *C,
                        std::unique_ptr<MachineSchedStrategy> S) :
  ScheduleDAGMILive(C, std::move(S)),
  ST(MF.getSubtarget<GCNSubtarget>()),
  MFI(*MF.getInfo<SIMachineFunctionInfo>()),
  StartingOccupancy(MFI.getOccupancy()),
  MinOccupancy(StartingOccupancy), Stage(0), RegionIdx(0) {

  LLVM_DEBUG(dbgs() << "Starting occupancy is " << StartingOccupancy << ".\n");
}
303
304
54.2k
void GCNScheduleDAGMILive::schedule() {
305
54.2k
  if (Stage == 0) {
306
26.9k
    // Just record regions at the first pass.
307
26.9k
    Regions.push_back(std::make_pair(RegionBegin, RegionEnd));
308
26.9k
    return;
309
26.9k
  }
310
27.3k
311
27.3k
  std::vector<MachineInstr*> Unsched;
312
27.3k
  Unsched.reserve(NumRegionInstrs);
313
405k
  for (auto &I : *this) {
314
405k
    Unsched.push_back(&I);
315
405k
  }
316
27.3k
317
27.3k
  GCNRegPressure PressureBefore;
318
27.3k
  if (LIS) {
319
27.3k
    PressureBefore = Pressure[RegionIdx];
320
27.3k
321
27.3k
    LLVM_DEBUG(dbgs() << "Pressure before scheduling:\nRegion live-ins:";
322
27.3k
               GCNRPTracker::printLiveRegs(dbgs(), LiveIns[RegionIdx], MRI);
323
27.3k
               dbgs() << "Region live-in pressure:  ";
324
27.3k
               llvm::getRegPressure(MRI, LiveIns[RegionIdx]).print(dbgs());
325
27.3k
               dbgs() << "Region register pressure: ";
326
27.3k
               PressureBefore.print(dbgs()));
327
27.3k
  }
328
27.3k
329
27.3k
  ScheduleDAGMILive::schedule();
330
27.3k
  Regions[RegionIdx] = std::make_pair(RegionBegin, RegionEnd);
331
27.3k
332
27.3k
  if (!LIS)
333
0
    return;
334
27.3k
335
27.3k
  // Check the results of scheduling.
336
27.3k
  GCNMaxOccupancySchedStrategy &S = (GCNMaxOccupancySchedStrategy&)*SchedImpl;
337
27.3k
  auto PressureAfter = getRealRegPressure();
338
27.3k
339
27.3k
  LLVM_DEBUG(dbgs() << "Pressure after scheduling: ";
340
27.3k
             PressureAfter.print(dbgs()));
341
27.3k
342
27.3k
  if (PressureAfter.getSGPRNum() <= S.SGPRCriticalLimit &&
343
27.3k
      
PressureAfter.getVGPRNum() <= S.VGPRCriticalLimit27.2k
) {
344
26.5k
    Pressure[RegionIdx] = PressureAfter;
345
26.5k
    LLVM_DEBUG(dbgs() << "Pressure in desired limits, done.\n");
346
26.5k
    return;
347
26.5k
  }
348
732
  unsigned Occ = MFI.getOccupancy();
349
732
  unsigned WavesAfter = std::min(Occ, PressureAfter.getOccupancy(ST));
350
732
  unsigned WavesBefore = std::min(Occ, PressureBefore.getOccupancy(ST));
351
732
  LLVM_DEBUG(dbgs() << "Occupancy before scheduling: " << WavesBefore
352
732
                    << ", after " << WavesAfter << ".\n");
353
732
354
732
  // We could not keep current target occupancy because of the just scheduled
355
732
  // region. Record new occupancy for next scheduling cycle.
356
732
  unsigned NewOccupancy = std::max(WavesAfter, WavesBefore);
357
732
  // Allow memory bound functions to drop to 4 waves if not limited by an
358
732
  // attribute.
359
732
  if (WavesAfter < WavesBefore && 
WavesAfter < MinOccupancy46
&&
360
732
      
WavesAfter >= MFI.getMinAllowedOccupancy()46
) {
361
31
    LLVM_DEBUG(dbgs() << "Function is memory bound, allow occupancy drop up to "
362
31
                      << MFI.getMinAllowedOccupancy() << " waves\n");
363
31
    NewOccupancy = WavesAfter;
364
31
  }
365
732
  if (NewOccupancy < MinOccupancy) {
366
314
    MinOccupancy = NewOccupancy;
367
314
    MFI.limitOccupancy(MinOccupancy);
368
314
    LLVM_DEBUG(dbgs() << "Occupancy lowered for the function to "
369
314
                      << MinOccupancy << ".\n");
370
314
  }
371
732
372
732
  if (WavesAfter >= MinOccupancy) {
373
717
    Pressure[RegionIdx] = PressureAfter;
374
717
    return;
375
717
  }
376
15
377
15
  LLVM_DEBUG(dbgs() << "Attempting to revert scheduling.\n");
378
15
  RegionEnd = RegionBegin;
379
1.57k
  for (MachineInstr *MI : Unsched) {
380
1.57k
    if (MI->isDebugInstr())
381
1
      continue;
382
1.56k
383
1.56k
    if (MI->getIterator() != RegionEnd) {
384
903
      BB->remove(MI);
385
903
      BB->insert(RegionEnd, MI);
386
903
      if (!MI->isDebugInstr())
387
903
        LIS->handleMove(*MI, true);
388
903
    }
389
1.56k
    // Reset read-undef flags and update them later.
390
1.56k
    for (auto &Op : MI->operands())
391
7.77k
      if (Op.isReg() && 
Op.isDef()5.06k
)
392
1.48k
        Op.setIsUndef(false);
393
1.56k
    RegisterOperands RegOpers;
394
1.56k
    RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false);
395
1.56k
    if (!MI->isDebugInstr()) {
396
1.56k
      if (ShouldTrackLaneMasks) {
397
1.56k
        // Adjust liveness and add missing dead+read-undef flags.
398
1.56k
        SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
399
1.56k
        RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
400
1.56k
      } else {
401
0
        // Adjust for missing dead-def flags.
402
0
        RegOpers.detectDeadDefs(*MI, *LIS);
403
0
      }
404
1.56k
    }
405
1.56k
    RegionEnd = MI->getIterator();
406
1.56k
    ++RegionEnd;
407
1.56k
    LLVM_DEBUG(dbgs() << "Scheduling " << *MI);
408
1.56k
  }
409
15
  RegionBegin = Unsched.front()->getIterator();
410
15
  Regions[RegionIdx] = std::make_pair(RegionBegin, RegionEnd);
411
15
412
15
  placeDebugValues();
413
15
}
414
415
27.3k
// Recompute the exact maximum register pressure of the current region by
// walking it with a downward RP tracker seeded from the region's cached
// live-in set.
GCNRegPressure GCNScheduleDAGMILive::getRealRegPressure() const {
  GCNDownwardRPTracker Tracker(*LIS);
  Tracker.advance(begin(), end(), &LiveIns[RegionIdx]);
  return Tracker.moveMaxPressure();
}
420
421
25.8k
// Compute and cache the live-in set and maximum register pressure for every
// scheduling region of \p MBB, walking the block once top-down with a
// GCNDownwardRPTracker.  Regions[RegionIdx..] are expected to start with this
// block's regions, stored bottom-up within the block.
void GCNScheduleDAGMILive::computeBlockPressure(const MachineBasicBlock *MBB) {
  GCNDownwardRPTracker RPTracker(*LIS);

  // If the block has the only successor then live-ins of that successor are
  // live-outs of the current block. We can reuse calculated live set if the
  // successor will be sent to scheduling past current block.
  const MachineBasicBlock *OnlySucc = nullptr;
  if (MBB->succ_size() == 1 && !(*MBB->succ_begin())->empty()) {
    SlotIndexes *Ind = LIS->getSlotIndexes();
    // Only reuse if the successor is laid out (and thus scheduled) after us.
    if (Ind->getMBBStartIdx(MBB) < Ind->getMBBStartIdx(*MBB->succ_begin()))
      OnlySucc = *MBB->succ_begin();
  }

  // Scheduler sends regions from the end of the block upwards.
  // Find the last region index belonging to this block; regions within the
  // block are stored bottom-up, so Regions[CurRegion] after the loop is the
  // topmost region of MBB.
  size_t CurRegion = RegionIdx;
  for (size_t E = Regions.size(); CurRegion != E; ++CurRegion)
    if (Regions[CurRegion].first->getParent() != MBB)
      break;
  --CurRegion;

  auto I = MBB->begin();
  auto LiveInIt = MBBLiveIns.find(MBB);
  if (LiveInIt != MBBLiveIns.end()) {
    // A predecessor already computed our live-ins (OnlySucc path below);
    // consume the cached set.
    auto LiveIn = std::move(LiveInIt->second);
    RPTracker.reset(*MBB->begin(), &LiveIn);
    MBBLiveIns.erase(LiveInIt);
  } else {
    // Otherwise start at the topmost region and seed the tracker from the
    // precomputed per-block live-in map.
    auto &Rgn = Regions[CurRegion];
    I = Rgn.first;
    auto *NonDbgMI = &*skipDebugInstructionsForward(Rgn.first, Rgn.second);
    auto LRS = BBLiveInMap.lookup(NonDbgMI);
    assert(isEqual(getLiveRegsBefore(*NonDbgMI, *LIS), LRS));
    RPTracker.reset(*I, &LRS);
  }

  // Walk down the block.  At each region boundary, snapshot live-ins on entry
  // and max pressure on exit; CurRegion counts down because regions are
  // stored bottom-up within the block.
  for ( ; ; ) {
    I = RPTracker.getNext();

    if (Regions[CurRegion].first == I) {
      LiveIns[CurRegion] = RPTracker.getLiveRegs();
      RPTracker.clearMaxPressure();
    }

    if (Regions[CurRegion].second == I) {
      Pressure[CurRegion] = RPTracker.moveMaxPressure();
      // Stop once the first-processed (bottom-most) region is done.
      if (CurRegion-- == RegionIdx)
        break;
    }
    RPTracker.advanceToNext();
    RPTracker.advanceBeforeNext();
  }

  if (OnlySucc) {
    // Advance through any trailing instructions after the last region so the
    // live set at the block end becomes the successor's live-in set.
    if (I != MBB->end()) {
      RPTracker.advanceToNext();
      RPTracker.advance(MBB->end());
    }
    RPTracker.reset(*OnlySucc->begin(), &RPTracker.getLiveRegs());
    RPTracker.advanceBeforeNext();
    MBBLiveIns[OnlySucc] = RPTracker.moveLiveRegs();
  }
}
483
484
// Build a map from the first non-debug instruction of selected regions to the
// live register set before that instruction, used to seed the RP tracker in
// computeBlockPressure().
// NOTE(review): BB is captured once from the last region and never updated,
// so regions in earlier blocks are not grouped — each gets its own starter
// entry.  That looks like extra (harmless) work rather than one entry per
// block; confirm intent before changing.
DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet>
GCNScheduleDAGMILive::getBBLiveInMap() const {
  assert(!Regions.empty());
  std::vector<MachineInstr *> BBStarters;
  BBStarters.reserve(Regions.size());

  // Walk regions in reverse (top of function last pushed), collecting a
  // starter instruction per group of consecutive same-block regions.
  auto It = Regions.rbegin();
  auto End = Regions.rend();
  auto *BB = It->first->getParent();
  while (It != End) {
    MachineInstr *Starter = &*skipDebugInstructionsForward(It->first, It->second);
    BBStarters.push_back(Starter);
    ++It;
    while (It != End && It->first->getParent() == BB)
      ++It;
  }
  return getLiveRegMap(BBStarters, false /*After*/, *LIS);
}
500
501
25.1k
// Drive the actual scheduling after all regions have been recorded at
// Stage 0.  Stage 1 schedules every region at the starting occupancy;
// Stage 2 re-runs scheduling only if some region forced occupancy below the
// starting value, giving the strategy a lower (achievable) occupancy target.
void GCNScheduleDAGMILive::finalizeSchedule() {
  GCNMaxOccupancySchedStrategy &S = (GCNMaxOccupancySchedStrategy&)*SchedImpl;
  LLVM_DEBUG(dbgs() << "All regions recorded, starting actual scheduling.\n");

  LiveIns.resize(Regions.size());
  Pressure.resize(Regions.size());

  if (!Regions.empty())
    BBLiveInMap = getBBLiveInMap();

  do {
    Stage++;
    RegionIdx = 0;
    MachineBasicBlock *MBB = nullptr;

    if (Stage > 1) {
      // Retry function scheduling if we found resulting occupancy and it is
      // lower than used for first pass scheduling. This will give more freedom
      // to schedule low register pressure blocks.
      // Code is partially copied from MachineSchedulerBase::scheduleRegions().

      if (!LIS || StartingOccupancy <= MinOccupancy)
        break;

      LLVM_DEBUG(
          dbgs()
          << "Retrying function scheduling with lowest recorded occupancy "
          << MinOccupancy << ".\n");

      S.setTargetOccupancy(MinOccupancy);
    }

    for (auto Region : Regions) {
      RegionBegin = Region.first;
      RegionEnd = Region.second;

      // Entering a new block: finish the previous one and (on the first real
      // pass only) compute its cached per-region pressure/live-ins.
      if (RegionBegin->getParent() != MBB) {
        if (MBB) finishBlock();
        MBB = RegionBegin->getParent();
        startBlock(MBB);
        if (Stage == 1)
          computeBlockPressure(MBB);
      }

      unsigned NumRegionInstrs = std::distance(begin(), end());
      enterRegion(MBB, begin(), end(), NumRegionInstrs);

      // Skip empty scheduling regions (0 or 1 schedulable instructions).
      if (begin() == end() || begin() == std::prev(end())) {
        exitRegion();
        continue;
      }

      LLVM_DEBUG(dbgs() << "********** MI Scheduling **********\n");
      LLVM_DEBUG(dbgs() << MF.getName() << ":" << printMBBReference(*MBB) << " "
                        << MBB->getName() << "\n  From: " << *begin()
                        << "    To: ";
                 if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
                 else dbgs() << "End";
                 dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n');

      schedule();

      exitRegion();
      ++RegionIdx;
    }
    finishBlock();

  } while (Stage < 2);
}