Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/CodeGen/PostRASchedulerList.cpp
Line
Count
Source (jump to first uncovered line)
1
//===----- SchedulePostRAList.cpp - list scheduler ------------------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This implements a top-down list scheduler, using standard algorithms.
10
// The basic approach uses a priority queue of available nodes to schedule.
11
// One at a time, nodes are taken from the priority queue (thus in priority
12
// order), checked for legality to schedule, and emitted if legal.
13
//
14
// Nodes may not be legal to schedule either due to structural hazards (e.g.
15
// pipeline or resource constraints) or because an input to the instruction has
16
// not completed execution.
17
//
18
//===----------------------------------------------------------------------===//
19
20
#include "AggressiveAntiDepBreaker.h"
21
#include "AntiDepBreaker.h"
22
#include "CriticalAntiDepBreaker.h"
23
#include "llvm/ADT/Statistic.h"
24
#include "llvm/Analysis/AliasAnalysis.h"
25
#include "llvm/CodeGen/LatencyPriorityQueue.h"
26
#include "llvm/CodeGen/MachineDominators.h"
27
#include "llvm/CodeGen/MachineFunctionPass.h"
28
#include "llvm/CodeGen/MachineLoopInfo.h"
29
#include "llvm/CodeGen/MachineRegisterInfo.h"
30
#include "llvm/CodeGen/Passes.h"
31
#include "llvm/CodeGen/RegisterClassInfo.h"
32
#include "llvm/CodeGen/ScheduleDAGInstrs.h"
33
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
34
#include "llvm/CodeGen/SchedulerRegistry.h"
35
#include "llvm/CodeGen/TargetInstrInfo.h"
36
#include "llvm/CodeGen/TargetLowering.h"
37
#include "llvm/CodeGen/TargetPassConfig.h"
38
#include "llvm/CodeGen/TargetRegisterInfo.h"
39
#include "llvm/CodeGen/TargetSubtargetInfo.h"
40
#include "llvm/Config/llvm-config.h"
41
#include "llvm/Support/CommandLine.h"
42
#include "llvm/Support/Debug.h"
43
#include "llvm/Support/ErrorHandling.h"
44
#include "llvm/Support/raw_ostream.h"
45
using namespace llvm;
46
47
#define DEBUG_TYPE "post-RA-sched"
48
49
STATISTIC(NumNoops, "Number of noops inserted");
50
STATISTIC(NumStalls, "Number of pipeline stalls");
51
STATISTIC(NumFixedAnti, "Number of fixed anti-dependencies");
52
53
// Post-RA scheduling is enabled with
54
// TargetSubtargetInfo.enablePostRAScheduler(). This flag can be used to
55
// override the target.
56
static cl::opt<bool>
57
EnablePostRAScheduler("post-RA-scheduler",
58
                       cl::desc("Enable scheduling after register allocation"),
59
                       cl::init(false), cl::Hidden);
60
static cl::opt<std::string>
61
EnableAntiDepBreaking("break-anti-dependencies",
62
                      cl::desc("Break post-RA scheduling anti-dependencies: "
63
                               "\"critical\", \"all\", or \"none\""),
64
                      cl::init("none"), cl::Hidden);
65
66
// If DebugDiv > 0 then only schedule MBB with (ID % DebugDiv) == DebugMod
67
static cl::opt<int>
68
DebugDiv("postra-sched-debugdiv",
69
                      cl::desc("Debug control MBBs that are scheduled"),
70
                      cl::init(0), cl::Hidden);
71
static cl::opt<int>
72
DebugMod("postra-sched-debugmod",
73
                      cl::desc("Debug control MBBs that are scheduled"),
74
                      cl::init(0), cl::Hidden);
75
76
4.49k
// Out-of-line destructor definition anchors the AntiDepBreaker vtable in this
// translation unit (the class itself is declared in AntiDepBreaker.h).
AntiDepBreaker::~AntiDepBreaker() = default;
77
78
namespace {
79
  class PostRAScheduler : public MachineFunctionPass {
80
    const TargetInstrInfo *TII;
81
    RegisterClassInfo RegClassInfo;
82
83
  public:
84
    static char ID;
85
21.5k
    PostRAScheduler() : MachineFunctionPass(ID) {}
86
87
21.4k
    void getAnalysisUsage(AnalysisUsage &AU) const override {
88
21.4k
      AU.setPreservesCFG();
89
21.4k
      AU.addRequired<AAResultsWrapperPass>();
90
21.4k
      AU.addRequired<TargetPassConfig>();
91
21.4k
      AU.addRequired<MachineDominatorTree>();
92
21.4k
      AU.addPreserved<MachineDominatorTree>();
93
21.4k
      AU.addRequired<MachineLoopInfo>();
94
21.4k
      AU.addPreserved<MachineLoopInfo>();
95
21.4k
      MachineFunctionPass::getAnalysisUsage(AU);
96
21.4k
    }
97
98
21.4k
    MachineFunctionProperties getRequiredProperties() const override {
99
21.4k
      return MachineFunctionProperties().set(
100
21.4k
          MachineFunctionProperties::Property::NoVRegs);
101
21.4k
    }
102
103
    bool runOnMachineFunction(MachineFunction &Fn) override;
104
105
  private:
106
    bool enablePostRAScheduler(
107
        const TargetSubtargetInfo &ST, CodeGenOpt::Level OptLevel,
108
        TargetSubtargetInfo::AntiDepBreakMode &Mode,
109
        TargetSubtargetInfo::RegClassVector &CriticalPathRCs) const;
110
  };
111
  char PostRAScheduler::ID = 0;
112
113
  class SchedulePostRATDList : public ScheduleDAGInstrs {
114
    /// AvailableQueue - The priority queue to use for the available SUnits.
115
    ///
116
    LatencyPriorityQueue AvailableQueue;
117
118
    /// PendingQueue - This contains all of the instructions whose operands have
119
    /// been issued, but their results are not ready yet (due to the latency of
120
    /// the operation).  Once the operands becomes available, the instruction is
121
    /// added to the AvailableQueue.
122
    std::vector<SUnit*> PendingQueue;
123
124
    /// HazardRec - The hazard recognizer to use.
125
    ScheduleHazardRecognizer *HazardRec;
126
127
    /// AntiDepBreak - Anti-dependence breaking object, or NULL if none
128
    AntiDepBreaker *AntiDepBreak;
129
130
    /// AA - AliasAnalysis for making memory reference queries.
131
    AliasAnalysis *AA;
132
133
    /// The schedule. Null SUnit*'s represent noop instructions.
134
    std::vector<SUnit*> Sequence;
135
136
    /// Ordered list of DAG postprocessing steps.
137
    std::vector<std::unique_ptr<ScheduleDAGMutation>> Mutations;
138
139
    /// The index in BB of RegionEnd.
140
    ///
141
    /// This is the instruction number from the top of the current block, not
142
    /// the SlotIndex. It is only used by the AntiDepBreaker.
143
    unsigned EndIndex;
144
145
  public:
146
    SchedulePostRATDList(
147
        MachineFunction &MF, MachineLoopInfo &MLI, AliasAnalysis *AA,
148
        const RegisterClassInfo &,
149
        TargetSubtargetInfo::AntiDepBreakMode AntiDepMode,
150
        SmallVectorImpl<const TargetRegisterClass *> &CriticalPathRCs);
151
152
    ~SchedulePostRATDList() override;
153
154
    /// startBlock - Initialize register live-range state for scheduling in
155
    /// this block.
156
    ///
157
    void startBlock(MachineBasicBlock *BB) override;
158
159
    // Set the index of RegionEnd within the current BB.
160
298k
    void setEndIndex(unsigned EndIdx) { EndIndex = EndIdx; }
161
162
    /// Initialize the scheduler state for the next scheduling region.
163
    void enterRegion(MachineBasicBlock *bb,
164
                     MachineBasicBlock::iterator begin,
165
                     MachineBasicBlock::iterator end,
166
                     unsigned regioninstrs) override;
167
168
    /// Notify that the scheduler has finished scheduling the current region.
169
    void exitRegion() override;
170
171
    /// Schedule - Schedule the instruction range using list scheduling.
172
    ///
173
    void schedule() override;
174
175
    void EmitSchedule();
176
177
    /// Observe - Update liveness information to account for the current
178
    /// instruction, which will not be scheduled.
179
    ///
180
    void Observe(MachineInstr &MI, unsigned Count);
181
182
    /// finishBlock - Clean up register live-range state.
183
    ///
184
    void finishBlock() override;
185
186
  private:
187
    /// Apply each ScheduleDAGMutation step in order.
188
    void postprocessDAG();
189
190
    void ReleaseSucc(SUnit *SU, SDep *SuccEdge);
191
    void ReleaseSuccessors(SUnit *SU);
192
    void ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle);
193
    void ListScheduleTopDown();
194
195
    void dumpSchedule() const;
196
    void emitNoop(unsigned CurCycle);
197
  };
198
}
199
200
char &llvm::PostRASchedulerID = PostRAScheduler::ID;
201
202
INITIALIZE_PASS(PostRAScheduler, DEBUG_TYPE,
203
                "Post RA top-down list latency scheduler", false, false)
204
205
SchedulePostRATDList::SchedulePostRATDList(
    MachineFunction &MF, MachineLoopInfo &MLI, AliasAnalysis *AA,
    const RegisterClassInfo &RCI,
    TargetSubtargetInfo::AntiDepBreakMode AntiDepMode,
    SmallVectorImpl<const TargetRegisterClass *> &CriticalPathRCs)
    : ScheduleDAGInstrs(MF, &MLI), AA(AA), EndIndex(0) {

  // Ask the target for its post-RA hazard recognizer and any DAG mutations.
  const InstrItineraryData *InstrItins =
      MF.getSubtarget().getInstrItineraryData();
  HazardRec =
      MF.getSubtarget().getInstrInfo()->CreateTargetPostRAHazardRecognizer(
          InstrItins, this);
  MF.getSubtarget().getPostRAMutations(Mutations);

  assert((AntiDepMode == TargetSubtargetInfo::ANTIDEP_NONE ||
          MRI.tracksLiveness()) &&
         "Live-ins must be accurate for anti-dependency breaking");
  // Select the anti-dependency breaker implementation matching the requested
  // mode: aggressive (all), critical-path only, or none.
  AntiDepBreak =
    ((AntiDepMode == TargetSubtargetInfo::ANTIDEP_ALL) ?
     (AntiDepBreaker *)new AggressiveAntiDepBreaker(MF, RCI, CriticalPathRCs) :
     ((AntiDepMode == TargetSubtargetInfo::ANTIDEP_CRITICAL) ?
      (AntiDepBreaker *)new CriticalAntiDepBreaker(MF, RCI) : nullptr));
}
228
229
45.0k
SchedulePostRATDList::~SchedulePostRATDList() {
  // Both objects are owned by this scheduler (allocated in the constructor).
  delete HazardRec;
  delete AntiDepBreak;
}
233
234
/// Initialize state associated with the next scheduling region.
235
void SchedulePostRATDList::enterRegion(MachineBasicBlock *bb,
236
                 MachineBasicBlock::iterator begin,
237
                 MachineBasicBlock::iterator end,
238
298k
                 unsigned regioninstrs) {
239
298k
  ScheduleDAGInstrs::enterRegion(bb, begin, end, regioninstrs);
240
298k
  Sequence.clear();
241
298k
}
242
243
/// Print the schedule before exiting the region.
244
298k
void SchedulePostRATDList::exitRegion() {
245
298k
  LLVM_DEBUG({
246
298k
    dbgs() << "*** Final schedule ***\n";
247
298k
    dumpSchedule();
248
298k
    dbgs() << '\n';
249
298k
  });
250
298k
  ScheduleDAGInstrs::exitRegion();
251
298k
}
252
253
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
254
/// dumpSchedule - dump the scheduled Sequence.
255
LLVM_DUMP_METHOD void SchedulePostRATDList::dumpSchedule() const {
256
  for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
257
    if (SUnit *SU = Sequence[i])
258
      dumpNode(*SU);
259
    else
260
      dbgs() << "**** NOOP ****\n";
261
  }
262
}
263
#endif
264
265
bool PostRAScheduler::enablePostRAScheduler(
266
    const TargetSubtargetInfo &ST,
267
    CodeGenOpt::Level OptLevel,
268
    TargetSubtargetInfo::AntiDepBreakMode &Mode,
269
204k
    TargetSubtargetInfo::RegClassVector &CriticalPathRCs) const {
270
204k
  Mode = ST.getAntiDepBreakMode();
271
204k
  ST.getCriticalPathRCs(CriticalPathRCs);
272
204k
273
204k
  // Check for explicit enable/disable of post-ra scheduling.
274
204k
  if (EnablePostRAScheduler.getPosition() > 0)
275
90
    return EnablePostRAScheduler;
276
204k
277
204k
  return ST.enablePostRAScheduler() &&
278
204k
         
OptLevel >= ST.getOptLevelToEnablePostRAScheduler()57.5k
;
279
204k
}
280
281
204k
bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
  if (skipFunction(Fn.getFunction()))
    return false;

  TII = Fn.getSubtarget().getInstrInfo();
  MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
  AliasAnalysis *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
  TargetPassConfig *PassConfig = &getAnalysis<TargetPassConfig>();

  RegClassInfo.runOnMachineFunction(Fn);

  TargetSubtargetInfo::AntiDepBreakMode AntiDepMode =
    TargetSubtargetInfo::ANTIDEP_NONE;
  SmallVector<const TargetRegisterClass*, 4> CriticalPathRCs;

  // Check that post-RA scheduling is enabled for this target.
  // This may upgrade the AntiDepMode.
  if (!enablePostRAScheduler(Fn.getSubtarget(), PassConfig->getOptLevel(),
                             AntiDepMode, CriticalPathRCs))
    return false;

  // Check for antidep breaking override...
  if (EnableAntiDepBreaking.getPosition() > 0) {
    AntiDepMode = (EnableAntiDepBreaking == "all")
      ? TargetSubtargetInfo::ANTIDEP_ALL
      : ((EnableAntiDepBreaking == "critical")
         ? TargetSubtargetInfo::ANTIDEP_CRITICAL
         : TargetSubtargetInfo::ANTIDEP_NONE);
  }

  LLVM_DEBUG(dbgs() << "PostRAScheduler\n");

  SchedulePostRATDList Scheduler(Fn, MLI, AA, RegClassInfo, AntiDepMode,
                                 CriticalPathRCs);

  // Loop over all of the basic blocks
  for (auto &MBB : Fn) {
#ifndef NDEBUG
    // If DebugDiv > 0 then only schedule MBB with (ID % DebugDiv) == DebugMod
    if (DebugDiv > 0) {
      static int bbcnt = 0;
      if (bbcnt++ % DebugDiv != DebugMod)
        continue;
      dbgs() << "*** DEBUG scheduling " << Fn.getName() << ":"
             << printMBBReference(MBB) << " ***\n";
    }
#endif

    // Initialize register live-range state for scheduling in this block.
    Scheduler.startBlock(&MBB);

    // Schedule each sequence of instructions not interrupted by a label
    // or anything else that effectively needs to shut down scheduling.
    // Regions are discovered bottom-up: walk from the end of the block and
    // cut a region at every scheduling boundary.
    MachineBasicBlock::iterator Current = MBB.end();
    unsigned Count = MBB.size(), CurrentCount = Count;
    for (MachineBasicBlock::iterator I = Current; I != MBB.begin();) {
      MachineInstr &MI = *std::prev(I);
      --Count;
      // Calls are not scheduling boundaries before register allocation, but
      // post-ra we don't gain anything by scheduling across calls since we
      // don't need to worry about register pressure.
      if (MI.isCall() || TII->isSchedulingBoundary(MI, &MBB, Fn)) {
        Scheduler.enterRegion(&MBB, I, Current, CurrentCount - Count);
        Scheduler.setEndIndex(CurrentCount);
        Scheduler.schedule();
        Scheduler.exitRegion();
        Scheduler.EmitSchedule();
        Current = &MI;
        CurrentCount = Count;
        // The boundary instruction itself is not scheduled, but the
        // anti-dep breaker must still see it to keep liveness accurate.
        Scheduler.Observe(MI, CurrentCount);
      }
      I = MI;
      if (MI.isBundle())
        Count -= MI.getBundleSize();
    }
    assert(Count == 0 && "Instruction count mismatch!");
    assert((MBB.begin() == Current || CurrentCount != 0) &&
           "Instruction count mismatch!");
    // Schedule the final (topmost) region of the block.
    Scheduler.enterRegion(&MBB, MBB.begin(), Current, CurrentCount);
    Scheduler.setEndIndex(CurrentCount);
    Scheduler.schedule();
    Scheduler.exitRegion();
    Scheduler.EmitSchedule();

    // Clean up register live-range state.
    Scheduler.finishBlock();

    // Update register kills
    Scheduler.fixupKills(MBB);
  }

  return true;
}
374
375
/// StartBlock - Initialize register live-range state for scheduling in
376
/// this block.
377
///
378
99.3k
void SchedulePostRATDList::startBlock(MachineBasicBlock *BB) {
379
99.3k
  // Call the superclass.
380
99.3k
  ScheduleDAGInstrs::startBlock(BB);
381
99.3k
382
99.3k
  // Reset the hazard recognizer and anti-dep breaker.
383
99.3k
  HazardRec->Reset();
384
99.3k
  if (AntiDepBreak)
385
6.48k
    AntiDepBreak->StartBlock(BB);
386
99.3k
}
387
388
/// Schedule - Schedule the instruction range using list scheduling.
389
///
390
298k
void SchedulePostRATDList::schedule() {
391
298k
  // Build the scheduling graph.
392
298k
  buildSchedGraph(AA);
393
298k
394
298k
  if (AntiDepBreak) {
395
14.5k
    unsigned Broken =
396
14.5k
      AntiDepBreak->BreakAntiDependencies(SUnits, RegionBegin, RegionEnd,
397
14.5k
                                          EndIndex, DbgValues);
398
14.5k
399
14.5k
    if (Broken != 0) {
400
371
      // We made changes. Update the dependency graph.
401
371
      // Theoretically we could update the graph in place:
402
371
      // When a live range is changed to use a different register, remove
403
371
      // the def's anti-dependence *and* output-dependence edges due to
404
371
      // that register, and add new anti-dependence and output-dependence
405
371
      // edges based on the next live range of the register.
406
371
      ScheduleDAG::clearDAG();
407
371
      buildSchedGraph(AA);
408
371
409
371
      NumFixedAnti += Broken;
410
371
    }
411
14.5k
  }
412
298k
413
298k
  postprocessDAG();
414
298k
415
298k
  LLVM_DEBUG(dbgs() << "********** List Scheduling **********\n");
416
298k
  LLVM_DEBUG(dump());
417
298k
418
298k
  AvailableQueue.initNodes(SUnits);
419
298k
  ListScheduleTopDown();
420
298k
  AvailableQueue.releaseState();
421
298k
}
422
423
/// Observe - Update liveness information to account for the current
424
/// instruction, which will not be scheduled.
425
///
426
198k
void SchedulePostRATDList::Observe(MachineInstr &MI, unsigned Count) {
427
198k
  if (AntiDepBreak)
428
8.02k
    AntiDepBreak->Observe(MI, Count, EndIndex);
429
198k
}
430
431
/// FinishBlock - Clean up register live-range state.
432
///
433
99.3k
void SchedulePostRATDList::finishBlock() {
434
99.3k
  if (AntiDepBreak)
435
6.48k
    AntiDepBreak->FinishBlock();
436
99.3k
437
99.3k
  // Call the superclass.
438
99.3k
  ScheduleDAGInstrs::finishBlock();
439
99.3k
}
440
441
/// Apply each ScheduleDAGMutation step in order.
442
298k
void SchedulePostRATDList::postprocessDAG() {
443
298k
  for (auto &M : Mutations)
444
136k
    M->apply(this);
445
298k
}
446
447
//===----------------------------------------------------------------------===//
448
//  Top-Down Scheduling
449
//===----------------------------------------------------------------------===//
450
451
/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
452
/// the PendingQueue if the count reaches zero.
453
1.92M
void SchedulePostRATDList::ReleaseSucc(SUnit *SU, SDep *SuccEdge) {
454
1.92M
  SUnit *SuccSU = SuccEdge->getSUnit();
455
1.92M
456
1.92M
  if (SuccEdge->isWeak()) {
457
13
    --SuccSU->WeakPredsLeft;
458
13
    return;
459
13
  }
460
#ifndef NDEBUG
461
  if (SuccSU->NumPredsLeft == 0) {
462
    dbgs() << "*** Scheduling failed! ***\n";
463
    dumpNode(*SuccSU);
464
    dbgs() << " has been released too many times!\n";
465
    llvm_unreachable(nullptr);
466
  }
467
#endif
468
1.92M
  --SuccSU->NumPredsLeft;
469
1.92M
470
1.92M
  // Standard scheduler algorithms will recompute the depth of the successor
471
1.92M
  // here as such:
472
1.92M
  //   SuccSU->setDepthToAtLeast(SU->getDepth() + SuccEdge->getLatency());
473
1.92M
  //
474
1.92M
  // However, we lazily compute node depth instead. Note that
475
1.92M
  // ScheduleNodeTopDown has already updated the depth of this node which causes
476
1.92M
  // all descendents to be marked dirty. Setting the successor depth explicitly
477
1.92M
  // here would cause depth to be recomputed for all its ancestors. If the
478
1.92M
  // successor is not yet ready (because of a transitively redundant edge) then
479
1.92M
  // this causes depth computation to be quadratic in the size of the DAG.
480
1.92M
481
1.92M
  // If all the node's predecessors are scheduled, this node is ready
482
1.92M
  // to be scheduled. Ignore the special ExitSU node.
483
1.92M
  if (SuccSU->NumPredsLeft == 0 && 
SuccSU != &ExitSU522k
)
484
436k
    PendingQueue.push_back(SuccSU);
485
1.92M
}
486
487
/// ReleaseSuccessors - Call ReleaseSucc on each of SU's successors.
488
951k
void SchedulePostRATDList::ReleaseSuccessors(SUnit *SU) {
489
951k
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
490
2.87M
       I != E; 
++I1.92M
) {
491
1.92M
    ReleaseSucc(SU, &*I);
492
1.92M
  }
493
951k
}
494
495
/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending
496
/// count of its successors. If a successor pending count is zero, add it to
497
/// the Available queue.
498
653k
void SchedulePostRATDList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
499
653k
  LLVM_DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
500
653k
  LLVM_DEBUG(dumpNode(*SU));
501
653k
502
653k
  Sequence.push_back(SU);
503
653k
  assert(CurCycle >= SU->getDepth() &&
504
653k
         "Node scheduled above its depth!");
505
653k
  SU->setDepthToAtLeast(CurCycle);
506
653k
507
653k
  ReleaseSuccessors(SU);
508
653k
  SU->isScheduled = true;
509
653k
  AvailableQueue.scheduledNode(SU);
510
653k
}
511
512
/// emitNoop - Add a noop to the current instruction sequence.
513
2.53k
void SchedulePostRATDList::emitNoop(unsigned CurCycle) {
514
2.53k
  LLVM_DEBUG(dbgs() << "*** Emitting noop in cycle " << CurCycle << '\n');
515
2.53k
  HazardRec->EmitNoop();
516
2.53k
  Sequence.push_back(nullptr);   // NULL here means noop
517
2.53k
  ++NumNoops;
518
2.53k
}
519
520
/// ListScheduleTopDown - The main loop of list scheduling for top-down
521
/// schedulers.
522
298k
void SchedulePostRATDList::ListScheduleTopDown() {
523
298k
  unsigned CurCycle = 0;
524
298k
525
298k
  // We're scheduling top-down but we're visiting the regions in
526
298k
  // bottom-up order, so we don't know the hazards at the start of a
527
298k
  // region. So assume no hazards (this should usually be ok as most
528
298k
  // blocks are a single region).
529
298k
  HazardRec->Reset();
530
298k
531
298k
  // Release any successors of the special Entry node.
532
298k
  ReleaseSuccessors(&EntrySU);
533
298k
534
298k
  // Add all leaves to Available queue.
535
951k
  for (unsigned i = 0, e = SUnits.size(); i != e; 
++i653k
) {
536
653k
    // It is available if it has no predecessors.
537
653k
    if (!SUnits[i].NumPredsLeft && 
!SUnits[i].isAvailable216k
) {
538
216k
      AvailableQueue.push(&SUnits[i]);
539
216k
      SUnits[i].isAvailable = true;
540
216k
    }
541
653k
  }
542
298k
543
298k
  // In any cycle where we can't schedule any instructions, we must
544
298k
  // stall or emit a noop, depending on the target.
545
298k
  bool CycleHasInsts = false;
546
298k
547
298k
  // While Available queue is not empty, grab the node with the highest
548
298k
  // priority. If it is not ready put it back.  Schedule the node.
549
298k
  std::vector<SUnit*> NotReady;
550
298k
  Sequence.reserve(SUnits.size());
551
2.01M
  while (!AvailableQueue.empty() || 
!PendingQueue.empty()1.57M
) {
552
1.71M
    // Check to see if any of the pending instructions are ready to issue.  If
553
1.71M
    // so, add them to the available queue.
554
1.71M
    unsigned MinDepth = ~0u;
555
3.70M
    for (unsigned i = 0, e = PendingQueue.size(); i != e; 
++i1.99M
) {
556
1.99M
      if (PendingQueue[i]->getDepth() <= CurCycle) {
557
436k
        AvailableQueue.push(PendingQueue[i]);
558
436k
        PendingQueue[i]->isAvailable = true;
559
436k
        PendingQueue[i] = PendingQueue.back();
560
436k
        PendingQueue.pop_back();
561
436k
        --i; --e;
562
1.55M
      } else if (PendingQueue[i]->getDepth() < MinDepth)
563
1.20M
        MinDepth = PendingQueue[i]->getDepth();
564
1.99M
    }
565
1.71M
566
1.71M
    LLVM_DEBUG(dbgs() << "\n*** Examining Available\n";
567
1.71M
               AvailableQueue.dump(this));
568
1.71M
569
1.71M
    SUnit *FoundSUnit = nullptr, *NotPreferredSUnit = nullptr;
570
1.71M
    bool HasNoopHazards = false;
571
1.79M
    while (!AvailableQueue.empty()) {
572
727k
      SUnit *CurSUnit = AvailableQueue.pop();
573
727k
574
727k
      ScheduleHazardRecognizer::HazardType HT =
575
727k
        HazardRec->getHazardType(CurSUnit, 0/*no stalls*/);
576
727k
      if (HT == ScheduleHazardRecognizer::NoHazard) {
577
653k
        if (HazardRec->ShouldPreferAnother(CurSUnit)) {
578
722
          if (!NotPreferredSUnit) {
579
487
            // If this is the first non-preferred node for this cycle, then
580
487
            // record it and continue searching for a preferred node. If this
581
487
            // is not the first non-preferred node, then treat it as though
582
487
            // there had been a hazard.
583
487
            NotPreferredSUnit = CurSUnit;
584
487
            continue;
585
487
          }
586
652k
        } else {
587
652k
          FoundSUnit = CurSUnit;
588
652k
          break;
589
652k
        }
590
74.3k
      }
591
74.3k
592
74.3k
      // Remember if this is a noop hazard.
593
74.3k
      HasNoopHazards |= HT == ScheduleHazardRecognizer::NoopHazard;
594
74.3k
595
74.3k
      NotReady.push_back(CurSUnit);
596
74.3k
    }
597
1.71M
598
1.71M
    // If we have a non-preferred node, push it back onto the available list.
599
1.71M
    // If we did not find a preferred node, then schedule this first
600
1.71M
    // non-preferred node.
601
1.71M
    if (NotPreferredSUnit) {
602
487
      if (!FoundSUnit) {
603
275
        LLVM_DEBUG(
604
275
            dbgs() << "*** Will schedule a non-preferred instruction...\n");
605
275
        FoundSUnit = NotPreferredSUnit;
606
275
      } else {
607
212
        AvailableQueue.push(NotPreferredSUnit);
608
212
      }
609
487
610
487
      NotPreferredSUnit = nullptr;
611
487
    }
612
1.71M
613
1.71M
    // Add the nodes that aren't ready back onto the available list.
614
1.71M
    if (!NotReady.empty()) {
615
36.3k
      AvailableQueue.push_all(NotReady);
616
36.3k
      NotReady.clear();
617
36.3k
    }
618
1.71M
619
1.71M
    // If we found a node to schedule...
620
1.71M
    if (FoundSUnit) {
621
653k
      // If we need to emit noops prior to this instruction, then do so.
622
653k
      unsigned NumPreNoops = HazardRec->PreEmitNoops(FoundSUnit);
623
653k
      for (unsigned i = 0; i != NumPreNoops; 
++i0
)
624
0
        emitNoop(CurCycle);
625
653k
626
653k
      // ... schedule the node...
627
653k
      ScheduleNodeTopDown(FoundSUnit, CurCycle);
628
653k
      HazardRec->EmitInstruction(FoundSUnit);
629
653k
      CycleHasInsts = true;
630
653k
      if (HazardRec->atIssueLimit()) {
631
366k
        LLVM_DEBUG(dbgs() << "*** Max instructions per cycle " << CurCycle
632
366k
                          << '\n');
633
366k
        HazardRec->AdvanceCycle();
634
366k
        ++CurCycle;
635
366k
        CycleHasInsts = false;
636
366k
      }
637
1.06M
    } else {
638
1.06M
      if (CycleHasInsts) {
639
99.1k
        LLVM_DEBUG(dbgs() << "*** Finished cycle " << CurCycle << '\n');
640
99.1k
        HazardRec->AdvanceCycle();
641
963k
      } else if (!HasNoopHazards) {
642
960k
        // Otherwise, we have a pipeline stall, but no other problem,
643
960k
        // just advance the current cycle and try again.
644
960k
        LLVM_DEBUG(dbgs() << "*** Stall in cycle " << CurCycle << '\n');
645
960k
        HazardRec->AdvanceCycle();
646
960k
        ++NumStalls;
647
960k
      } else {
648
2.53k
        // Otherwise, we have no instructions to issue and we have instructions
649
2.53k
        // that will fault if we don't do this right.  This is the case for
650
2.53k
        // processors without pipeline interlocks and other cases.
651
2.53k
        emitNoop(CurCycle);
652
2.53k
      }
653
1.06M
654
1.06M
      ++CurCycle;
655
1.06M
      CycleHasInsts = false;
656
1.06M
    }
657
1.71M
  }
658
298k
659
#ifndef NDEBUG
660
  unsigned ScheduledNodes = VerifyScheduledDAG(/*isBottomUp=*/false);
661
  unsigned Noops = 0;
662
  for (unsigned i = 0, e = Sequence.size(); i != e; ++i)
663
    if (!Sequence[i])
664
      ++Noops;
665
  assert(Sequence.size() - Noops == ScheduledNodes &&
666
         "The number of nodes scheduled doesn't match the expected number!");
667
#endif // NDEBUG
668
}
669
670
// EmitSchedule - Emit the machine code in scheduled order.
671
298k
void SchedulePostRATDList::EmitSchedule() {
672
298k
  RegionBegin = RegionEnd;
673
298k
674
298k
  // If first instruction was a DBG_VALUE then put it back.
675
298k
  if (FirstDbgValue)
676
61
    BB->splice(RegionEnd, BB, FirstDbgValue);
677
298k
678
298k
  // Then re-insert them according to the given schedule.
679
953k
  for (unsigned i = 0, e = Sequence.size(); i != e; 
i++655k
) {
680
655k
    if (SUnit *SU = Sequence[i])
681
653k
      BB->splice(RegionEnd, BB, SU->getInstr());
682
2.53k
    else
683
2.53k
      // Null SUnit* is a noop.
684
2.53k
      TII->insertNoop(*BB, RegionEnd);
685
655k
686
655k
    // Update the Begin iterator, as the first instruction in the block
687
655k
    // may have been scheduled later.
688
655k
    if (i == 0)
689
124k
      RegionBegin = std::prev(RegionEnd);
690
655k
  }
691
298k
692
298k
  // Reinsert any remaining debug_values.
693
298k
  for (std::vector<std::pair<MachineInstr *, MachineInstr *> >::iterator
694
298k
         DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; 
--DI188
) {
695
188
    std::pair<MachineInstr *, MachineInstr *> P = *std::prev(DI);
696
188
    MachineInstr *DbgValue = P.first;
697
188
    MachineBasicBlock::iterator OrigPrivMI = P.second;
698
188
    BB->splice(++OrigPrivMI, BB, DbgValue);
699
188
  }
700
298k
  DbgValues.clear();
701
298k
  FirstDbgValue = nullptr;
702
298k
}