Coverage Report

Created: 2017-10-03 07:32

/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/lib/CodeGen/PostRASchedulerList.cpp
//===----- SchedulePostRAList.cpp - list scheduler ------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This implements a top-down list scheduler, using standard algorithms.
// The basic approach uses a priority queue of available nodes to schedule.
// One at a time, nodes are taken from the priority queue (thus in priority
// order), checked for legality to schedule, and emitted if legal.
//
// Nodes may not be legal to schedule either due to structural hazards (e.g.
// pipeline or resource constraints) or because an input to the instruction has
// not completed execution.
//
//===----------------------------------------------------------------------===//

#include "AggressiveAntiDepBreaker.h"
#include "AntiDepBreaker.h"
#include "CriticalAntiDepBreaker.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/LatencyPriorityQueue.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/ScheduleDAGInstrs.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;

#define DEBUG_TYPE "post-RA-sched"

STATISTIC(NumNoops, "Number of noops inserted");
STATISTIC(NumStalls, "Number of pipeline stalls");
STATISTIC(NumFixedAnti, "Number of fixed anti-dependencies");

// Post-RA scheduling is enabled with
// TargetSubtargetInfo::enablePostRAScheduler(). This flag can be used to
// override the target.
static cl::opt<bool>
EnablePostRAScheduler("post-RA-scheduler",
                      cl::desc("Enable scheduling after register allocation"),
                      cl::init(false), cl::Hidden);
static cl::opt<std::string>
EnableAntiDepBreaking("break-anti-dependencies",
                      cl::desc("Break post-RA scheduling anti-dependencies: "
                               "\"critical\", \"all\", or \"none\""),
                      cl::init("none"), cl::Hidden);

// If DebugDiv > 0, then only schedule MBBs with (ID % DebugDiv) == DebugMod.
static cl::opt<int>
DebugDiv("postra-sched-debugdiv",
         cl::desc("Debug control MBBs that are scheduled"),
         cl::init(0), cl::Hidden);
static cl::opt<int>
DebugMod("postra-sched-debugmod",
         cl::desc("Debug control MBBs that are scheduled"),
         cl::init(0), cl::Hidden);

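// For example, with a tool such as llc that links this pass in, the hidden
// flags above can force the pass on and select a breaking mode from the
// command line (a usage sketch; "foo.ll" is a placeholder input file):
//
//   llc -O2 -post-RA-scheduler -break-anti-dependencies=critical foo.ll
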
AntiDepBreaker::~AntiDepBreaker() { }

namespace {
  class PostRAScheduler : public MachineFunctionPass {
    const TargetInstrInfo *TII;
    RegisterClassInfo RegClassInfo;

  public:
    static char ID;
    PostRAScheduler() : MachineFunctionPass(ID) {}

    void getAnalysisUsage(AnalysisUsage &AU) const override {
      AU.setPreservesCFG();
      AU.addRequired<AAResultsWrapperPass>();
      AU.addRequired<TargetPassConfig>();
      AU.addRequired<MachineDominatorTree>();
      AU.addPreserved<MachineDominatorTree>();
      AU.addRequired<MachineLoopInfo>();
      AU.addPreserved<MachineLoopInfo>();
      MachineFunctionPass::getAnalysisUsage(AU);
    }

    MachineFunctionProperties getRequiredProperties() const override {
      return MachineFunctionProperties().set(
          MachineFunctionProperties::Property::NoVRegs);
    }

    bool runOnMachineFunction(MachineFunction &Fn) override;

  private:
    bool enablePostRAScheduler(
        const TargetSubtargetInfo &ST, CodeGenOpt::Level OptLevel,
        TargetSubtargetInfo::AntiDepBreakMode &Mode,
        TargetSubtargetInfo::RegClassVector &CriticalPathRCs) const;
  };
  char PostRAScheduler::ID = 0;

  class SchedulePostRATDList : public ScheduleDAGInstrs {
    /// AvailableQueue - The priority queue to use for the available SUnits.
    ///
    LatencyPriorityQueue AvailableQueue;

    /// PendingQueue - This contains all of the instructions whose operands have
    /// been issued, but their results are not ready yet (due to the latency of
    /// the operation). Once the operands become available, the instruction is
    /// added to the AvailableQueue.
    std::vector<SUnit*> PendingQueue;

    /// HazardRec - The hazard recognizer to use.
    ScheduleHazardRecognizer *HazardRec;

    /// AntiDepBreak - Anti-dependence breaking object, or NULL if none.
    AntiDepBreaker *AntiDepBreak;

    /// AA - AliasAnalysis for making memory reference queries.
    AliasAnalysis *AA;

    /// The schedule. Null SUnit*'s represent noop instructions.
    std::vector<SUnit*> Sequence;

    /// Ordered list of DAG postprocessing steps.
    std::vector<std::unique_ptr<ScheduleDAGMutation>> Mutations;

    /// The index in BB of RegionEnd.
    ///
    /// This is the instruction number from the top of the current block, not
    /// the SlotIndex. It is only used by the AntiDepBreaker.
    unsigned EndIndex;

  public:
    SchedulePostRATDList(
        MachineFunction &MF, MachineLoopInfo &MLI, AliasAnalysis *AA,
        const RegisterClassInfo &,
        TargetSubtargetInfo::AntiDepBreakMode AntiDepMode,
        SmallVectorImpl<const TargetRegisterClass *> &CriticalPathRCs);

    ~SchedulePostRATDList() override;

    /// startBlock - Initialize register live-range state for scheduling in
    /// this block.
    ///
    void startBlock(MachineBasicBlock *BB) override;

    // Set the index of RegionEnd within the current BB.
    void setEndIndex(unsigned EndIdx) { EndIndex = EndIdx; }

    /// Initialize the scheduler state for the next scheduling region.
    void enterRegion(MachineBasicBlock *bb,
                     MachineBasicBlock::iterator begin,
                     MachineBasicBlock::iterator end,
                     unsigned regioninstrs) override;

    /// Notify that the scheduler has finished scheduling the current region.
    void exitRegion() override;

    /// Schedule - Schedule the instruction range using list scheduling.
    ///
    void schedule() override;

    void EmitSchedule();

    /// Observe - Update liveness information to account for the current
    /// instruction, which will not be scheduled.
    ///
    void Observe(MachineInstr &MI, unsigned Count);

    /// finishBlock - Clean up register live-range state.
    ///
    void finishBlock() override;

  private:
    /// Apply each ScheduleDAGMutation step in order.
    void postprocessDAG();

    void ReleaseSucc(SUnit *SU, SDep *SuccEdge);
    void ReleaseSuccessors(SUnit *SU);
    void ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle);
    void ListScheduleTopDown();

    void dumpSchedule() const;
    void emitNoop(unsigned CurCycle);
  };
}

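// How the two queues in SchedulePostRATDList cooperate, as a minimal sketch
// (the helper names here are hypothetical; the real loop is
// ListScheduleTopDown() below):
//
//   while (!Available.empty() || !Pending.empty()) {
//     moveReadyToAvailable(Pending, Available, CurCycle); // depth <= CurCycle
//     if (SUnit *SU = pickHazardFree(Available, HazardRec))
//       scheduleNode(SU, CurCycle);  // successors are released into Pending
//     else
//       advanceCycle(CurCycle);      // stall, or emit a noop if required
//   }
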
char &llvm::PostRASchedulerID = PostRAScheduler::ID;

INITIALIZE_PASS(PostRAScheduler, DEBUG_TYPE,
                "Post RA top-down list latency scheduler", false, false)

SchedulePostRATDList::SchedulePostRATDList(
    MachineFunction &MF, MachineLoopInfo &MLI, AliasAnalysis *AA,
    const RegisterClassInfo &RCI,
    TargetSubtargetInfo::AntiDepBreakMode AntiDepMode,
    SmallVectorImpl<const TargetRegisterClass *> &CriticalPathRCs)
    : ScheduleDAGInstrs(MF, &MLI), AA(AA), EndIndex(0) {

  const InstrItineraryData *InstrItins =
      MF.getSubtarget().getInstrItineraryData();
  HazardRec =
      MF.getSubtarget().getInstrInfo()->CreateTargetPostRAHazardRecognizer(
          InstrItins, this);
  MF.getSubtarget().getPostRAMutations(Mutations);

  assert((AntiDepMode == TargetSubtargetInfo::ANTIDEP_NONE ||
          MRI.tracksLiveness()) &&
         "Live-ins must be accurate for anti-dependency breaking");
  // ANTIDEP_ALL renames over all register classes; ANTIDEP_CRITICAL renames
  // only along the critical path; otherwise no breaker is created.
  AntiDepBreak =
    ((AntiDepMode == TargetSubtargetInfo::ANTIDEP_ALL) ?
     (AntiDepBreaker *)new AggressiveAntiDepBreaker(MF, RCI, CriticalPathRCs) :
     ((AntiDepMode == TargetSubtargetInfo::ANTIDEP_CRITICAL) ?
      (AntiDepBreaker *)new CriticalAntiDepBreaker(MF, RCI) : nullptr));
}

SchedulePostRATDList::~SchedulePostRATDList() {
  delete HazardRec;
  delete AntiDepBreak;
}

/// Initialize state associated with the next scheduling region.
void SchedulePostRATDList::enterRegion(MachineBasicBlock *bb,
                 MachineBasicBlock::iterator begin,
                 MachineBasicBlock::iterator end,
                 unsigned regioninstrs) {
  ScheduleDAGInstrs::enterRegion(bb, begin, end, regioninstrs);
  Sequence.clear();
}

/// Print the schedule before exiting the region.
void SchedulePostRATDList::exitRegion() {
  DEBUG({
      dbgs() << "*** Final schedule ***\n";
      dumpSchedule();
      dbgs() << '\n';
    });
  ScheduleDAGInstrs::exitRegion();
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// dumpSchedule - dump the scheduled Sequence.
LLVM_DUMP_METHOD void SchedulePostRATDList::dumpSchedule() const {
  for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
    if (SUnit *SU = Sequence[i])
      SU->dump(this);
    else
      dbgs() << "**** NOOP ****\n";
  }
}
#endif

bool PostRAScheduler::enablePostRAScheduler(
    const TargetSubtargetInfo &ST,
    CodeGenOpt::Level OptLevel,
    TargetSubtargetInfo::AntiDepBreakMode &Mode,
    TargetSubtargetInfo::RegClassVector &CriticalPathRCs) const {
  Mode = ST.getAntiDepBreakMode();
  ST.getCriticalPathRCs(CriticalPathRCs);

  // Check for explicit enable/disable of post-ra scheduling.
  if (EnablePostRAScheduler.getPosition() > 0)
    return EnablePostRAScheduler;

  return ST.enablePostRAScheduler() &&
         OptLevel >= ST.getOptLevelToEnablePostRAScheduler();
}
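
// For example, a subtarget whose enablePostRAScheduler() returns true and
// whose getOptLevelToEnablePostRAScheduler() returns CodeGenOpt::Default
// gets post-RA scheduling at -O2 and -O3 but not at lower optimization
// levels, unless -post-RA-scheduler explicitly overrides the decision.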

bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
  if (skipFunction(*Fn.getFunction()))
    return false;

  TII = Fn.getSubtarget().getInstrInfo();
  MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
  AliasAnalysis *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
  TargetPassConfig *PassConfig = &getAnalysis<TargetPassConfig>();

  RegClassInfo.runOnMachineFunction(Fn);

  TargetSubtargetInfo::AntiDepBreakMode AntiDepMode =
    TargetSubtargetInfo::ANTIDEP_NONE;
  SmallVector<const TargetRegisterClass*, 4> CriticalPathRCs;

  // Check that post-RA scheduling is enabled for this target.
  // This may upgrade the AntiDepMode.
  if (!enablePostRAScheduler(Fn.getSubtarget(), PassConfig->getOptLevel(),
                             AntiDepMode, CriticalPathRCs))
    return false;

  // Check for an anti-dependency-breaking override...
  if (EnableAntiDepBreaking.getPosition() > 0) {
    AntiDepMode = (EnableAntiDepBreaking == "all")
      ? TargetSubtargetInfo::ANTIDEP_ALL
      : ((EnableAntiDepBreaking == "critical")
         ? TargetSubtargetInfo::ANTIDEP_CRITICAL
         : TargetSubtargetInfo::ANTIDEP_NONE);
  }

  DEBUG(dbgs() << "PostRAScheduler\n");

  SchedulePostRATDList Scheduler(Fn, MLI, AA, RegClassInfo, AntiDepMode,
                                 CriticalPathRCs);

  // Loop over all of the basic blocks.
  for (auto &MBB : Fn) {
#ifndef NDEBUG
    // If DebugDiv > 0, then only schedule MBBs with (ID % DebugDiv) == DebugMod.
    if (DebugDiv > 0) {
      static int bbcnt = 0;
      if (bbcnt++ % DebugDiv != DebugMod)
        continue;
      dbgs() << "*** DEBUG scheduling " << Fn.getName()
             << ":BB#" << MBB.getNumber() << " ***\n";
    }
#endif

    // Initialize register live-range state for scheduling in this block.
    Scheduler.startBlock(&MBB);

    // Schedule each sequence of instructions not interrupted by a label
    // or anything else that effectively needs to shut down scheduling.
    MachineBasicBlock::iterator Current = MBB.end();
    unsigned Count = MBB.size(), CurrentCount = Count;
    for (MachineBasicBlock::iterator I = Current; I != MBB.begin();) {
      MachineInstr &MI = *std::prev(I);
      --Count;
      // Calls are not scheduling boundaries before register allocation, but
      // post-ra we don't gain anything by scheduling across calls since we
      // don't need to worry about register pressure.
      if (MI.isCall() || TII->isSchedulingBoundary(MI, &MBB, Fn)) {
        Scheduler.enterRegion(&MBB, I, Current, CurrentCount - Count);
        Scheduler.setEndIndex(CurrentCount);
        Scheduler.schedule();
        Scheduler.exitRegion();
        Scheduler.EmitSchedule();
        Current = &MI;
        CurrentCount = Count;
        Scheduler.Observe(MI, CurrentCount);
      }
      I = MI;
      if (MI.isBundle())
        Count -= MI.getBundleSize();
    }
    assert(Count == 0 && "Instruction count mismatch!");
    assert((MBB.begin() == Current || CurrentCount != 0) &&
           "Instruction count mismatch!");
    Scheduler.enterRegion(&MBB, MBB.begin(), Current, CurrentCount);
    Scheduler.setEndIndex(CurrentCount);
    Scheduler.schedule();
    Scheduler.exitRegion();
    Scheduler.EmitSchedule();

    // Clean up register live-range state.
    Scheduler.finishBlock();

    // Update register kills.
    Scheduler.fixupKills(MBB);
  }

  return true;
}
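
// Region-splitting example: in a block like
//
//   %r0 = LOAD ...
//   %r1 = ADD ...
//   CALL @f
//   %r2 = MUL ...
//   RET
//
// the CALL (and the terminator, which is also a scheduling boundary) splits
// the block, so the loop above schedules [LOAD, ADD] and [MUL] as separate
// regions; boundary instructions themselves are only Observe()d, never
// reordered. (The instructions and registers shown are illustrative, not
// from any particular target.)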

/// StartBlock - Initialize register live-range state for scheduling in
/// this block.
///
void SchedulePostRATDList::startBlock(MachineBasicBlock *BB) {
  // Call the superclass.
  ScheduleDAGInstrs::startBlock(BB);

  // Reset the hazard recognizer and anti-dep breaker.
  HazardRec->Reset();
  if (AntiDepBreak)
    AntiDepBreak->StartBlock(BB);
}

/// Schedule - Schedule the instruction range using list scheduling.
///
void SchedulePostRATDList::schedule() {
  // Build the scheduling graph.
  buildSchedGraph(AA);

  if (AntiDepBreak) {
    unsigned Broken =
      AntiDepBreak->BreakAntiDependencies(SUnits, RegionBegin, RegionEnd,
                                          EndIndex, DbgValues);

    if (Broken != 0) {
      // We made changes. Update the dependency graph.
      // Theoretically we could update the graph in place:
      // When a live range is changed to use a different register, remove
      // the def's anti-dependence *and* output-dependence edges due to
      // that register, and add new anti-dependence and output-dependence
      // edges based on the next live range of the register.
      ScheduleDAG::clearDAG();
      buildSchedGraph(AA);

      NumFixedAnti += Broken;
    }
  }

  postprocessDAG();

  DEBUG(dbgs() << "********** List Scheduling **********\n");
  DEBUG(
    for (const SUnit &SU : SUnits) {
      SU.dumpAll(this);
      dbgs() << '\n';
    }
  );

  AvailableQueue.initNodes(SUnits);
  ListScheduleTopDown();
  AvailableQueue.releaseState();
}

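// To see why the rebuild above is needed, consider (registers illustrative):
//
//   %r1 = op A
//        = use %r1
//   %r1 = op B     // anti-dependence on the use, output-dep on the first def
//
// If the breaker renames the second def to %r2, the anti- and output-
// dependence edges involving %r1 disappear, so the DAG built before renaming
// no longer describes the region and must be rebuilt.
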
/// Observe - Update liveness information to account for the current
/// instruction, which will not be scheduled.
///
void SchedulePostRATDList::Observe(MachineInstr &MI, unsigned Count) {
  if (AntiDepBreak)
    AntiDepBreak->Observe(MI, Count, EndIndex);
}

/// FinishBlock - Clean up register live-range state.
///
void SchedulePostRATDList::finishBlock() {
  if (AntiDepBreak)
    AntiDepBreak->FinishBlock();

  // Call the superclass.
  ScheduleDAGInstrs::finishBlock();
}

/// Apply each ScheduleDAGMutation step in order.
void SchedulePostRATDList::postprocessDAG() {
  for (auto &M : Mutations)
    M->apply(this);
}

//===----------------------------------------------------------------------===//
//  Top-Down Scheduling
//===----------------------------------------------------------------------===//

/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
/// the PendingQueue if the count reaches zero.
void SchedulePostRATDList::ReleaseSucc(SUnit *SU, SDep *SuccEdge) {
  SUnit *SuccSU = SuccEdge->getSUnit();

  if (SuccEdge->isWeak()) {
    --SuccSU->WeakPredsLeft;
    return;
  }
#ifndef NDEBUG
  if (SuccSU->NumPredsLeft == 0) {
    dbgs() << "*** Scheduling failed! ***\n";
    SuccSU->dump(this);
    dbgs() << " has been released too many times!\n";
    llvm_unreachable(nullptr);
  }
#endif
  --SuccSU->NumPredsLeft;

  // Standard scheduler algorithms will recompute the depth of the successor
  // here as such:
  //   SuccSU->setDepthToAtLeast(SU->getDepth() + SuccEdge->getLatency());
  //
  // However, we lazily compute node depth instead. Note that
  // ScheduleNodeTopDown has already updated the depth of this node, which
  // causes all descendants to be marked dirty. Setting the successor depth
  // explicitly here would cause depth to be recomputed for all its ancestors.
  // If the successor is not yet ready (because of a transitively redundant
  // edge) then this causes depth computation to be quadratic in the size of
  // the DAG.

  // If all the node's predecessors are scheduled, this node is ready
  // to be scheduled. Ignore the special ExitSU node.
  if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
    PendingQueue.push_back(SuccSU);
}

/// ReleaseSuccessors - Call ReleaseSucc on each of SU's successors.
void SchedulePostRATDList::ReleaseSuccessors(SUnit *SU) {
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    ReleaseSucc(SU, &*I);
  }
}

/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending
/// count of its successors. If a successor pending count is zero, add it to
/// the Available queue.
void SchedulePostRATDList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
  DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
  DEBUG(SU->dump(this));

  Sequence.push_back(SU);
  assert(CurCycle >= SU->getDepth() &&
         "Node scheduled above its depth!");
  SU->setDepthToAtLeast(CurCycle);

  ReleaseSuccessors(SU);
  SU->isScheduled = true;
  AvailableQueue.scheduledNode(SU);
}

/// emitNoop - Add a noop to the current instruction sequence.
void SchedulePostRATDList::emitNoop(unsigned CurCycle) {
  DEBUG(dbgs() << "*** Emitting noop in cycle " << CurCycle << '\n');
  HazardRec->EmitNoop();
  Sequence.push_back(nullptr);   // NULL here means noop
  ++NumNoops;
}

/// ListScheduleTopDown - The main loop of list scheduling for top-down
/// schedulers.
void SchedulePostRATDList::ListScheduleTopDown() {
  unsigned CurCycle = 0;

  // We're scheduling top-down but we're visiting the regions in
  // bottom-up order, so we don't know the hazards at the start of a
  // region. So assume no hazards (this should usually be ok as most
  // blocks are a single region).
  HazardRec->Reset();

  // Release any successors of the special Entry node.
  ReleaseSuccessors(&EntrySU);

  // Add all leaves to Available queue.
  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
    // It is available if it has no predecessors.
    if (!SUnits[i].NumPredsLeft && !SUnits[i].isAvailable) {
      AvailableQueue.push(&SUnits[i]);
      SUnits[i].isAvailable = true;
    }
  }

  // In any cycle where we can't schedule any instructions, we must
  // stall or emit a noop, depending on the target.
  bool CycleHasInsts = false;

  // While Available queue is not empty, grab the node with the highest
  // priority. If it is not ready put it back. Schedule the node.
  std::vector<SUnit*> NotReady;
  Sequence.reserve(SUnits.size());
  while (!AvailableQueue.empty() || !PendingQueue.empty()) {
    // Check to see if any of the pending instructions are ready to issue. If
    // so, add them to the available queue.
    unsigned MinDepth = ~0u;
    for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
      if (PendingQueue[i]->getDepth() <= CurCycle) {
        AvailableQueue.push(PendingQueue[i]);
        PendingQueue[i]->isAvailable = true;
        PendingQueue[i] = PendingQueue.back();
        PendingQueue.pop_back();
        --i; --e;
      } else if (PendingQueue[i]->getDepth() < MinDepth)
        MinDepth = PendingQueue[i]->getDepth();
    }

    DEBUG(dbgs() << "\n*** Examining Available\n"; AvailableQueue.dump(this));

    SUnit *FoundSUnit = nullptr, *NotPreferredSUnit = nullptr;
    bool HasNoopHazards = false;
    while (!AvailableQueue.empty()) {
      SUnit *CurSUnit = AvailableQueue.pop();

      ScheduleHazardRecognizer::HazardType HT =
        HazardRec->getHazardType(CurSUnit, 0/*no stalls*/);
      if (HT == ScheduleHazardRecognizer::NoHazard) {
        if (HazardRec->ShouldPreferAnother(CurSUnit)) {
          if (!NotPreferredSUnit) {
            // If this is the first non-preferred node for this cycle, then
            // record it and continue searching for a preferred node. If this
            // is not the first non-preferred node, then treat it as though
            // there had been a hazard.
            NotPreferredSUnit = CurSUnit;
            continue;
          }
        } else {
          FoundSUnit = CurSUnit;
          break;
        }
      }

      // Remember if this is a noop hazard.
      HasNoopHazards |= HT == ScheduleHazardRecognizer::NoopHazard;

      NotReady.push_back(CurSUnit);
    }

    // If we have a non-preferred node, push it back onto the available list.
    // If we did not find a preferred node, then schedule this first
    // non-preferred node.
    if (NotPreferredSUnit) {
      if (!FoundSUnit) {
        DEBUG(dbgs() << "*** Will schedule a non-preferred instruction...\n");
        FoundSUnit = NotPreferredSUnit;
      } else {
        AvailableQueue.push(NotPreferredSUnit);
      }

      NotPreferredSUnit = nullptr;
    }

    // Add the nodes that aren't ready back onto the available list.
    if (!NotReady.empty()) {
      AvailableQueue.push_all(NotReady);
      NotReady.clear();
    }

    // If we found a node to schedule...
    if (FoundSUnit) {
      // If we need to emit noops prior to this instruction, then do so.
      unsigned NumPreNoops = HazardRec->PreEmitNoops(FoundSUnit);
      for (unsigned i = 0; i != NumPreNoops; ++i)
        emitNoop(CurCycle);

      // ... schedule the node...
      ScheduleNodeTopDown(FoundSUnit, CurCycle);
      HazardRec->EmitInstruction(FoundSUnit);
      CycleHasInsts = true;
      if (HazardRec->atIssueLimit()) {
        DEBUG(dbgs() << "*** Max instructions per cycle " << CurCycle << '\n');
        HazardRec->AdvanceCycle();
        ++CurCycle;
        CycleHasInsts = false;
      }
    } else {
      if (CycleHasInsts) {
        DEBUG(dbgs() << "*** Finished cycle " << CurCycle << '\n');
        HazardRec->AdvanceCycle();
      } else if (!HasNoopHazards) {
        // Otherwise, we have a pipeline stall, but no other problem,
        // just advance the current cycle and try again.
        DEBUG(dbgs() << "*** Stall in cycle " << CurCycle << '\n');
        HazardRec->AdvanceCycle();
        ++NumStalls;
      } else {
        // Otherwise, we have no instructions to issue and we have instructions
        // that will fault if we don't do this right. This is the case for
        // processors without pipeline interlocks and other cases.
        emitNoop(CurCycle);
      }

      ++CurCycle;
      CycleHasInsts = false;
    }
  }

#ifndef NDEBUG
  unsigned ScheduledNodes = VerifyScheduledDAG(/*isBottomUp=*/false);
  unsigned Noops = 0;
  for (unsigned i = 0, e = Sequence.size(); i != e; ++i)
    if (!Sequence[i])
      ++Noops;
  assert(Sequence.size() - Noops == ScheduledNodes &&
         "The number of nodes scheduled doesn't match the expected number!");
#endif // NDEBUG
}
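
// A rough single-region trace, assuming unit issue width and a two-cycle
// load latency (the instructions and latencies are illustrative):
//
//   cycle 0: load A issues
//   cycle 1: load B issues          (an add of A and B now has depth 3)
//   cycle 2: the add is still in PendingQueue -> pipeline stall (NumStalls)
//   cycle 3: the add becomes available and issues
//
// On a target whose hazard recognizer reports NoopHazard instead, the stall
// in cycle 2 would be filled by emitNoop().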

// EmitSchedule - Emit the machine code in scheduled order.
void SchedulePostRATDList::EmitSchedule() {
  RegionBegin = RegionEnd;

  // If first instruction was a DBG_VALUE then put it back.
  if (FirstDbgValue)
    BB->splice(RegionEnd, BB, FirstDbgValue);

  // Then re-insert them according to the given schedule.
  for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
    if (SUnit *SU = Sequence[i])
      BB->splice(RegionEnd, BB, SU->getInstr());
    else
      // Null SUnit* is a noop.
      TII->insertNoop(*BB, RegionEnd);

    // Update the Begin iterator, as the first instruction in the block
    // may have been scheduled later.
    if (i == 0)
      RegionBegin = std::prev(RegionEnd);
  }

  // Reinsert any remaining debug_values; walking the list in reverse keeps
  // DBG_VALUEs that followed the same instruction in their original order.
  for (std::vector<std::pair<MachineInstr *, MachineInstr *> >::iterator
         DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
    std::pair<MachineInstr *, MachineInstr *> P = *std::prev(DI);
    MachineInstr *DbgValue = P.first;
    MachineBasicBlock::iterator OrigPrivMI = P.second;
    BB->splice(++OrigPrivMI, BB, DbgValue);
  }
  DbgValues.clear();
  FirstDbgValue = nullptr;
}