Coverage Report

Created: 2017-10-03 07:32

/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/lib/CodeGen/MachineScheduler.cpp
Line
Count
Source
1
//===- MachineScheduler.cpp - Machine Instruction Scheduler ---------------===//
2
//
3
//                     The LLVM Compiler Infrastructure
4
//
5
// This file is distributed under the University of Illinois Open Source
6
// License. See LICENSE.TXT for details.
7
//
8
//===----------------------------------------------------------------------===//
9
//
10
// MachineScheduler schedules machine instructions after phi elimination. It
11
// preserves LiveIntervals so it can be invoked before register allocation.
12
//
13
//===----------------------------------------------------------------------===//
14
15
#include "llvm/CodeGen/MachineScheduler.h"
16
#include "llvm/ADT/ArrayRef.h"
17
#include "llvm/ADT/BitVector.h"
18
#include "llvm/ADT/DenseMap.h"
19
#include "llvm/ADT/PriorityQueue.h"
20
#include "llvm/ADT/STLExtras.h"
21
#include "llvm/ADT/SmallVector.h"
22
#include "llvm/ADT/iterator_range.h"
23
#include "llvm/Analysis/AliasAnalysis.h"
24
#include "llvm/CodeGen/LiveInterval.h"
25
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
26
#include "llvm/CodeGen/MachineBasicBlock.h"
27
#include "llvm/CodeGen/MachineDominators.h"
28
#include "llvm/CodeGen/MachineFunction.h"
29
#include "llvm/CodeGen/MachineFunctionPass.h"
30
#include "llvm/CodeGen/MachineInstr.h"
31
#include "llvm/CodeGen/MachineLoopInfo.h"
32
#include "llvm/CodeGen/MachineOperand.h"
33
#include "llvm/CodeGen/MachinePassRegistry.h"
34
#include "llvm/CodeGen/MachineRegisterInfo.h"
35
#include "llvm/CodeGen/MachineValueType.h"
36
#include "llvm/CodeGen/Passes.h"
37
#include "llvm/CodeGen/RegisterClassInfo.h"
38
#include "llvm/CodeGen/RegisterPressure.h"
39
#include "llvm/CodeGen/ScheduleDAG.h"
40
#include "llvm/CodeGen/ScheduleDAGInstrs.h"
41
#include "llvm/CodeGen/ScheduleDAGMutation.h"
42
#include "llvm/CodeGen/ScheduleDFS.h"
43
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
44
#include "llvm/CodeGen/SlotIndexes.h"
45
#include "llvm/CodeGen/TargetPassConfig.h"
46
#include "llvm/CodeGen/TargetSchedule.h"
47
#include "llvm/MC/LaneBitmask.h"
48
#include "llvm/Pass.h"
49
#include "llvm/Support/CommandLine.h"
50
#include "llvm/Support/Compiler.h"
51
#include "llvm/Support/Debug.h"
52
#include "llvm/Support/ErrorHandling.h"
53
#include "llvm/Support/GraphWriter.h"
54
#include "llvm/Support/raw_ostream.h"
55
#include "llvm/Target/TargetInstrInfo.h"
56
#include "llvm/Target/TargetLowering.h"
57
#include "llvm/Target/TargetRegisterInfo.h"
58
#include "llvm/Target/TargetSubtargetInfo.h"
59
#include <algorithm>
60
#include <cassert>
61
#include <cstdint>
62
#include <iterator>
63
#include <limits>
64
#include <memory>
65
#include <string>
66
#include <tuple>
67
#include <utility>
68
#include <vector>
69
70
using namespace llvm;
71
72
#define DEBUG_TYPE "machine-scheduler"
73
74
namespace llvm {
75
76
cl::opt<bool> ForceTopDown("misched-topdown", cl::Hidden,
77
                           cl::desc("Force top-down list scheduling"));
78
cl::opt<bool> ForceBottomUp("misched-bottomup", cl::Hidden,
79
                            cl::desc("Force bottom-up list scheduling"));
80
cl::opt<bool>
81
DumpCriticalPathLength("misched-dcpl", cl::Hidden,
82
                       cl::desc("Print critical path length to stdout"));
83
84
} // end namespace llvm
85
86
#ifndef NDEBUG
87
static cl::opt<bool> ViewMISchedDAGs("view-misched-dags", cl::Hidden,
88
  cl::desc("Pop up a window to show MISched dags after they are processed"));
89
90
/// In some situations a few uninteresting nodes depend on nearly all other
91
/// nodes in the graph; provide a cutoff to hide them.
92
static cl::opt<unsigned> ViewMISchedCutoff("view-misched-cutoff", cl::Hidden,
93
  cl::desc("Hide nodes with more predecessor/successor than cutoff"));
94
95
static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden,
96
  cl::desc("Stop scheduling after N instructions"), cl::init(~0U));
97
98
static cl::opt<std::string> SchedOnlyFunc("misched-only-func", cl::Hidden,
99
  cl::desc("Only schedule this function"));
100
static cl::opt<unsigned> SchedOnlyBlock("misched-only-block", cl::Hidden,
101
  cl::desc("Only schedule this MBB#"));
102
#else
103
static bool ViewMISchedDAGs = false;
104
#endif // NDEBUG
105
106
/// Avoid quadratic complexity in unusually large basic blocks by limiting the
107
/// size of the ready lists.
108
static cl::opt<unsigned> ReadyListLimit("misched-limit", cl::Hidden,
109
  cl::desc("Limit ready list to N instructions"), cl::init(256));
110
111
static cl::opt<bool> EnableRegPressure("misched-regpressure", cl::Hidden,
112
  cl::desc("Enable register pressure scheduling."), cl::init(true));
113
114
static cl::opt<bool> EnableCyclicPath("misched-cyclicpath", cl::Hidden,
115
  cl::desc("Enable cyclic critical path analysis."), cl::init(true));
116
117
static cl::opt<bool> EnableMemOpCluster("misched-cluster", cl::Hidden,
118
                                        cl::desc("Enable memop clustering."),
119
                                        cl::init(true));
120
121
static cl::opt<bool> VerifyScheduling("verify-misched", cl::Hidden,
122
  cl::desc("Verify machine instrs before and after machine scheduling"));
123
124
// DAG subtrees must have at least this many nodes.
125
static const unsigned MinSubtreeSize = 8;
126
127
// Pin the vtables to this file.
128
0
void MachineSchedStrategy::anchor() {}
129
130
0
void ScheduleDAGMutation::anchor() {}
131
132
//===----------------------------------------------------------------------===//
133
// Machine Instruction Scheduling Pass and Registry
134
//===----------------------------------------------------------------------===//
135
136
46.6k
MachineSchedContext::MachineSchedContext() {
137
46.6k
  RegClassInfo = new RegisterClassInfo();
138
46.6k
}
139
140
46.5k
MachineSchedContext::~MachineSchedContext() {
141
46.5k
  delete RegClassInfo;
142
46.5k
}
143
144
namespace {
145
146
/// Base class for a machine scheduler class that can run at any point.
147
class MachineSchedulerBase : public MachineSchedContext,
148
                             public MachineFunctionPass {
149
public:
150
46.6k
  MachineSchedulerBase(char &ID): MachineFunctionPass(ID) {}
151
152
  void print(raw_ostream &O, const Module* = nullptr) const override;
153
154
protected:
155
  void scheduleRegions(ScheduleDAGInstrs &Scheduler, bool FixKillFlags);
156
};
157
158
/// MachineScheduler runs after coalescing and before register allocation.
159
class MachineScheduler : public MachineSchedulerBase {
160
public:
161
  MachineScheduler();
162
163
  void getAnalysisUsage(AnalysisUsage &AU) const override;
164
165
  bool runOnMachineFunction(MachineFunction&) override;
166
167
  static char ID; // Class identification, replacement for typeinfo
168
169
protected:
170
  ScheduleDAGInstrs *createMachineScheduler();
171
};
172
173
/// PostMachineScheduler runs after register allocation, shortly before code emission.
174
class PostMachineScheduler : public MachineSchedulerBase {
175
public:
176
  PostMachineScheduler();
177
178
  void getAnalysisUsage(AnalysisUsage &AU) const override;
179
180
  bool runOnMachineFunction(MachineFunction&) override;
181
182
  static char ID; // Class identification, replacement for typeinfo
183
184
protected:
185
  ScheduleDAGInstrs *createPostMachineScheduler();
186
};
187
188
} // end anonymous namespace
189
190
char MachineScheduler::ID = 0;
191
192
char &llvm::MachineSchedulerID = MachineScheduler::ID;
193
194
36.7k
INITIALIZE_PASS_BEGIN(MachineScheduler, DEBUG_TYPE,
195
36.7k
                      "Machine Instruction Scheduler", false, false)
196
36.7k
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
197
36.7k
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
198
36.7k
INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
199
36.7k
INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
200
36.7k
INITIALIZE_PASS_END(MachineScheduler, DEBUG_TYPE,
201
                    "Machine Instruction Scheduler", false, false)
202
203
32.0k
MachineScheduler::MachineScheduler() : MachineSchedulerBase(ID) {
204
32.0k
  initializeMachineSchedulerPass(*PassRegistry::getPassRegistry());
205
32.0k
}
206
207
32.0k
void MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
208
32.0k
  AU.setPreservesCFG();
209
32.0k
  AU.addRequiredID(MachineDominatorsID);
210
32.0k
  AU.addRequired<MachineLoopInfo>();
211
32.0k
  AU.addRequired<AAResultsWrapperPass>();
212
32.0k
  AU.addRequired<TargetPassConfig>();
213
32.0k
  AU.addRequired<SlotIndexes>();
214
32.0k
  AU.addPreserved<SlotIndexes>();
215
32.0k
  AU.addRequired<LiveIntervals>();
216
32.0k
  AU.addPreserved<LiveIntervals>();
217
32.0k
  MachineFunctionPass::getAnalysisUsage(AU);
218
32.0k
}
219
220
char PostMachineScheduler::ID = 0;
221
222
char &llvm::PostMachineSchedulerID = PostMachineScheduler::ID;
223
224
INITIALIZE_PASS(PostMachineScheduler, "postmisched",
225
                "PostRA Machine Instruction Scheduler", false, false)
226
227
14.5k
PostMachineScheduler::PostMachineScheduler() : MachineSchedulerBase(ID) {
228
14.5k
  initializePostMachineSchedulerPass(*PassRegistry::getPassRegistry());
229
14.5k
}
230
231
14.5k
void PostMachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
232
14.5k
  AU.setPreservesCFG();
233
14.5k
  AU.addRequiredID(MachineDominatorsID);
234
14.5k
  AU.addRequired<MachineLoopInfo>();
235
14.5k
  AU.addRequired<TargetPassConfig>();
236
14.5k
  MachineFunctionPass::getAnalysisUsage(AU);
237
14.5k
}
238
239
MachinePassRegistry MachineSchedRegistry::Registry;
240
241
/// A dummy default scheduler factory indicates whether the scheduler
242
/// is overridden on the command line.
243
0
static ScheduleDAGInstrs *useDefaultMachineSched(MachineSchedContext *C) {
244
0
  return nullptr;
245
0
}
246
247
/// MachineSchedOpt allows command line selection of the scheduler.
248
static cl::opt<MachineSchedRegistry::ScheduleDAGCtor, false,
249
               RegisterPassParser<MachineSchedRegistry>>
250
MachineSchedOpt("misched",
251
                cl::init(&useDefaultMachineSched), cl::Hidden,
252
                cl::desc("Machine instruction scheduler to use"));
253
254
static MachineSchedRegistry
255
DefaultSchedRegistry("default", "Use the target's default scheduler choice.",
256
                     useDefaultMachineSched);
257
258
static cl::opt<bool> EnableMachineSched(
259
    "enable-misched",
260
    cl::desc("Enable the machine instruction scheduling pass."), cl::init(true),
261
    cl::Hidden);
262
263
static cl::opt<bool> EnablePostRAMachineSched(
264
    "enable-post-misched",
265
    cl::desc("Enable the post-ra machine instruction scheduling pass."),
266
    cl::init(true), cl::Hidden);
267
268
/// Decrement this iterator until reaching the top or a non-debug instr.
269
static MachineBasicBlock::const_iterator
270
priorNonDebug(MachineBasicBlock::const_iterator I,
271
19.3M
              MachineBasicBlock::const_iterator Beg) {
272
19.3M
  assert(I != Beg && "reached the top of the region, cannot decrement");
273
19.3M
  while (--I != Beg) {
274
15.2M
    if (!I->isDebugValue())
275
15.2M
      break;
276
15.2M
  }
277
19.3M
  return I;
278
19.3M
}
279
280
/// Non-const version.
281
static MachineBasicBlock::iterator
282
priorNonDebug(MachineBasicBlock::iterator I,
283
19.3M
              MachineBasicBlock::const_iterator Beg) {
284
19.3M
  return priorNonDebug(MachineBasicBlock::const_iterator(I), Beg)
285
19.3M
      .getNonConstIterator();
286
19.3M
}
287
288
/// If this iterator is a debug value, increment until reaching the End or a
289
/// non-debug instruction.
290
static MachineBasicBlock::const_iterator
291
nextIfDebug(MachineBasicBlock::const_iterator I,
292
11.9M
            MachineBasicBlock::const_iterator End) {
293
11.9M
  for (; I != End; ++I) {
294
11.3M
    if (!I->isDebugValue())
295
11.3M
      break;
296
11.3M
  }
297
11.9M
  return I;
298
11.9M
}
299
300
/// Non-const version.
301
static MachineBasicBlock::iterator
302
nextIfDebug(MachineBasicBlock::iterator I,
303
9.80M
            MachineBasicBlock::const_iterator End) {
304
9.80M
  return nextIfDebug(MachineBasicBlock::const_iterator(I), End)
305
9.80M
      .getNonConstIterator();
306
9.80M
}
307
308
/// Instantiate a ScheduleDAGInstrs that will be owned by the caller.
309
552k
ScheduleDAGInstrs *MachineScheduler::createMachineScheduler() {
310
552k
  // Select the scheduler, or set the default.
311
552k
  MachineSchedRegistry::ScheduleDAGCtor Ctor = MachineSchedOpt;
312
552k
  if (Ctor != useDefaultMachineSched)
313
15
    return Ctor(this);
314
552k
315
552k
  // Get the default scheduler set by the target for this function.
316
552k
  ScheduleDAGInstrs *Scheduler = PassConfig->createMachineScheduler(this);
317
552k
  if (Scheduler)
318
546k
    return Scheduler;
319
5.96k
320
5.96k
  // Default to GenericScheduler.
321
5.96k
  return createGenericSchedLive(this);
322
5.96k
}
323
324
/// Instantiate a ScheduleDAGInstrs for PostRA scheduling that will be owned by
325
/// the caller. We don't have a command line option to override the postRA
326
/// scheduler. The Target must configure it.
327
11.4k
ScheduleDAGInstrs *PostMachineScheduler::createPostMachineScheduler() {
328
11.4k
  // Get the postRA scheduler set by the target for this function.
329
11.4k
  ScheduleDAGInstrs *Scheduler = PassConfig->createPostMachineScheduler(this);
330
11.4k
  if (Scheduler)
331
11.3k
    return Scheduler;
332
101
333
101
  // Default to GenericScheduler.
334
101
  return createGenericSchedPostRA(this);
335
101
}
336
337
/// Top-level MachineScheduler pass driver.
338
///
339
/// Visit blocks in function order. Divide each block into scheduling regions
340
/// and visit them bottom-up. Visiting regions bottom-up is not required, but is
341
/// consistent with the DAG builder, which traverses the interior of the
342
/// scheduling regions bottom-up.
343
///
344
/// This design avoids exposing scheduling boundaries to the DAG builder,
345
/// simplifying the DAG builder's support for "special" target instructions.
346
/// At the same time the design allows target schedulers to operate across
347
/// scheduling boundaries, for example to bundle the boundary instructions
348
/// without reordering them. This creates complexity, because the target
349
/// scheduler must update the RegionBegin and RegionEnd positions cached by
350
/// ScheduleDAGInstrs whenever adding or removing instructions. A much simpler
351
/// design would be to split blocks at scheduling boundaries, but LLVM has a
352
/// general bias against block splitting purely for implementation simplicity.
353
588k
bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
354
588k
  if (skipFunction(*mf.getFunction()))
355
44
    return false;
356
588k
357
588k
  
  if (EnableMachineSched.getNumOccurrences()) {
358
459
    if (!EnableMachineSched)
359
372
      return false;
360
587k
  } else if (!mf.getSubtarget().enableMachineScheduler())
361
35.9k
    return false;
362
552k
363
552k
  
  DEBUG(dbgs() << "Before MISched:\n"; mf.print(dbgs()));
364
552k
365
552k
  // Initialize the context of the pass.
366
552k
  MF = &mf;
367
552k
  MLI = &getAnalysis<MachineLoopInfo>();
368
552k
  MDT = &getAnalysis<MachineDominatorTree>();
369
552k
  PassConfig = &getAnalysis<TargetPassConfig>();
370
552k
  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
371
552k
372
552k
  LIS = &getAnalysis<LiveIntervals>();
373
552k
374
552k
  if (VerifyScheduling) {
375
1
    DEBUG(LIS->dump());
376
1
    MF->verify(this, "Before machine scheduling.");
377
1
  }
378
552k
  RegClassInfo->runOnMachineFunction(*MF);
379
552k
380
552k
  // Instantiate the selected scheduler for this target, function, and
381
552k
  // optimization level.
382
552k
  std::unique_ptr<ScheduleDAGInstrs> Scheduler(createMachineScheduler());
383
552k
  scheduleRegions(*Scheduler, false);
384
552k
385
552k
  DEBUG(LIS->dump());
386
552k
  if (VerifyScheduling)
387
1
    MF->verify(this, "After machine scheduling.");
388
588k
  return true;
389
588k
}
390
391
460k
bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) {
392
460k
  if (skipFunction(*mf.getFunction()))
393
1
    return false;
394
460k
395
460k
  
  if (EnablePostRAMachineSched.getNumOccurrences()) {
396
45
    if (!EnablePostRAMachineSched)
397
45
      return false;
398
460k
  } else if (!mf.getSubtarget().enablePostRAScheduler()) {
399
448k
    DEBUG(dbgs() << "Subtarget disables post-MI-sched.\n");
400
460k
    return false;
401
460k
  }
402
11.4k
  
  DEBUG(dbgs() << "Before post-MI-sched:\n"; mf.print(dbgs()));
403
11.4k
404
11.4k
  // Initialize the context of the pass.
405
11.4k
  MF = &mf;
406
11.4k
  MLI = &getAnalysis<MachineLoopInfo>();
407
11.4k
  PassConfig = &getAnalysis<TargetPassConfig>();
408
11.4k
409
11.4k
  if (VerifyScheduling)
410
0
    MF->verify(this, "Before post machine scheduling.");
411
11.4k
412
11.4k
  // Instantiate the selected scheduler for this target, function, and
413
11.4k
  // optimization level.
414
11.4k
  std::unique_ptr<ScheduleDAGInstrs> Scheduler(createPostMachineScheduler());
415
11.4k
  scheduleRegions(*Scheduler, true);
416
11.4k
417
11.4k
  if (VerifyScheduling)
418
0
    MF->verify(this, "After post machine scheduling.");
419
460k
  return true;
420
460k
}
421
422
/// Return true if the given instruction should not be included in a scheduling
423
/// region.
424
///
425
/// MachineScheduler does not currently support scheduling across calls. To
426
/// handle calls, the DAG builder needs to be modified to create register
427
/// anti/output dependencies on the registers clobbered by the call's regmask
428
/// operand. In PreRA scheduling, the stack pointer adjustment already prevents
429
/// scheduling across calls. In PostRA scheduling, we need the isCall to enforce
430
/// the boundary, but there would be no benefit to postRA scheduling across
431
/// calls this late anyway.
432
static bool isSchedBoundary(MachineBasicBlock::iterator MI,
433
                            MachineBasicBlock *MBB,
434
                            MachineFunction *MF,
435
31.1M
                            const TargetInstrInfo *TII) {
436
28.7M
  return MI->isCall() || TII->isSchedulingBoundary(*MI, MBB, *MF);
437
31.1M
}
438
439
/// A region of an MBB for scheduling.
440
namespace {
441
struct SchedRegion {
442
  /// RegionBegin is the first instruction in the scheduling region, and
443
  /// RegionEnd is either MBB->end() or the scheduling boundary after the
444
  /// last instruction in the scheduling region. These iterators cannot refer
445
  /// to instructions outside of the identified scheduling region because
446
  /// those may be reordered before scheduling this region.
447
  MachineBasicBlock::iterator RegionBegin;
448
  MachineBasicBlock::iterator RegionEnd;
449
  unsigned NumRegionInstrs;
450
451
  SchedRegion(MachineBasicBlock::iterator B, MachineBasicBlock::iterator E,
452
              unsigned N) :
453
11.7M
    RegionBegin(B), RegionEnd(E), NumRegionInstrs(N) {}
454
};
455
} // end anonymous namespace
456
457
using MBBRegionsVector = SmallVector<SchedRegion, 16>;
458
459
static void
460
getSchedRegions(MachineBasicBlock *MBB,
461
                MBBRegionsVector &Regions,
462
4.18M
                bool RegionsTopDown) {
463
4.18M
  MachineFunction *MF = MBB->getParent();
464
4.18M
  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
465
4.18M
466
4.18M
  MachineBasicBlock::iterator I = nullptr;
467
4.18M
  for(MachineBasicBlock::iterator RegionEnd = MBB->end();
468
15.9M
      
      RegionEnd != MBB->begin(); RegionEnd = I) {
469
11.7M
470
11.7M
    // Avoid decrementing RegionEnd for blocks with no terminator.
471
11.7M
    if (RegionEnd != MBB->end() ||
472
11.7M
        
        isSchedBoundary(&*std::prev(RegionEnd), &*MBB, MF, TII)) {
473
11.2M
      --RegionEnd;
474
11.2M
    }
475
11.7M
476
11.7M
    // The next region starts above the previous region. Look backward in the
477
11.7M
    // instruction stream until we find the nearest boundary.
478
11.7M
    unsigned NumRegionInstrs = 0;
479
11.7M
    I = RegionEnd;
480
31.1M
    for (; I != MBB->begin(); --I) {
481
27.0M
      MachineInstr &MI = *std::prev(I);
482
27.0M
      if (isSchedBoundary(&MI, &*MBB, MF, TII))
483
7.69M
        break;
484
19.3M
      
if (19.3M
!MI.isDebugValue()19.3M
)
485
19.3M
        // MBB::size() uses instr_iterator to count. Here we need a bundle to
486
19.3M
        // count as a single instruction.
487
19.3M
        ++NumRegionInstrs;
488
27.0M
    }
489
11.7M
490
11.7M
    Regions.push_back(SchedRegion(I, RegionEnd, NumRegionInstrs));
491
11.7M
  }
492
4.18M
493
4.18M
  if (RegionsTopDown)
494
3.25k
    std::reverse(Regions.begin(), Regions.end());
495
4.18M
}
496
497
/// Main driver for both MachineScheduler and PostMachineScheduler.
498
void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler,
499
563k
                                           bool FixKillFlags) {
500
563k
  // Visit all machine basic blocks.
501
563k
  //
502
563k
  // TODO: Visit blocks in global postorder or postorder within the bottom-up
503
563k
  // loop tree. Then we can optionally compute global RegPressure.
504
563k
  for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end();
505
4.74M
       
       MBB != MBBEnd; ++MBB) {
506
4.18M
507
4.18M
    Scheduler.startBlock(&*MBB);
508
4.18M
509
#ifndef NDEBUG
510
    if (SchedOnlyFunc.getNumOccurrences() && SchedOnlyFunc != MF->getName())
511
      continue;
512
    if (SchedOnlyBlock.getNumOccurrences()
513
        && (int)SchedOnlyBlock != MBB->getNumber())
514
      continue;
515
#endif
516
517
4.18M
    // Break the block into scheduling regions [I, RegionEnd). RegionEnd
518
4.18M
    // points to the scheduling boundary at the bottom of the region. The DAG
519
4.18M
    // does not include RegionEnd, but the region does (i.e. the next
520
4.18M
    // RegionEnd is above the previous RegionBegin). If the current block has
521
4.18M
    // no terminator then RegionEnd == MBB->end() for the bottom region.
522
4.18M
    //
523
4.18M
    // All the regions of MBB are first found and stored in MBBRegions, which
524
4.18M
    // will be processed (MBB) top-down if initialized with true.
525
4.18M
    //
526
4.18M
    // The Scheduler may insert instructions during either schedule() or
527
4.18M
    // exitRegion(), even for empty regions. So the local iterators 'I' and
528
4.18M
    // 'RegionEnd' are invalid across these calls. Instructions must not be
529
4.18M
    // added to other regions than the current one without updating MBBRegions.
530
4.18M
531
4.18M
    MBBRegionsVector MBBRegions;
532
4.18M
    getSchedRegions(&*MBB, MBBRegions, Scheduler.doMBBSchedRegionsTopDown());
533
4.18M
    for (MBBRegionsVector::iterator R = MBBRegions.begin();
534
15.9M
         
         R != MBBRegions.end(); ++R) {
535
11.7M
      MachineBasicBlock::iterator I = R->RegionBegin;
536
11.7M
      MachineBasicBlock::iterator RegionEnd = R->RegionEnd;
537
11.7M
      unsigned NumRegionInstrs = R->NumRegionInstrs;
538
11.7M
539
11.7M
      // Notify the scheduler of the region, even if we may skip scheduling
540
11.7M
      // it. Perhaps it still needs to be bundled.
541
11.7M
      Scheduler.enterRegion(&*MBB, I, RegionEnd, NumRegionInstrs);
542
11.7M
543
11.7M
      // Skip empty scheduling regions (0 or 1 schedulable instructions).
544
11.7M
      if (I == RegionEnd || I == std::prev(RegionEnd)) {
545
7.70M
        // Close the current region. Bundle the terminator if needed.
546
7.70M
        // This invalidates 'RegionEnd' and 'I'.
547
7.70M
        Scheduler.exitRegion();
548
7.70M
        continue;
549
7.70M
      }
550
4.08M
      
      DEBUG(dbgs() << "********** MI Scheduling **********\n");
551
4.08M
      DEBUG(dbgs() << MF->getName()
552
4.08M
            << ":BB#" << MBB->getNumber() << " " << MBB->getName()
553
4.08M
            << "\n  From: " << *I << "    To: ";
554
4.08M
            if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
555
4.08M
            else dbgs() << "End";
556
4.08M
            dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n');
557
4.08M
      if (DumpCriticalPathLength) {
558
0
        errs() << MF->getName();
559
0
        errs() << ":BB# " << MBB->getNumber();
560
0
        errs() << " " << MBB->getName() << " \n";
561
0
      }
562
11.7M
563
11.7M
      // Schedule a region: possibly reorder instructions.
564
11.7M
      // This invalidates the original region iterators.
565
11.7M
      Scheduler.schedule();
566
11.7M
567
11.7M
      // Close the current region.
568
11.7M
      Scheduler.exitRegion();
569
11.7M
    }
570
4.18M
    Scheduler.finishBlock();
571
4.18M
    // FIXME: Ideally, no further passes should rely on kill flags. However,
572
4.18M
    // thumb2 size reduction is currently an exception, so the PostMIScheduler
573
4.18M
    // needs to do this.
574
4.18M
    if (FixKillFlags)
575
13.2k
      Scheduler.fixupKills(*MBB);
576
4.18M
  }
577
563k
  Scheduler.finalizeSchedule();
578
563k
}
579
580
0
void MachineSchedulerBase::print(raw_ostream &O, const Module* m) const {
581
0
  // unimplemented
582
0
}
583
584
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
585
LLVM_DUMP_METHOD void ReadyQueue::dump() const {
586
  dbgs() << "Queue " << Name << ": ";
587
  for (const SUnit *SU : Queue)
588
    dbgs() << SU->NodeNum << " ";
589
  dbgs() << "\n";
590
}
591
#endif
592
593
//===----------------------------------------------------------------------===//
594
// ScheduleDAGMI - Basic machine instruction scheduling. This is
595
// independent of PreRA/PostRA scheduling and involves no extra book-keeping for
596
// virtual registers.
597
//===----------------------------------------------------------------------===//
598
599
// Provide a vtable anchor.
600
563k
ScheduleDAGMI::~ScheduleDAGMI() = default;
601
602
46.3k
bool ScheduleDAGMI::canAddEdge(SUnit *SuccSU, SUnit *PredSU) {
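  // Adding PredSU -> SuccSU is safe when it cannot create a cycle: either SuccSU
  // is the exit node, or PredSU is not already reachable from SuccSU.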
603
46.3k
  return SuccSU == &ExitSU || !Topo.IsReachable(PredSU, SuccSU);
604
46.3k
}
605
606
1.39M
bool ScheduleDAGMI::addEdge(SUnit *SuccSU, const SDep &PredDep) {
607
1.39M
  if (SuccSU != &ExitSU) {
608
665k
    // Do not use WillCreateCycle, it assumes SD scheduling.
609
665k
    // If Pred is reachable from Succ, then the edge creates a cycle.
610
665k
    if (Topo.IsReachable(PredDep.getSUnit(), SuccSU))
611
6.14k
      return false;
612
659k
    Topo.AddPred(SuccSU, PredDep.getSUnit());
613
659k
  }
614
1.38M
  SuccSU->addPred(PredDep, /*Required=*/!PredDep.isArtificial());
615
1.38M
  // Return true regardless of whether a new edge needed to be inserted.
616
1.38M
  return true;
617
1.39M
}
618
619
/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When
620
/// NumPredsLeft reaches zero, release the successor node.
621
///
622
/// FIXME: Adjust SuccSU height based on MinLatency.
623
1.47M
void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) {
624
1.47M
  SUnit *SuccSU = SuccEdge->getSUnit();
625
1.47M
626
1.47M
  if (SuccEdge->isWeak()) {
627
5.67k
    --SuccSU->WeakPredsLeft;
628
5.67k
    if (SuccEdge->isCluster())
629
5.47k
      NextClusterSucc = SuccSU;
630
5.67k
    return;
631
5.67k
  }
632
#ifndef NDEBUG
633
  if (SuccSU->NumPredsLeft == 0) {
634
    dbgs() << "*** Scheduling failed! ***\n";
635
    SuccSU->dump(this);
636
    dbgs() << " has been released too many times!\n";
637
    llvm_unreachable(nullptr);
638
  }
639
#endif
640
  // SU->TopReadyCycle was set to CurrCycle when it was scheduled. However,
641
1.46M
  // CurrCycle may have advanced since then.
642
1.46M
  
  if (SuccSU->TopReadyCycle < SU->TopReadyCycle + SuccEdge->getLatency())
643
1.15M
    SuccSU->TopReadyCycle = SU->TopReadyCycle + SuccEdge->getLatency();
644
1.46M
645
1.46M
  --SuccSU->NumPredsLeft;
646
1.46M
  if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
647
842k
    SchedImpl->releaseTopNode(SuccSU);
648
1.47M
}
649
650
/// releaseSuccessors - Call releaseSucc on each of SU's successors.
651
5.47M
void ScheduleDAGMI::releaseSuccessors(SUnit *SU) {
652
5.47M
  for (SDep &Succ : SU->Succs)
653
1.47M
    releaseSucc(SU, &Succ);
654
5.47M
}
655
656
/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. When
657
/// NumSuccsLeft reaches zero, release the predecessor node.
658
///
659
/// FIXME: Adjust PredSU height based on MinLatency.
660
24.2M
void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) {
661
24.2M
  SUnit *PredSU = PredEdge->getSUnit();
662
24.2M
663
24.2M
  if (PredEdge->isWeak()) {
664
984k
    --PredSU->WeakSuccsLeft;
665
984k
    if (PredEdge->isCluster())
666
961k
      NextClusterPred = PredSU;
667
984k
    return;
668
984k
  }
669
#ifndef NDEBUG
670
  if (PredSU->NumSuccsLeft == 0) {
671
    dbgs() << "*** Scheduling failed! ***\n";
672
    PredSU->dump(this);
673
    dbgs() << " has been released too many times!\n";
674
    llvm_unreachable(nullptr);
675
  }
676
#endif
677
  // SU->BotReadyCycle was set to CurrCycle when it was scheduled. However,
678
23.2M
  // CurrCycle may have advanced since then.
679
23.2M
  
  if (PredSU->BotReadyCycle < SU->BotReadyCycle + PredEdge->getLatency())
680
12.8M
    PredSU->BotReadyCycle = SU->BotReadyCycle + PredEdge->getLatency();
681
23.2M
682
23.2M
  --PredSU->NumSuccsLeft;
683
23.2M
  if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU)
684
13.6M
    SchedImpl->releaseBottomNode(PredSU);
685
24.2M
}
686
687
/// releasePredecessors - Call releasePred on each of SU's predecessors.
688
19.3M
void ScheduleDAGMI::releasePredecessors(SUnit *SU) {
689
19.3M
  for (SDep &Pred : SU->Preds)
690
24.2M
    releasePred(SU, &Pred);
691
19.3M
}
692
693
4.19M
void ScheduleDAGMI::startBlock(MachineBasicBlock *bb) {
694
4.19M
  ScheduleDAGInstrs::startBlock(bb);
695
4.19M
  SchedImpl->enterMBB(bb);
696
4.19M
}
697
698
4.19M
void ScheduleDAGMI::finishBlock() {
699
4.19M
  SchedImpl->leaveMBB();
700
4.19M
  ScheduleDAGInstrs::finishBlock();
701
4.19M
}
702
703
/// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
704
/// crossing a scheduling boundary. [begin, end) includes all instructions in
705
/// the region, including the boundary itself and single-instruction regions
706
/// that don't get scheduled.
707
void ScheduleDAGMI::enterRegion(MachineBasicBlock *bb,
708
                                     MachineBasicBlock::iterator begin,
709
                                     MachineBasicBlock::iterator end,
710
                                     unsigned regioninstrs)
711
11.8M
{
712
11.8M
  ScheduleDAGInstrs::enterRegion(bb, begin, end, regioninstrs);
713
11.8M
714
11.8M
  SchedImpl->initPolicy(begin, end, regioninstrs);
715
11.8M
}
716
717
/// This is normally called from the main scheduler loop but may also be invoked
718
/// by the scheduling strategy to perform additional code motion.
719
void ScheduleDAGMI::moveInstruction(
720
1.78M
  MachineInstr *MI, MachineBasicBlock::iterator InsertPos) {
721
1.78M
  // Advance RegionBegin if the first instruction moves down.
722
1.78M
  if (&*RegionBegin == MI)
723
382k
    ++RegionBegin;
724
1.78M
725
1.78M
  // Update the instruction stream.
726
1.78M
  BB->splice(InsertPos, BB, MI);
727
1.78M
728
1.78M
  // Update LiveIntervals
729
1.78M
  if (LIS)
730
1.78M
    LIS->handleMove(*MI, /*UpdateFlags=*/true);
731
1.78M
732
1.78M
  // Recede RegionBegin if an instruction moves above the first.
733
1.78M
  if (RegionBegin == InsertPos)
734
91.5k
    RegionBegin = MI;
735
1.78M
}
736
737
16.6M
bool ScheduleDAGMI::checkSchedLimit() {
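  // In debug builds, honor -misched-cutoff: once the limit is reached, collapse
  // the unscheduled zone and stop. Release builds always return true.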
738
#ifndef NDEBUG
739
  if (NumInstrsScheduled == MISchedCutoff && MISchedCutoff != ~0U) {
740
    CurrentTop = CurrentBottom;
741
    return false;
742
  }
743
  ++NumInstrsScheduled;
744
#endif
745
  return true;
746
16.6M
}
747
748
/// Per-region scheduling driver, called back from
749
/// MachineScheduler::runOnMachineFunction. This is a simplified driver that
750
/// does not consider liveness or register pressure. It is useful for PostRA
751
/// scheduling and potentially other custom schedulers.
752
7.71k
void ScheduleDAGMI::schedule() {
753
7.71k
  DEBUG(dbgs() << "ScheduleDAGMI::schedule starting\n");
754
7.71k
  DEBUG(SchedImpl->dumpPolicy());
755
7.71k
756
7.71k
  // Build the DAG.
757
7.71k
  buildSchedGraph(AA);
758
7.71k
759
7.71k
  Topo.InitDAGTopologicalSorting();
760
7.71k
761
7.71k
  postprocessDAG();
762
7.71k
763
7.71k
  SmallVector<SUnit*, 8> TopRoots, BotRoots;
764
7.71k
  findRootsAndBiasEdges(TopRoots, BotRoots);
765
7.71k
766
7.71k
  // Initialize the strategy before modifying the DAG.
767
7.71k
  // This may initialize a DFSResult to be used for queue priority.
768
7.71k
  SchedImpl->initialize(this);
769
7.71k
770
7.71k
  DEBUG(
771
7.71k
    if (EntrySU.getInstr() != nullptr)
772
7.71k
      EntrySU.dumpAll(this);
773
7.71k
    for (const SUnit &SU : SUnits)
774
7.71k
      SU.dumpAll(this);
775
7.71k
    if (ExitSU.getInstr() != nullptr)
776
7.71k
      ExitSU.dumpAll(this);
777
7.71k
  );
778
7.71k
  if (ViewMISchedDAGs) viewGraph();
779
7.71k
780
7.71k
  // Initialize ready queues now that the DAG and priority data are finalized.
781
7.71k
  initQueues(TopRoots, BotRoots);
782
7.71k
783
7.71k
  bool IsTopNode = false;
784
46.5k
  while (true) {
785
46.5k
    DEBUG(dbgs() << "** ScheduleDAGMI::schedule picking next node\n");
786
46.5k
    SUnit *SU = SchedImpl->pickNode(IsTopNode);
787
46.5k
    if (
!SU46.5k
)
break7.71k
;
788
38.8k
789
46.5k
    assert(!SU->isScheduled && "Node already scheduled");
790
38.8k
    if (!checkSchedLimit())
791
0
      break;
792
38.8k
793
38.8k
    MachineInstr *MI = SU->getInstr();
794
38.8k
    if (IsTopNode) {
795
38.8k
      assert(SU->isTopReady() && "node still has unscheduled dependencies");
796
38.8k
      if (&*CurrentTop == MI)
797
34.8k
        CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
798
38.8k
      else
799
4.01k
        moveInstruction(MI, CurrentTop);
800
0
    } else {
801
0
      assert(SU->isBottomReady() && "node still has unscheduled dependencies");
802
0
      MachineBasicBlock::iterator priorII =
803
0
        priorNonDebug(CurrentBottom, CurrentTop);
804
0
      if (&*priorII == MI)
805
0
        CurrentBottom = priorII;
806
0
      else {
807
0
        if (&*CurrentTop == MI)
808
0
          CurrentTop = nextIfDebug(++CurrentTop, priorII);
809
0
        moveInstruction(MI, CurrentBottom);
810
0
        CurrentBottom = MI;
811
0
      }
812
0
    }
813
46.5k
    // Notify the scheduling strategy before updating the DAG.
814
46.5k
    // This sets the scheduled node's ReadyCycle to CurrCycle. When updateQueues
815
46.5k
    // runs, it can then use the accurate ReadyCycle time to determine whether
816
46.5k
    // newly released nodes can move to the readyQ.
817
46.5k
    SchedImpl->schedNode(SU, IsTopNode);
818
46.5k
819
46.5k
    updateQueues(SU, IsTopNode);
820
46.5k
  }
821
7.71k
  assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
822
7.71k
823
7.71k
  placeDebugValues();
824
7.71k
825
7.71k
  DEBUG({
826
7.71k
      unsigned BBNum = begin()->getParent()->getNumber();
827
7.71k
      dbgs() << "*** Final schedule for BB#" << BBNum << " ***\n";
828
7.71k
      dumpSchedule();
829
7.71k
      dbgs() << '\n';
830
7.71k
    });
831
7.71k
}
832
833
/// Apply each ScheduleDAGMutation step in order.
834
4.08M
void ScheduleDAGMI::postprocessDAG() {
835
4.08M
  for (auto &m : Mutations)
836
15.9M
    m->apply(this);
837
4.08M
}
838
839
void ScheduleDAGMI::
840
findRootsAndBiasEdges(SmallVectorImpl<SUnit*> &TopRoots,
841
4.08M
                      SmallVectorImpl<SUnit*> &BotRoots) {
842
16.6M
  for (SUnit &SU : SUnits) {
843
16.6M
    assert(!SU.isBoundaryNode() && "Boundary node should not be in SUnits");
844
16.6M
845
16.6M
    // Order predecessors so DFSResult follows the critical path.
846
16.6M
    SU.biasCriticalPath();
847
16.6M
848
16.6M
    // A SUnit is ready to top schedule if it has no predecessors.
849
16.6M
    if (!SU.NumPredsLeft)
850
10.1M
      TopRoots.push_back(&SU);
851
16.6M
    // A SUnit is ready to bottom schedule if it has no successors.
852
16.6M
    if (!SU.NumSuccsLeft)
853
2.86M
      BotRoots.push_back(&SU);
854
16.6M
  }
855
4.08M
  ExitSU.biasCriticalPath();
856
4.08M
}
857
858
/// Identify DAG roots and setup scheduler queues.
859
void ScheduleDAGMI::initQueues(ArrayRef<SUnit*> TopRoots,
860
4.08M
                               ArrayRef<SUnit*> BotRoots) {
861
4.08M
  NextClusterSucc = nullptr;
862
4.08M
  NextClusterPred = nullptr;
863
4.08M
864
4.08M
  // Release all DAG roots for scheduling, not including EntrySU/ExitSU.
865
4.08M
  //
866
4.08M
  // Nodes with unreleased weak edges can still be roots.
867
4.08M
  // Release top roots in forward order.
868
4.08M
  for (SUnit *SU : TopRoots)
869
10.1M
    SchedImpl->releaseTopNode(SU);
870
4.08M
871
4.08M
  // Release bottom roots in reverse order so the higher priority nodes appear
872
4.08M
  // first. This is more natural and slightly more efficient.
873
4.08M
  for (SmallVectorImpl<SUnit*>::const_reverse_iterator
874
6.95M
         I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I) {
875
2.86M
    SchedImpl->releaseBottomNode(*I);
876
2.86M
  }
877
4.08M
878
4.08M
  releaseSuccessors(&EntrySU);
879
4.08M
  releasePredecessors(&ExitSU);
880
4.08M
881
4.08M
  SchedImpl->registerRoots();
882
4.08M
883
4.08M
  // Advance past initial DebugValues.
884
4.08M
  CurrentTop = nextIfDebug(RegionBegin, RegionEnd);
885
4.08M
  CurrentBottom = RegionEnd;
886
4.08M
}
887
888
/// Update scheduler queues after scheduling an instruction.
889
16.6M
void ScheduleDAGMI::updateQueues(SUnit *SU, bool IsTopNode) {
890
16.6M
  // Release dependent instructions for scheduling.
891
16.6M
  if (IsTopNode)
892
1.38M
    releaseSuccessors(SU);
893
16.6M
  else
894
15.2M
    releasePredecessors(SU);
895
16.6M
896
16.6M
  SU->isScheduled = true;
897
16.6M
}
898
899
/// Reinsert any remaining debug_values, just like the PostRA scheduler.
900
4.08M
void ScheduleDAGMI::placeDebugValues() {
901
4.08M
  // If first instruction was a DBG_VALUE then put it back.
902
4.08M
  if (FirstDbgValue) {
903
123
    BB->splice(RegionBegin, BB, FirstDbgValue);
904
123
    RegionBegin = FirstDbgValue;
905
123
  }
906
4.08M
907
4.08M
  for (std::vector<std::pair<MachineInstr *, MachineInstr *>>::iterator
908
4.08M
         DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
909
362
    std::pair<MachineInstr *, MachineInstr *> P = *std::prev(DI);
910
362
    MachineInstr *DbgValue = P.first;
911
362
    MachineBasicBlock::iterator OrigPrevMI = P.second;
912
362
    if (&*RegionBegin == DbgValue)
913
0
      ++RegionBegin;
914
362
    BB->splice(++OrigPrevMI, BB, DbgValue);
915
362
    if (OrigPrevMI == std::prev(RegionEnd))
916
55
      RegionEnd = DbgValue;
917
362
  }
918
4.08M
  DbgValues.clear();
919
4.08M
  FirstDbgValue = nullptr;
920
4.08M
}
921
922
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
923
LLVM_DUMP_METHOD void ScheduleDAGMI::dumpSchedule() const {
924
  for (MachineBasicBlock::iterator MI = begin(), ME = end(); MI != ME; ++MI) {
925
    if (SUnit *SU = getSUnit(&(*MI)))
926
      SU->dump(this);
927
    else
928
      dbgs() << "Missing SUnit\n";
929
  }
930
}
931
#endif
932
933
//===----------------------------------------------------------------------===//
934
// ScheduleDAGMILive - Base class for MachineInstr scheduling with LiveIntervals
935
// preservation.
936
//===----------------------------------------------------------------------===//
937
938
552k
ScheduleDAGMILive::~ScheduleDAGMILive() {
939
552k
  delete DFSResult;
940
552k
}
941
942
2.86M
void ScheduleDAGMILive::collectVRegUses(SUnit &SU) {
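  // Record every virtual register this instruction reads in VRegUses, so the
  // region's local users of each vreg can be found later (e.g. by the pressure
  // and cyclic-critical-path updates).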
943
2.86M
  const MachineInstr &MI = *SU.getInstr();
944
10.2M
  for (const MachineOperand &MO : MI.operands()) {
945
10.2M
    if (!MO.isReg())
946
3.47M
      continue;
947
6.78M
    
    if (!MO.readsReg())
948
2.32M
      continue;
949
4.45M
    
    if (TrackLaneMasks && !MO.isUse())
950
92.1k
      continue;
951
4.36M
952
4.36M
    unsigned Reg = MO.getReg();
953
4.36M
    if (!TargetRegisterInfo::isVirtualRegister(Reg))
954
860k
      continue;
955
3.50M
956
3.50M
    // Ignore re-defs.
957
3.50M
    
    if (TrackLaneMasks) {
958
286k
      bool FoundDef = false;
959
1.46M
      for (const MachineOperand &MO2 : MI.operands()) {
960
1.46M
        if (MO2.isReg() && MO2.isDef() && MO2.getReg() == Reg && !MO2.isDead()) {
961
6.73k
          FoundDef = true;
962
6.73k
          break;
963
6.73k
        }
964
286k
      }
965
286k
      if (FoundDef)
966
6.73k
        continue;
967
3.49M
    }
968
3.49M
969
3.49M
    // Record this local VReg use.
970
3.49M
    VReg2SUnitMultiMap::iterator UI = VRegUses.find(Reg);
971
39.9M
    for (; 
UI != VRegUses.end()39.9M
;
++UI36.4M
) {
972
36.4M
      if (UI->SU == &SU)
973
65.6k
        break;
974
36.4M
    }
975
3.49M
    if (UI == VRegUses.end())
976
3.43M
      VRegUses.insert(VReg2SUnit(Reg, LaneBitmask::getNone(), &SU));
977
10.2M
  }
978
2.86M
}
979
980
/// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
981
/// crossing a scheduling boundary. [begin, end) includes all instructions in
982
/// the region, including the boundary itself and single-instruction regions
983
/// that don't get scheduled.
984
void ScheduleDAGMILive::enterRegion(MachineBasicBlock *bb,
985
                                MachineBasicBlock::iterator begin,
986
                                MachineBasicBlock::iterator end,
987
                                unsigned regioninstrs)
988
11.7M
{
989
11.7M
  // ScheduleDAGMI initializes SchedImpl's per-region policy.
990
11.7M
  ScheduleDAGMI::enterRegion(bb, begin, end, regioninstrs);
991
11.7M
992
11.7M
  // For convenience remember the end of the liveness region.
993
11.7M
  LiveRegionEnd = (RegionEnd == bb->end()) ? RegionEnd : std::next(RegionEnd);
994
11.7M
995
11.7M
  SUPressureDiffs.clear();
996
11.7M
997
11.7M
  ShouldTrackPressure = SchedImpl->shouldTrackPressure();
998
11.7M
  ShouldTrackLaneMasks = SchedImpl->shouldTrackLaneMasks();
999
11.7M
1000
11.7M
  assert((!ShouldTrackLaneMasks || ShouldTrackPressure) &&
1001
11.7M
         "ShouldTrackLaneMasks requires ShouldTrackPressure");
1002
11.7M
}
1003
1004
// Set up the register pressure trackers for the top scheduled and bottom
1005
// scheduled regions.
1006
112k
void ScheduleDAGMILive::initRegPressure() {
1007
112k
  VRegUses.clear();
1008
112k
  VRegUses.setUniverse(MRI.getNumVirtRegs());
1009
112k
  for (SUnit &SU : SUnits)
1010
2.86M
    collectVRegUses(SU);
1011
112k
1012
112k
  TopRPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin,
1013
112k
                    ShouldTrackLaneMasks, false);
1014
112k
  BotRPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd,
1015
112k
                    ShouldTrackLaneMasks, false);
1016
112k
1017
112k
  // Close the RPTracker to finalize live ins.
1018
112k
  RPTracker.closeRegion();
1019
112k
1020
112k
  DEBUG(RPTracker.dump());
1021
112k
1022
112k
  // Initialize the live ins and live outs.
1023
112k
  TopRPTracker.addLiveRegs(RPTracker.getPressure().LiveInRegs);
1024
112k
  BotRPTracker.addLiveRegs(RPTracker.getPressure().LiveOutRegs);
1025
112k
1026
112k
  // Close one end of the tracker so we can call
1027
112k
  // getMaxUpward/DownwardPressureDelta before advancing across any
1028
112k
  // instructions. This converts currently live regs into live ins/outs.
1029
112k
  TopRPTracker.closeTop();
1030
112k
  BotRPTracker.closeBottom();
1031
112k
1032
112k
  BotRPTracker.initLiveThru(RPTracker);
1033
112k
  if (!BotRPTracker.getLiveThru().empty()) {
1034
112k
    TopRPTracker.initLiveThru(BotRPTracker.getLiveThru());
1035
112k
    DEBUG(dbgs() << "Live Thru: ";
1036
112k
          dumpRegSetPressure(BotRPTracker.getLiveThru(), TRI));
1037
112k
  };
1038
112k
1039
112k
  // For each live out vreg reduce the pressure change associated with other
1040
112k
  // uses of the same vreg below the live-out reaching def.
1041
112k
  updatePressureDiffs(RPTracker.getPressure().LiveOutRegs);
1042
112k
1043
112k
  // Account for liveness generated by the region boundary.
1044
112k
  if (
LiveRegionEnd != RegionEnd112k
) {
1045
100k
    SmallVector<RegisterMaskPair, 8> LiveUses;
1046
100k
    BotRPTracker.recede(&LiveUses);
1047
100k
    updatePressureDiffs(LiveUses);
1048
100k
  }
1049
112k
1050
112k
  DEBUG(
1051
112k
    dbgs() << "Top Pressure:\n";
1052
112k
    dumpRegSetPressure(TopRPTracker.getRegSetPressureAtPos(), TRI);
1053
112k
    dbgs() << "Bottom Pressure:\n";
1054
112k
    dumpRegSetPressure(BotRPTracker.getRegSetPressureAtPos(), TRI);
1055
112k
  );
1056
112k
1057
112k
  assert(BotRPTracker.getPos() == RegionEnd && "Can't find the region bottom");
1058
112k
1059
112k
  // Cache the list of excess pressure sets in this region. This will also track
1060
112k
  // the max pressure in the scheduled code for these sets.
1061
112k
  RegionCriticalPSets.clear();
1062
112k
  const std::vector<unsigned> &RegionPressure =
1063
112k
    RPTracker.getPressure().MaxSetPressure;
1064
1.77M
  for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) {
1065
1.66M
    unsigned Limit = RegClassInfo->getRegPressureSetLimit(i);
1066
1.66M
    if (RegionPressure[i] > Limit) {
1067
3.83k
      DEBUG(dbgs() << TRI->getRegPressureSetName(i)
1068
3.83k
            << " Limit " << Limit
1069
3.83k
            << " Actual " << RegionPressure[i] << "\n");
1070
3.83k
      RegionCriticalPSets.push_back(PressureChange(i));
1071
3.83k
    }
1072
1.66M
  }
1073
112k
  DEBUG(dbgs() << "Excess PSets: ";
1074
112k
        for (const PressureChange &RCPS : RegionCriticalPSets)
1075
112k
          dbgs() << TRI->getRegPressureSetName(
1076
112k
            RCPS.getPSet()) << " ";
1077
112k
        dbgs() << "\n");
1078
112k
}
1079
1080
void ScheduleDAGMILive::
1081
updateScheduledPressure(const SUnit *SU,
1082
2.86M
                        const std::vector<unsigned> &NewMaxPressure) {
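  // Fold the newly observed max pressure into RegionCriticalPSets and note any
  // pressure set that is at or near its limit.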
1083
2.86M
  const PressureDiff &PDiff = getPressureDiff(SU);
1084
2.86M
  unsigned CritIdx = 0, CritEnd = RegionCriticalPSets.size();
1085
5.81M
  for (const PressureChange &PC : PDiff) {
1086
5.81M
    if (!PC.isValid())
1087
2.84M
      break;
1088
2.96M
    unsigned ID = PC.getPSet();
1089
3.04M
    while (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() < ID)
1090
75.4k
      ++CritIdx;
1091
2.96M
    if (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() == ID) {
1092
176k
      if ((int)NewMaxPressure[ID] > RegionCriticalPSets[CritIdx].getUnitInc()
1093
24.2k
          && NewMaxPressure[ID] <= (unsigned)std::numeric_limits<int16_t>::max())
1094
24.2k
        RegionCriticalPSets[CritIdx].setUnitInc(NewMaxPressure[ID]);
1095
176k
    }
1096
2.96M
    unsigned Limit = RegClassInfo->getRegPressureSetLimit(ID);
1097
2.96M
    if (NewMaxPressure[ID] >= Limit - 2) {
1098
220k
      DEBUG(dbgs() << "  " << TRI->getRegPressureSetName(ID) << ": "
1099
220k
            << NewMaxPressure[ID]
1100
220k
            << ((NewMaxPressure[ID] > Limit) ? " > " : " <= ") << Limit
1101
220k
            << "(+ " << BotRPTracker.getLiveThru()[ID] << " livethru)\n");
1102
220k
    }
1103
5.81M
  }
1104
2.86M
}
1105
1106
/// Update the PressureDiff array for liveness after scheduling this
1107
/// instruction.
1108
void ScheduleDAGMILive::updatePressureDiffs(
1109
2.95M
    ArrayRef<RegisterMaskPair> LiveUses) {
1110
2.55M
  for (const RegisterMaskPair &P : LiveUses) {
1111
2.55M
    unsigned Reg = P.RegUnit;
1112
2.55M
    /// FIXME: Currently assuming single-use physregs.
1113
2.55M
    if (!TRI->isVirtualRegister(Reg))
1114
136k
      continue;
1115
2.41M
1116
2.41M
    
    if (ShouldTrackLaneMasks) {
1117
299k
      // If the register has just become live then other uses won't change
1118
299k
      // this fact anymore => decrement pressure.
1119
299k
      // If the register has just become dead then other uses make it come
1120
299k
      // back to life => increment pressure.
1121
299k
      bool Decrement = P.LaneMask.any();
1122
299k
1123
299k
      for (const VReg2SUnit &V2SU
1124
447k
           : make_range(VRegUses.find(Reg), VRegUses.end())) {
1125
447k
        SUnit &SU = *V2SU.SU;
1126
447k
        if (SU.isScheduled || &SU == &ExitSU)
1127
186k
          continue;
1128
261k
1129
261k
        PressureDiff &PDiff = getPressureDiff(&SU);
1130
261k
        PDiff.addPressureChange(Reg, Decrement, &MRI);
1131
261k
        DEBUG(
1132
447k
          dbgs() << "  UpdateRegP: SU(" << SU.NodeNum << ") "
1133
447k
                 << PrintReg(Reg, TRI) << ':' << PrintLaneMask(P.LaneMask)
1134
447k
                 << ' ' << *SU.getInstr();
1135
447k
          dbgs() << "              to ";
1136
447k
          PDiff.dump(*TRI);
1137
447k
        );
1138
447k
      }
1139
2.41M
    } else {
1140
2.11M
      assert(P.LaneMask.any());
1141
2.11M
      DEBUG(dbgs() << "  LiveReg: " << PrintVRegOrUnit(Reg, TRI) << "\n");
1142
2.11M
      // This may be called before CurrentBottom has been initialized. However,
1143
2.11M
      // BotRPTracker must have a valid position. We want the value live into the
1144
2.11M
      // instruction or live out of the block, so ask for the previous
1145
2.11M
      // instruction's live-out.
1146
2.11M
      const LiveInterval &LI = LIS->getInterval(Reg);
1147
2.11M
      VNInfo *VNI;
1148
2.11M
      MachineBasicBlock::const_iterator I =
1149
2.11M
        nextIfDebug(BotRPTracker.getPos(), BB->end());
1150
2.11M
      if (I == BB->end())
1151
244k
        VNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
1152
1.87M
      else {
1153
1.87M
        LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(*I));
1154
1.87M
        VNI = LRQ.valueIn();
1155
1.87M
      }
1156
2.11M
      // RegisterPressureTracker guarantees that readsReg is true for LiveUses.
1157
2.11M
      assert(VNI && "No live value at use.");
1158
2.11M
      for (const VReg2SUnit &V2SU
1159
3.89M
           : make_range(VRegUses.find(Reg), VRegUses.end())) {
1160
3.89M
        SUnit *SU = V2SU.SU;
1161
3.89M
        // If this use comes before the reaching def, it cannot be a last use,
1162
3.89M
        // so decrease its pressure change.
1163
3.89M
        if (!SU->isScheduled && SU != &ExitSU) {
1164
3.64M
          LiveQueryResult LRQ =
1165
3.64M
              LI.Query(LIS->getInstructionIndex(*SU->getInstr()));
1166
3.64M
          if (LRQ.valueIn() == VNI) {
1167
3.13M
            PressureDiff &PDiff = getPressureDiff(SU);
1168
3.13M
            PDiff.addPressureChange(Reg, true, &MRI);
1169
3.13M
            DEBUG(
1170
3.13M
              dbgs() << "  UpdateRegP: SU(" << SU->NodeNum << ") "
1171
3.13M
                     << *SU->getInstr();
1172
3.13M
              dbgs() << "              to ";
1173
3.13M
              PDiff.dump(*TRI);
1174
3.13M
            );
1175
3.13M
          }
1176
3.64M
        }
1177
3.89M
      }
1178
2.11M
    }
1179
2.55M
  }
1180
2.95M
}
1181
1182
/// schedule - Called back from MachineScheduler::runOnMachineFunction
1183
/// after setting up the current scheduling region. [RegionBegin, RegionEnd)
1184
/// only includes instructions that have DAG nodes, not scheduling boundaries.
1185
///
1186
/// This is a skeletal driver, with all the functionality pushed into helpers,
1187
/// so that it can be easily extended by experimental schedulers. Generally,
1188
/// implementing MachineSchedStrategy should be sufficient to implement a new
1189
/// scheduling algorithm. However, if a scheduler further subclasses
1190
/// ScheduleDAGMILive then it will want to override this virtual method in order
1191
/// to update any specialized state.
1192
4.07M
void ScheduleDAGMILive::schedule() {
1193
4.07M
  DEBUG(dbgs() << "ScheduleDAGMILive::schedule starting\n");
1194
4.07M
  DEBUG(SchedImpl->dumpPolicy());
1195
4.07M
  buildDAGWithRegPressure();
1196
4.07M
1197
4.07M
  Topo.InitDAGTopologicalSorting();
1198
4.07M
1199
4.07M
  postprocessDAG();
1200
4.07M
1201
4.07M
  SmallVector<SUnit*, 8> TopRoots, BotRoots;
1202
4.07M
  findRootsAndBiasEdges(TopRoots, BotRoots);
1203
4.07M
1204
4.07M
  // Initialize the strategy before modifying the DAG.
1205
4.07M
  // This may initialize a DFSResult to be used for queue priority.
1206
4.07M
  SchedImpl->initialize(this);
1207
4.07M
1208
4.07M
  DEBUG(
1209
4.07M
    if (EntrySU.getInstr() != nullptr)
1210
4.07M
      EntrySU.dumpAll(this);
1211
4.07M
    for (const SUnit &SU : SUnits) {
1212
4.07M
      SU.dumpAll(this);
1213
4.07M
      if (ShouldTrackPressure) {
1214
4.07M
        dbgs() << "  Pressure Diff      : ";
1215
4.07M
        getPressureDiff(&SU).dump(*TRI);
1216
4.07M
      }
1217
4.07M
      dbgs() << "  Single Issue       : ";
1218
4.07M
      if (SchedModel.mustBeginGroup(SU.getInstr()) &&
1219
4.07M
         SchedModel.mustEndGroup(SU.getInstr()))
1220
4.07M
        dbgs() << "true;";
1221
4.07M
      else
1222
4.07M
        dbgs() << "false;";
1223
4.07M
      dbgs() << '\n';
1224
4.07M
    }
1225
4.07M
    if (ExitSU.getInstr() != nullptr)
1226
4.07M
      ExitSU.dumpAll(this);
1227
4.07M
  );
1228
4.07M
  if (ViewMISchedDAGs) viewGraph();
1229
4.07M
1230
4.07M
  // Initialize ready queues now that the DAG and priority data are finalized.
1231
4.07M
  initQueues(TopRoots, BotRoots);
1232
4.07M
1233
4.07M
  bool IsTopNode = false;
1234
20.6M
  while (true) {
1235
20.6M
    DEBUG(dbgs() << "** ScheduleDAGMILive::schedule picking next node\n");
1236
20.6M
    SUnit *SU = SchedImpl->pickNode(IsTopNode);
1237
20.6M
    if (!SU) break;
1238
16.5M
1239
20.6M
    assert(!SU->isScheduled && "Node already scheduled");
1240
16.5M
    if (!checkSchedLimit())
1241
0
      break;
1242
16.5M
1243
16.5M
    scheduleMI(SU, IsTopNode);
1244
16.5M
1245
16.5M
    if (DFSResult) {
1246
176
      unsigned SubtreeID = DFSResult->getSubtreeID(SU);
1247
176
      if (!ScheduledTrees.test(SubtreeID)) {
1248
38
        ScheduledTrees.set(SubtreeID);
1249
38
        DFSResult->scheduleTree(SubtreeID);
1250
38
        SchedImpl->scheduleTree(SubtreeID);
1251
38
      }
1252
176
    }
1253
20.6M
1254
20.6M
    // Notify the scheduling strategy after updating the DAG.
1255
20.6M
    SchedImpl->schedNode(SU, IsTopNode);
1256
20.6M
1257
20.6M
    updateQueues(SU, IsTopNode);
1258
20.6M
  }
1259
4.07M
  assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
1260
4.07M
1261
4.07M
  placeDebugValues();
1262
4.07M
1263
4.07M
  DEBUG({
1264
4.07M
      unsigned BBNum = begin()->getParent()->getNumber();
1265
4.07M
      dbgs() << "*** Final schedule for BB#" << BBNum << " ***\n";
1266
4.07M
      dumpSchedule();
1267
4.07M
      dbgs() << '\n';
1268
4.07M
    });
1269
4.07M
}
1270
1271
/// Build the DAG and setup three register pressure trackers.
1272
4.07M
void ScheduleDAGMILive::buildDAGWithRegPressure() {
1273
4.07M
  if (!ShouldTrackPressure) {
1274
3.96M
    RPTracker.reset();
1275
3.96M
    RegionCriticalPSets.clear();
1276
3.96M
    buildSchedGraph(AA);
1277
3.96M
    return;
1278
3.96M
  }
1279
112k
1280
112k
  // Initialize the register pressure tracker used by buildSchedGraph.
1281
112k
  RPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd,
1282
112k
                 ShouldTrackLaneMasks, /*TrackUntiedDefs=*/true);
1283
112k
1284
112k
  // Account for liveness generate by the region boundary.
1285
112k
  if (LiveRegionEnd != RegionEnd)
1286
100k
    RPTracker.recede();
1287
4.07M
1288
4.07M
  // Build the DAG, and compute current register pressure.
1289
4.07M
  buildSchedGraph(AA, &RPTracker, &SUPressureDiffs, LIS, ShouldTrackLaneMasks);
1290
4.07M
1291
4.07M
  // Initialize top/bottom trackers after computing region pressure.
1292
4.07M
  initRegPressure();
1293
4.07M
}
1294
1295
10
void ScheduleDAGMILive::computeDFSResult() {
1296
10
  if (!DFSResult)
1297
8
    DFSResult = new SchedDFSResult(/*BottomU*/true, MinSubtreeSize);
1298
10
  DFSResult->clear();
1299
10
  ScheduledTrees.clear();
1300
10
  DFSResult->resize(SUnits.size());
1301
10
  DFSResult->compute(SUnits);
1302
10
  ScheduledTrees.resize(DFSResult->getNumSubtrees());
1303
10
}
1304
1305
/// Compute the max cyclic critical path through the DAG. The scheduling DAG
1306
/// only provides the critical path for single block loops. To handle loops that
1307
/// span blocks, we could use the vreg path latencies provided by
1308
/// MachineTraceMetrics instead. However, MachineTraceMetrics is not currently
1309
/// available for use in the scheduler.
1310
///
1311
/// The cyclic path estimation identifies a def-use pair that crosses the back
1312
/// edge and considers the depth and height of the nodes. For example, consider
1313
/// the following instruction sequence where each instruction has unit latency
1314
/// and defines an eponymous virtual register:
1315
///
1316
/// a->b(a,c)->c(b)->d(c)->exit
1317
///
1318
/// The cyclic critical path is two cycles: b->c->b
1319
/// The acyclic critical path is four cycles: a->b->c->d->exit
1320
/// LiveOutHeight = height(c) = len(c->d->exit) = 2
1321
/// LiveOutDepth = depth(c) + 1 = len(a->b->c) + 1 = 3
1322
/// LiveInHeight = height(b) + 1 = len(b->c->d->exit) + 1 = 4
1323
/// LiveInDepth = depth(b) = len(a->b) = 1
1324
///
1325
/// LiveOutDepth - LiveInDepth = 3 - 1 = 2
1326
/// LiveInHeight - LiveOutHeight = 4 - 2 = 2
1327
/// CyclicCriticalPath = min(2, 2) = 2
1328
///
1329
/// This could be relevant to PostRA scheduling, but is currently implemented
1330
/// assuming LiveIntervals.
1331
4.05M
unsigned ScheduleDAGMILive::computeCyclicCriticalPath() {
1332
4.05M
  // This only applies to single-block loops.
1333
4.05M
  if (!BB->isSuccessor(BB))
1334
3.78M
    return 0;
1335
272k
1336
272k
  unsigned MaxCyclicLatency = 0;
1337
272k
  // Visit each live out vreg def to find def/use pairs that cross iterations.
1338
71.1k
  for (const RegisterMaskPair &P : RPTracker.getPressure().LiveOutRegs) {
1339
71.1k
    unsigned Reg = P.RegUnit;
1340
71.1k
    if (!TRI->isVirtualRegister(Reg))
1341
0
        continue;
1342
71.1k
    const LiveInterval &LI = LIS->getInterval(Reg);
1343
71.1k
    const VNInfo *DefVNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
1344
71.1k
    if (!DefVNI)
1345
201
      continue;
1346
70.9k
1347
70.9k
    MachineInstr *DefMI = LIS->getInstructionFromIndex(DefVNI->def);
1348
70.9k
    const SUnit *DefSU = getSUnit(DefMI);
1349
70.9k
    if (!DefSU)
1350
43.4k
      continue;
1351
27.4k
1352
27.4k
    unsigned LiveOutHeight = DefSU->getHeight();
1353
27.4k
    unsigned LiveOutDepth = DefSU->getDepth() + DefSU->Latency;
1354
27.4k
    // Visit all local users of the vreg def.
1355
27.4k
    for (const VReg2SUnit &V2SU
1356
274k
         : make_range(VRegUses.find(Reg), VRegUses.end())) {
1357
274k
      SUnit *SU = V2SU.SU;
1358
274k
      if (SU == &ExitSU)
1359
0
        continue;
1360
274k
1361
274k
      // Only consider uses of the phi.
1362
274k
      LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(*SU->getInstr()));
1363
274k
      if (!LRQ.valueIn()->isPHIDef())
1364
9.02k
        continue;
1365
265k
1366
265k
      // Assume that a path spanning two iterations is a cycle, which could
1367
265k
      // overestimate in strange cases. This allows cyclic latency to be
1368
265k
      // estimated as the minimum slack of the vreg's depth or height.
1369
265k
      unsigned CyclicLatency = 0;
1370
265k
      if (LiveOutDepth > SU->getDepth())
1371
265k
        CyclicLatency = LiveOutDepth - SU->getDepth();
1372
265k
1373
265k
      unsigned LiveInHeight = SU->getHeight() + DefSU->Latency;
1374
265k
      if (LiveInHeight > LiveOutHeight) {
1375
265k
        if (LiveInHeight - LiveOutHeight < CyclicLatency)
1376
19.6k
          CyclicLatency = LiveInHeight - LiveOutHeight;
1377
265k
      } else
1378
1
        CyclicLatency = 0;
1379
265k
1380
265k
      DEBUG(dbgs() << "Cyclic Path: SU(" << DefSU->NodeNum << ") -> SU("
1381
265k
            << SU->NodeNum << ") = " << CyclicLatency << "c\n");
1382
265k
      if (CyclicLatency > MaxCyclicLatency)
1383
15.5k
        MaxCyclicLatency = CyclicLatency;
1384
274k
    }
1385
71.1k
  }
1386
272k
  DEBUG(dbgs() << "Cyclic Critical Path: " << MaxCyclicLatency << "c\n");
1387
4.05M
  return MaxCyclicLatency;
1388
4.05M
}
1389
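
For reference, the per-pair estimate walked through in the comment above can be written compactly as follows (a sketch in LaTeX; the names mirror the code, and each difference is clamped at zero as in the loop body):

    \mathrm{LiveOutDepth} = \mathrm{depth}(\mathit{DefSU}) + \mathrm{Latency}(\mathit{DefSU}), \qquad
    \mathrm{LiveInHeight} = \mathrm{height}(\mathit{SU}) + \mathrm{Latency}(\mathit{DefSU})

    \mathrm{CyclicLatency}(\mathit{DefSU}, \mathit{SU}) =
        \min\bigl(\mathrm{LiveOutDepth} - \mathrm{depth}(\mathit{SU}),\;
                  \mathrm{LiveInHeight} - \mathrm{LiveOutHeight}\bigr)

    \mathrm{MaxCyclicLatency} = \max_{(\mathit{DefSU},\,\mathit{SU})} \mathrm{CyclicLatency}(\mathit{DefSU}, \mathit{SU})
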
1390
/// Release ExitSU predecessors and setup scheduler queues. Re-position
1391
/// the Top RP tracker in case the region beginning has changed.
1392
void ScheduleDAGMILive::initQueues(ArrayRef<SUnit*> TopRoots,
1393
4.07M
                                   ArrayRef<SUnit*> BotRoots) {
1394
4.07M
  ScheduleDAGMI::initQueues(TopRoots, BotRoots);
1395
4.07M
  if (ShouldTrackPressure) {
1396
112k
    assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker");
1397
112k
    TopRPTracker.setPos(CurrentTop);
1398
112k
  }
1399
4.07M
}
1400
1401
/// Move an instruction and update register pressure.
1402
16.6M
void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) {
1403
16.6M
  // Move the instruction to its new location in the instruction stream.
1404
16.6M
  MachineInstr *MI = SU->getInstr();
1405
16.6M
1406
16.6M
  if (IsTopNode) {
1407
1.34M
    assert(SU->isTopReady() && "node still has unscheduled dependencies");
1408
1.34M
    if (&*CurrentTop == MI)
1409
1.22M
      CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
1410
119k
    else {
1411
119k
      moveInstruction(MI, CurrentTop);
1412
119k
      TopRPTracker.setPos(MI);
1413
119k
    }
1414
1.34M
1415
1.34M
    if (ShouldTrackPressure) {
1416
125k
      // Update top scheduled pressure.
1417
125k
      RegisterOperands RegOpers;
1418
125k
      RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false);
1419
125k
      if (ShouldTrackLaneMasks) {
1420
48.1k
        // Adjust liveness and add missing dead+read-undef flags.
1421
48.1k
        SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
1422
48.1k
        RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
1423
125k
      } else {
1424
77.6k
        // Adjust for missing dead-def flags.
1425
77.6k
        RegOpers.detectDeadDefs(*MI, *LIS);
1426
77.6k
      }
1427
125k
1428
125k
      TopRPTracker.advance(RegOpers);
1429
125k
      assert(TopRPTracker.getPos() == CurrentTop && "out of sync");
1430
125k
      DEBUG(
1431
125k
        dbgs() << "Top Pressure:\n";
1432
125k
        dumpRegSetPressure(TopRPTracker.getRegSetPressureAtPos(), TRI);
1433
125k
      );
1434
125k
1435
125k
      updateScheduledPressure(SU, TopRPTracker.getPressure().MaxSetPressure);
1436
125k
    }
1437
16.6M
  } else {
1438
15.2M
    assert(SU->isBottomReady() && "node still has unscheduled dependencies");
1439
15.2M
    MachineBasicBlock::iterator priorII =
1440
15.2M
      priorNonDebug(CurrentBottom, CurrentTop);
1441
15.2M
    if (&*priorII == MI)
1442
13.5M
      CurrentBottom = priorII;
1443
1.66M
    else {
1444
1.66M
      if (&*CurrentTop == MI) {
1445
395k
        CurrentTop = nextIfDebug(++CurrentTop, priorII);
1446
395k
        TopRPTracker.setPos(CurrentTop);
1447
395k
      }
1448
1.66M
      moveInstruction(MI, CurrentBottom);
1449
1.66M
      CurrentBottom = MI;
1450
1.66M
    }
1451
15.2M
    if (ShouldTrackPressure) {
1452
2.73M
      RegisterOperands RegOpers;
1453
2.73M
      RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false);
1454
2.73M
      if (ShouldTrackLaneMasks) {
1455
221k
        // Adjust liveness and add missing dead+read-undef flags.
1456
221k
        SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
1457
221k
        RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
1458
2.73M
      } else {
1459
2.51M
        // Adjust for missing dead-def flags.
1460
2.51M
        RegOpers.detectDeadDefs(*MI, *LIS);
1461
2.51M
      }
1462
2.73M
1463
2.73M
      BotRPTracker.recedeSkipDebugValues();
1464
2.73M
      SmallVector<RegisterMaskPair, 8> LiveUses;
1465
2.73M
      BotRPTracker.recede(RegOpers, &LiveUses);
1466
2.73M
      assert(BotRPTracker.getPos() == CurrentBottom && "out of sync");
1467
2.73M
      DEBUG(
1468
2.73M
        dbgs() << "Bottom Pressure:\n";
1469
2.73M
        dumpRegSetPressure(BotRPTracker.getRegSetPressureAtPos(), TRI);
1470
2.73M
      );
1471
2.73M
1472
2.73M
      updateScheduledPressure(SU, BotRPTracker.getPressure().MaxSetPressure);
1473
2.73M
      updatePressureDiffs(LiveUses);
1474
2.73M
    }
1475
15.2M
  }
1476
16.6M
}
1477
1478
//===----------------------------------------------------------------------===//
1479
// BaseMemOpClusterMutation - DAG post-processing to cluster loads or stores.
1480
//===----------------------------------------------------------------------===//
1481
1482
namespace {
1483
1484
/// \brief Post-process the DAG to create cluster edges between neighboring
1485
/// loads or between neighboring stores.
1486
class BaseMemOpClusterMutation : public ScheduleDAGMutation {
1487
  struct MemOpInfo {
1488
    SUnit *SU;
1489
    unsigned BaseReg;
1490
    int64_t Offset;
1491
1492
    MemOpInfo(SUnit *su, unsigned reg, int64_t ofs)
1493
2.77M
        : SU(su), BaseReg(reg), Offset(ofs) {}
1494
1495
1.99M
    bool operator<(const MemOpInfo&RHS) const {
1496
1.99M
      return std::tie(BaseReg, Offset, SU->NodeNum) <
1497
1.99M
             std::tie(RHS.BaseReg, RHS.Offset, RHS.SU->NodeNum);
1498
1.99M
    }
1499
  };
1500
1501
  const TargetInstrInfo *TII;
1502
  const TargetRegisterInfo *TRI;
1503
  bool IsLoad;
1504
1505
public:
1506
  BaseMemOpClusterMutation(const TargetInstrInfo *tii,
1507
                           const TargetRegisterInfo *tri, bool IsLoad)
1508
941k
      : TII(tii), TRI(tri), IsLoad(IsLoad) {}
1509
1510
  void apply(ScheduleDAGInstrs *DAGInstrs) override;
1511
1512
protected:
1513
  void clusterNeighboringMemOps(ArrayRef<SUnit *> MemOps, ScheduleDAGMI *DAG);
1514
};
1515
1516
class StoreClusterMutation : public BaseMemOpClusterMutation {
1517
public:
1518
  StoreClusterMutation(const TargetInstrInfo *tii,
1519
                       const TargetRegisterInfo *tri)
1520
470k
      : BaseMemOpClusterMutation(tii, tri, false) {}
1521
};
1522
1523
class LoadClusterMutation : public BaseMemOpClusterMutation {
1524
public:
1525
  LoadClusterMutation(const TargetInstrInfo *tii, const TargetRegisterInfo *tri)
1526
470k
      : BaseMemOpClusterMutation(tii, tri, true) {}
1527
};
1528
1529
} // end anonymous namespace
1530
1531
namespace llvm {
1532
1533
std::unique_ptr<ScheduleDAGMutation>
1534
createLoadClusterDAGMutation(const TargetInstrInfo *TII,
1535
470k
                             const TargetRegisterInfo *TRI) {
1536
470k
  return EnableMemOpCluster ? llvm::make_unique<LoadClusterMutation>(TII, TRI)
1537
0
                            : nullptr;
1538
470k
}
1539
1540
std::unique_ptr<ScheduleDAGMutation>
1541
createStoreClusterDAGMutation(const TargetInstrInfo *TII,
1542
470k
                              const TargetRegisterInfo *TRI) {
1543
470k
  return EnableMemOpCluster ? llvm::make_unique<StoreClusterMutation>(TII, TRI)
1544
0
                            : nullptr;
1545
470k
}
1546
1547
} // end namespace llvm
1548
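
As context for the factories above: a scheduler instance typically registers these mutations while it is being constructed. The snippet below is a minimal, hypothetical sketch (the function name is invented for illustration); it follows the same pattern as the generic scheduler factories in this file but is not a verbatim copy:

    #include "llvm/ADT/STLExtras.h"
    #include "llvm/CodeGen/MachineScheduler.h"
    #include <utility>
    using namespace llvm;

    // Hypothetical factory illustrating how the cluster mutations are attached.
    static ScheduleDAGInstrs *createSketchSchedLive(MachineSchedContext *C) {
      ScheduleDAGMILive *DAG =
          new ScheduleDAGMILive(C, llvm::make_unique<GenericScheduler>(C));
      // The factories return nullptr when EnableMemOpCluster is off, so guard
      // before registering the DAG post-processing steps.
      if (auto LoadCluster = createLoadClusterDAGMutation(DAG->TII, DAG->TRI))
        DAG->addMutation(std::move(LoadCluster));
      if (auto StoreCluster = createStoreClusterDAGMutation(DAG->TII, DAG->TRI))
        DAG->addMutation(std::move(StoreCluster));
      return DAG;
    }
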
1549
void BaseMemOpClusterMutation::clusterNeighboringMemOps(
1550
2.51M
    ArrayRef<SUnit *> MemOps, ScheduleDAGMI *DAG) {
1551
2.51M
  SmallVector<MemOpInfo, 32> MemOpRecords;
1552
3.77M
  for (SUnit *SU : MemOps) {
1553
3.77M
    unsigned BaseReg;
1554
3.77M
    int64_t Offset;
1555
3.77M
    if (TII->getMemOpBaseRegImmOfs(*SU->getInstr(), BaseReg, Offset, TRI))
1556
2.77M
      MemOpRecords.push_back(MemOpInfo(SU, BaseReg, Offset));
1557
3.77M
  }
1558
2.51M
  if (MemOpRecords.size() < 2)
1559
2.11M
    return;
1560
395k
1561
395k
  std::sort(MemOpRecords.begin(), MemOpRecords.end());
1562
395k
  unsigned ClusterLength = 1;
1563
1.46M
  for (unsigned Idx = 0, End = MemOpRecords.size(); Idx < (End - 1); ++Idx) {
1564
1.07M
    SUnit *SUa = MemOpRecords[Idx].SU;
1565
1.07M
    SUnit *SUb = MemOpRecords[Idx+1].SU;
1566
1.07M
    if (TII->shouldClusterMemOps(*SUa->getInstr(), MemOpRecords[Idx].BaseReg,
1567
1.07M
                                 *SUb->getInstr(), MemOpRecords[Idx+1].BaseReg,
1568
1.07M
                                 ClusterLength) &&
1569
1.07M
        DAG->addEdge(SUb, SDep(SUa, SDep::Cluster))) {
1570
294k
      DEBUG(dbgs() << "Cluster ld/st SU(" << SUa->NodeNum << ") - SU("
1571
294k
            << SUb->NodeNum << ")\n");
1572
294k
      // Copy successor edges from SUa to SUb. Interleaving computation
1573
294k
      // dependent on SUa can prevent load combining due to register reuse.
1574
294k
      // Predecessor edges do not need to be copied from SUb to SUa since nearby
1575
294k
      // loads should have effectively the same inputs.
1576
688k
      for (const SDep &Succ : SUa->Succs) {
1577
688k
        if (Succ.getSUnit() == SUb)
1578
294k
          continue;
1579
393k
        DEBUG(dbgs() << " Copy Succ SU(" << Succ.getSUnit()->NodeNum << ")\n");
1580
393k
        DAG->addEdge(Succ.getSUnit(), SDep(SUb, SDep::Artificial));
1581
393k
      }
1582
294k
      ++ClusterLength;
1583
294k
    } else
1584
776k
      ClusterLength = 1;
1585
1.07M
  }
1586
2.51M
}
1587
1588
/// \brief Callback from DAG postProcessing to create cluster edges for loads.
1589
7.87M
void BaseMemOpClusterMutation::apply(ScheduleDAGInstrs *DAGInstrs) {
1590
7.87M
  ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
1591
7.87M
1592
7.87M
  // Map DAG NodeNum to store chain ID.
1593
7.87M
  DenseMap<unsigned, unsigned> StoreChainIDs;
1594
7.87M
  // Map each store chain to a set of dependent MemOps.
1595
7.87M
  SmallVector<SmallVector<SUnit*,4>, 32> StoreChainDependents;
1596
31.4M
  for (SUnit &SU : DAG->SUnits) {
1597
31.4M
    if ((IsLoad && !SU.getInstr()->mayLoad()) ||
1598
17.7M
        (!IsLoad && !SU.getInstr()->mayStore()))
1599
27.7M
      continue;
1600
3.77M
1601
3.77M
    unsigned ChainPredID = DAG->SUnits.size();
1602
3.19M
    for (const SDep &Pred : SU.Preds) {
1603
3.19M
      if (Pred.isCtrl()) {
1604
1.10M
        ChainPredID = Pred.getSUnit()->NodeNum;
1605
1.10M
        break;
1606
1.10M
      }
1607
3.77M
    }
1608
3.77M
    // Check if this chain-like pred has been seen
1609
3.77M
    // before. ChainPredID==MaxNodeID at the top of the schedule.
1610
3.77M
    unsigned NumChains = StoreChainDependents.size();
1611
3.77M
    std::pair<DenseMap<unsigned, unsigned>::iterator, bool> Result =
1612
3.77M
      StoreChainIDs.insert(std::make_pair(ChainPredID, NumChains));
1613
3.77M
    if (Result.second)
1614
2.51M
      StoreChainDependents.resize(NumChains + 1);
1615
31.4M
    StoreChainDependents[Result.first->second].push_back(&SU);
1616
31.4M
  }
1617
7.87M
1618
7.87M
  // Iterate over the store chains.
1619
7.87M
  for (auto &SCD : StoreChainDependents)
1620
2.51M
    clusterNeighboringMemOps(SCD, DAG);
1621
7.87M
}
1622
1623
//===----------------------------------------------------------------------===//
1624
// CopyConstrain - DAG post-processing to encourage copy elimination.
1625
//===----------------------------------------------------------------------===//
1626
1627
namespace {
1628
1629
/// \brief Post-process the DAG to create weak edges from all uses of a copy to
1630
/// the one use that defines the copy's source vreg, most likely an induction
1631
/// variable increment.
1632
class CopyConstrain : public ScheduleDAGMutation {
1633
  // Transient state.
1634
  SlotIndex RegionBeginIdx;
1635
1636
  // RegionEndIdx is the slot index of the last non-debug instruction in the
1637
  // scheduling region. So we may have RegionBeginIdx == RegionEndIdx.
1638
  SlotIndex RegionEndIdx;
1639
1640
public:
1641
535k
  CopyConstrain(const TargetInstrInfo *, const TargetRegisterInfo *) {}
1642
1643
  void apply(ScheduleDAGInstrs *DAGInstrs) override;
1644
1645
protected:
1646
  void constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG);
1647
};
1648
1649
} // end anonymous namespace
1650
1651
namespace llvm {
1652
1653
std::unique_ptr<ScheduleDAGMutation>
1654
createCopyConstrainDAGMutation(const TargetInstrInfo *TII,
1655
535k
                               const TargetRegisterInfo *TRI) {
1656
535k
  return llvm::make_unique<CopyConstrain>(TII, TRI);
1657
535k
}
1658
1659
} // end namespace llvm
1660
1661
/// constrainLocalCopy handles two possibilities:
1662
/// 1) Local src:
1663
/// I0:     = dst
1664
/// I1: src = ...
1665
/// I2:     = dst
1666
/// I3: dst = src (copy)
1667
/// (create pred->succ edges I0->I1, I2->I1)
1668
///
1669
/// 2) Local copy:
1670
/// I0: dst = src (copy)
1671
/// I1:     = dst
1672
/// I2: src = ...
1673
/// I3:     = dst
1674
/// (create pred->succ edges I1->I2, I3->I2)
1675
///
1676
/// Although the MachineScheduler is currently constrained to single blocks,
1677
/// this algorithm should handle extended blocks. An EBB is a set of
1678
/// contiguously numbered blocks such that the previous block in the EBB is
1679
/// always the single predecessor.
1680
6.14M
void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG) {
1681
6.14M
  LiveIntervals *LIS = DAG->getLIS();
1682
6.14M
  MachineInstr *Copy = CopySU->getInstr();
1683
6.14M
1684
6.14M
  // Check for pure vreg copies.
1685
6.14M
  const MachineOperand &SrcOp = Copy->getOperand(1);
1686
6.14M
  unsigned SrcReg = SrcOp.getReg();
1687
6.14M
  if (!TargetRegisterInfo::isVirtualRegister(SrcReg) || !SrcOp.readsReg())
1688
1.80M
    return;
1689
4.33M
1690
4.33M
  const MachineOperand &DstOp = Copy->getOperand(0);
1691
4.33M
  unsigned DstReg = DstOp.getReg();
1692
4.33M
  if (!TargetRegisterInfo::isVirtualRegister(DstReg) || DstOp.isDead())
1693
4.13M
    return;
1694
197k
1695
197k
  // Check if either the dest or source is local. If it's live across a back
1696
197k
  // edge, it's not local. Note that if both vregs are live across the back
1697
197k
  // edge, we cannot successfully constrain the copy without cyclic scheduling.
1698
197k
  // If both the copy's source and dest are local live intervals, then we
1699
197k
  // should treat the dest as the global for the purpose of adding
1700
197k
  // constraints. This adds edges from source's other uses to the copy.
1701
197k
  unsigned LocalReg = SrcReg;
1702
197k
  unsigned GlobalReg = DstReg;
1703
197k
  LiveInterval *LocalLI = &LIS->getInterval(LocalReg);
1704
197k
  if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx)) {
1705
130k
    LocalReg = DstReg;
1706
130k
    GlobalReg = SrcReg;
1707
130k
    LocalLI = &LIS->getInterval(LocalReg);
1708
130k
    if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx))
1709
121k
      return;
1710
75.4k
  }
1711
75.4k
  LiveInterval *GlobalLI = &LIS->getInterval(GlobalReg);
1712
75.4k
1713
75.4k
  // Find the global segment after the start of the local LI.
1714
75.4k
  LiveInterval::iterator GlobalSegment = GlobalLI->find(LocalLI->beginIndex());
1715
75.4k
  // If GlobalLI does not overlap LocalLI->start, then a copy directly feeds a
1716
75.4k
  // local live range. We could create edges from other global uses to the local
1717
75.4k
  // start, but the coalescer should have already eliminated these cases, so
1718
75.4k
  // don't bother dealing with it.
1719
75.4k
  if (GlobalSegment == GlobalLI->end())
1720
277
    return;
1721
75.1k
1722
75.1k
  // If GlobalSegment is killed at the LocalLI->start, the call to find()
1723
75.1k
  // returned the next global segment. But if GlobalSegment overlaps with
1724
75.1k
  // LocalLI->start, then advance to the next segment. If a hole in GlobalLI
1725
75.1k
  // exists in LocalLI's vicinity, GlobalSegment will be the end of the hole.
1726
75.1k
  if (GlobalSegment->contains(LocalLI->beginIndex()))
1727
14.9k
    ++GlobalSegment;
1728
75.1k
1729
75.1k
  if (GlobalSegment == GlobalLI->end())
1730
1.41k
    return;
1731
73.7k
1732
73.7k
  // Check if GlobalLI contains a hole in the vicinity of LocalLI.
1733
73.7k
  if (GlobalSegment != GlobalLI->begin()) {
1734
18.2k
    // Two address defs have no hole.
1735
18.2k
    if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->end,
1736
18.2k
                               GlobalSegment->start)) {
1737
1.59k
      return;
1738
1.59k
    }
1739
16.6k
    // If the prior global segment may be defined by the same two-address
1740
16.6k
    // instruction that also defines LocalLI, then can't make a hole here.
1741
16.6k
    if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->start,
1742
16.6k
                               LocalLI->beginIndex())) {
1743
0
      return;
1744
0
    }
1745
16.6k
    // If GlobalLI has a prior segment, it must be live into the EBB. Otherwise
1746
16.6k
    // it would be a disconnected component in the live range.
1747
16.6k
    assert(std::prev(GlobalSegment)->start < LocalLI->beginIndex() &&
1748
16.6k
           "Disconnected LRG within the scheduling region.");
1749
16.6k
  }
1750
72.1k
  MachineInstr *GlobalDef = LIS->getInstructionFromIndex(GlobalSegment->start);
1751
72.1k
  if (!GlobalDef)
1752
1.05k
    return;
1753
71.0k
1754
71.0k
  SUnit *GlobalSU = DAG->getSUnit(GlobalDef);
1755
71.0k
  if (!GlobalSU)
1756
120
    return;
1757
70.9k
1758
70.9k
  // GlobalDef is the bottom of the GlobalLI hole. Open the hole by
1759
70.9k
  // constraining the uses of the last local def to precede GlobalDef.
1760
70.9k
  SmallVector<SUnit*,8> LocalUses;
1761
70.9k
  const VNInfo *LastLocalVN = LocalLI->getVNInfoBefore(LocalLI->endIndex());
1762
70.9k
  MachineInstr *LastLocalDef = LIS->getInstructionFromIndex(LastLocalVN->def);
1763
70.9k
  SUnit *LastLocalSU = DAG->getSUnit(LastLocalDef);
1764
120k
  for (const SDep &Succ : LastLocalSU->Succs) {
1765
120k
    if (Succ.getKind() != SDep::Data || Succ.getReg() != LocalReg)
1766
30.1k
      continue;
1767
90.4k
    if (Succ.getSUnit() == GlobalSU)
1768
58.1k
      continue;
1769
32.3k
    if (!DAG->canAddEdge(GlobalSU, Succ.getSUnit()))
1770
12.3k
      return;
1771
19.9k
    LocalUses.push_back(Succ.getSUnit());
1772
19.9k
  }
1773
70.9k
  // Open the top of the GlobalLI hole by constraining any earlier global uses
1774
70.9k
  // to precede the start of LocalLI.
1775
58.6k
  SmallVector<SUnit*,8> GlobalUses;
1776
58.6k
  MachineInstr *FirstLocalDef =
1777
58.6k
    LIS->getInstructionFromIndex(LocalLI->beginIndex());
1778
58.6k
  SUnit *FirstLocalSU = DAG->getSUnit(FirstLocalDef);
1779
81.2k
  for (const SDep &Pred : GlobalSU->Preds) {
1780
81.2k
    if (Pred.getKind() != SDep::Anti || Pred.getReg() != GlobalReg)
1781
57.8k
      continue;
1782
23.4k
    if (Pred.getSUnit() == FirstLocalSU)
1783
9.43k
      continue;
1784
13.9k
    if (!DAG->canAddEdge(FirstLocalSU, Pred.getSUnit()))
1785
1.59k
      return;
1786
12.3k
    GlobalUses.push_back(Pred.getSUnit());
1787
12.3k
  }
1788
57.0k
  DEBUG(dbgs() << "Constraining copy SU(" << CopySU->NodeNum << ")\n");
1789
57.0k
  // Add the weak edges.
1790
57.0k
  for (SmallVectorImpl<SUnit*>::const_iterator
1791
67.4k
         I = LocalUses.begin(), E = LocalUses.end(); I != E; ++I) {
1792
10.4k
    DEBUG(dbgs() << "  Local use SU(" << (*I)->NodeNum << ") -> SU("
1793
10.4k
          << GlobalSU->NodeNum << ")\n");
1794
10.4k
    DAG->addEdge(GlobalSU, SDep(*I, SDep::Weak));
1795
10.4k
  }
1796
57.0k
  for (SmallVectorImpl<SUnit*>::const_iterator
1797
69.3k
         I = GlobalUses.begin(), E = GlobalUses.end(); I != E; ++I) {
1798
12.3k
    DEBUG(dbgs() << "  Global use SU(" << (*I)->NodeNum << ") -> SU("
1799
12.3k
          << FirstLocalSU->NodeNum << ")\n");
1800
12.3k
    DAG->addEdge(FirstLocalSU, SDep(*I, SDep::Weak));
1801
12.3k
  }
1802
6.14M
}
1803
1804
/// \brief Callback from DAG postProcessing to create weak edges to encourage
1805
/// copy elimination.
1806
4.05M
void CopyConstrain::apply(ScheduleDAGInstrs *DAGInstrs) {
1807
4.05M
  ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
1808
4.05M
  assert(DAG->hasVRegLiveness() && "Expect VRegs with LiveIntervals");
1809
4.05M
1810
4.05M
  MachineBasicBlock::iterator FirstPos = nextIfDebug(DAG->begin(), DAG->end());
1811
4.05M
  if (FirstPos == DAG->end())
1812
1
    return;
1813
4.05M
  RegionBeginIdx = DAG->getLIS()->getInstructionIndex(*FirstPos);
1814
4.05M
  RegionEndIdx = DAG->getLIS()->getInstructionIndex(
1815
4.05M
      *priorNonDebug(DAG->end(), DAG->begin()));
1816
4.05M
1817
16.2M
  for (SUnit &SU : DAG->SUnits) {
1818
16.2M
    if (!SU.getInstr()->isCopy())
1819
10.1M
      continue;
1820
6.14M
1821
6.14M
    constrainLocalCopy(&SU, static_cast<ScheduleDAGMILive*>(DAG));
1822
6.14M
  }
1823
4.05M
}
1824
1825
//===----------------------------------------------------------------------===//
1826
// MachineSchedStrategy helpers used by GenericScheduler, GenericPostScheduler
1827
// and possibly other custom schedulers.
1828
//===----------------------------------------------------------------------===//
1829
1830
static const unsigned InvalidCycle = ~0U;
1831
1832
1.10M
SchedBoundary::~SchedBoundary() { delete HazardRec; }
1833
1834
9.26M
void SchedBoundary::reset() {
1835
9.26M
  // A new HazardRec is created for each DAG and owned by SchedBoundary.
1836
9.26M
  // Destroying and reconstructing it is very expensive though. So keep
1837
9.26M
  // invalid, placeholder HazardRecs.
1838
9.26M
  if (HazardRec && HazardRec->isEnabled()) {
1839
3.32k
    delete HazardRec;
1840
3.32k
    HazardRec = nullptr;
1841
3.32k
  }
1842
9.26M
  Available.clear();
1843
9.26M
  Pending.clear();
1844
9.26M
  CheckPending = false;
1845
9.26M
  CurrCycle = 0;
1846
9.26M
  CurrMOps = 0;
1847
9.26M
  MinReadyCycle = std::numeric_limits<unsigned>::max();
1848
9.26M
  ExpectedLatency = 0;
1849
9.26M
  DependentLatency = 0;
1850
9.26M
  RetiredMOps = 0;
1851
9.26M
  MaxExecutedResCount = 0;
1852
9.26M
  ZoneCritResIdx = 0;
1853
9.26M
  IsResourceLimited = false;
1854
9.26M
  ReservedCycles.clear();
1855
#ifndef NDEBUG
1856
  // Track the maximum number of stall cycles that could arise either from the
1857
  // latency of a DAG edge or the number of cycles that a processor resource is
1858
  // reserved (SchedBoundary::ReservedCycles).
1859
  MaxObservedStall = 0;
1860
#endif
1861
  // Reserve a zero-count for invalid CritResIdx.
1862
9.26M
  ExecutedResCounts.resize(1);
1863
9.26M
  assert(!ExecutedResCounts[0] && "nonzero count for bad resource");
1864
9.26M
}
1865
1866
void SchedRemainder::
1867
4.08M
init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) {
1868
4.08M
  reset();
1869
4.08M
  if (!SchedModel->hasInstrSchedModel())
1870
84.1k
    return;
1871
3.99M
  RemainingCounts.resize(SchedModel->getNumProcResourceKinds());
1872
15.9M
  for (SUnit &SU : DAG->SUnits) {
1873
15.9M
    const MCSchedClassDesc *SC = DAG->getSchedClass(&SU);
1874
15.9M
    RemIssueCount += SchedModel->getNumMicroOps(SU.getInstr(), SC)
1875
15.9M
      * SchedModel->getMicroOpFactor();
1876
15.9M
    for (TargetSchedModel::ProcResIter
1877
15.9M
           PI = SchedModel->getWriteProcResBegin(SC),
1878
30.0M
           PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
1879
14.0M
      unsigned PIdx = PI->ProcResourceIdx;
1880
14.0M
      unsigned Factor = SchedModel->getResourceFactor(PIdx);
1881
14.0M
      RemainingCounts[PIdx] += (Factor * PI->Cycles);
1882
14.0M
    }
1883
15.9M
  }
1884
4.08M
}
1885
1886
void SchedBoundary::
1887
8.15M
init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) {
1888
8.15M
  reset();
1889
8.15M
  DAG = dag;
1890
8.15M
  SchedModel = smodel;
1891
8.15M
  Rem = rem;
1892
8.15M
  if (SchedModel->hasInstrSchedModel()) {
1893
7.99M
    ExecutedResCounts.resize(SchedModel->getNumProcResourceKinds());
1894
7.99M
    ReservedCycles.resize(SchedModel->getNumProcResourceKinds(), InvalidCycle);
1895
7.99M
  }
1896
8.15M
}
1897
1898
/// Compute the stall cycles based on this SUnit's ready time. Heuristics treat
1899
/// these "soft stalls" differently than the hard stall cycles based on CPU
1900
/// resources and computed by checkHazard(). A fully in-order model
1901
/// (MicroOpBufferSize==0) will not make use of this since instructions are not
1902
/// available for scheduling until they are ready. However, a weaker in-order
1903
/// model may use this for heuristics. For example, if a processor has in-order
1904
/// behavior when reading certain resources, this may come into play.
1905
125M
unsigned SchedBoundary::getLatencyStallCycles(SUnit *SU) {
1906
125M
  if (!SU->isUnbuffered)
1907
123M
    return 0;
1908
2.08M
1909
2.08M
  unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
1910
2.08M
  if (ReadyCycle > CurrCycle)
1911
40.0k
    return ReadyCycle - CurrCycle;
1912
2.04M
  return 0;
1913
2.04M
}
1914
1915
/// Compute the next cycle at which the given processor resource can be
1916
/// scheduled.
1917
unsigned SchedBoundary::
1918
14.0M
getNextResourceCycle(unsigned PIdx, unsigned Cycles) {
1919
14.0M
  unsigned NextUnreserved = ReservedCycles[PIdx];
1920
14.0M
  // If this resource has never been used, always return cycle zero.
1921
14.0M
  if (NextUnreserved == InvalidCycle)
1922
14.0M
    return 0;
1923
2.07k
  // For bottom-up scheduling add the cycles needed for the current operation.
1924
2.07k
  if (!isTop())
1925
927
    NextUnreserved += Cycles;
1926
14.0M
  return NextUnreserved;
1927
14.0M
}
1928
1929
/// Does this SU have a hazard within the current instruction group.
1930
///
1931
/// The scheduler supports two modes of hazard recognition. The first is the
1932
/// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
1933
/// supports highly complicated in-order reservation tables
1934
/// (ScoreboardHazardRecognizer) and arbitrary target-specific logic.
1935
///
1936
/// The second is a streamlined mechanism that checks for hazards based on
1937
/// simple counters that the scheduler itself maintains. It explicitly checks
1938
/// for instruction dispatch limitations, including the number of micro-ops that
1939
/// can dispatch per cycle.
1940
///
1941
/// TODO: Also check whether the SU must start a new group.
1942
79.2M
bool SchedBoundary::checkHazard(SUnit *SU) {
1943
79.2M
  if (HazardRec->isEnabled()
1944
79.2M
      && HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard) {
1945
4.15k
    return true;
1946
4.15k
  }
1947
79.2M
1948
79.2M
  unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());
1949
79.2M
  if ((CurrMOps > 0) && (CurrMOps + uops > SchedModel->getIssueWidth())) {
1950
824k
    DEBUG(dbgs() << "  SU(" << SU->NodeNum << ") uops="
1951
824k
          << SchedModel->getNumMicroOps(SU->getInstr()) << '\n');
1952
824k
    return true;
1953
824k
  }
1954
78.4M
1955
78.4M
  if (CurrMOps > 0 &&
1956
56.8M
      ((isTop() && SchedModel->mustBeginGroup(SU->getInstr())) ||
1957
78.4M
       (!isTop() && SchedModel->mustEndGroup(SU->getInstr())))) {
1958
0
    DEBUG(dbgs() << "  hazard: SU(" << SU->NodeNum << ") must "
1959
0
                 << (isTop()? "begin" : "end") << " group\n");
1960
0
    return true;
1961
0
  }
1962
78.4M
1963
78.4M
  if (SchedModel->hasInstrSchedModel() && SU->hasReservedResource) {
1964
1.79k
    const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
1965
1.79k
    for (TargetSchedModel::ProcResIter
1966
1.79k
           PI = SchedModel->getWriteProcResBegin(SC),
1967
3.62k
           PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
1968
2.42k
      unsigned NRCycle = getNextResourceCycle(PI->ProcResourceIdx, PI->Cycles);
1969
2.42k
      if (NRCycle > CurrCycle) {
1970
#ifndef NDEBUG
1971
        MaxObservedStall = std::max(PI->Cycles, MaxObservedStall);
1972
#endif
1973
597
        DEBUG(dbgs() << "  SU(" << SU->NodeNum << ") "
1974
597
              << SchedModel->getResourceName(PI->ProcResourceIdx)
1975
597
              << "=" << NRCycle << "c\n");
1976
597
        return true;
1977
597
      }
1978
2.42k
    }
1979
1.79k
  }
1980
78.4M
  return false;
1981
79.2M
}
1982
1983
// Find the unscheduled node in ReadySUs with the highest latency.
1984
unsigned SchedBoundary::
1985
35.2M
findMaxLatency(ArrayRef<SUnit*> ReadySUs) {
1986
35.2M
  SUnit *LateSU = nullptr;
1987
35.2M
  unsigned RemLatency = 0;
1988
111M
  for (SUnit *SU : ReadySUs) {
1989
111M
    unsigned L = getUnscheduledLatency(SU);
1990
111M
    if (L > RemLatency) {
1991
13.8M
      RemLatency = L;
1992
13.8M
      LateSU = SU;
1993
13.8M
    }
1994
111M
  }
1995
35.2M
  if (LateSU) {
1996
11.0M
    DEBUG(dbgs() << Available.getName() << " RemLatency SU("
1997
11.0M
          << LateSU->NodeNum << ") " << RemLatency << "c\n");
1998
11.0M
  }
1999
35.2M
  return RemLatency;
2000
35.2M
}
2001
2002
// Count resources in this zone and the remaining unscheduled
2003
// instruction. Return the max count, scaled. Set OtherCritIdx to the critical
2004
// resource index, or zero if the zone is issue limited.
2005
unsigned SchedBoundary::
2006
17.5M
getOtherResourceCount(unsigned &OtherCritIdx) {
2007
17.5M
  OtherCritIdx = 0;
2008
17.5M
  if (!SchedModel->hasInstrSchedModel())
2009
167k
    return 0;
2010
17.4M
2011
17.4M
  unsigned OtherCritCount = Rem->RemIssueCount
2012
17.4M
    + (RetiredMOps * SchedModel->getMicroOpFactor());
2013
17.4M
  DEBUG(dbgs() << "  " << Available.getName() << " + Remain MOps: "
2014
17.4M
        << OtherCritCount / SchedModel->getMicroOpFactor() << '\n');
2015
17.4M
  for (unsigned PIdx = 1, PEnd = SchedModel->getNumProcResourceKinds();
2016
241M
       PIdx != PEnd; ++PIdx) {
2017
224M
    unsigned OtherCount = getResourceCount(PIdx) + Rem->RemainingCounts[PIdx];
2018
224M
    if (OtherCount > OtherCritCount) {
2019
9.92M
      OtherCritCount = OtherCount;
2020
9.92M
      OtherCritIdx = PIdx;
2021
9.92M
    }
2022
224M
  }
2023
17.4M
  if (OtherCritIdx) {
2024
9.43M
    DEBUG(dbgs() << "  " << Available.getName() << " + Remain CritRes: "
2025
9.43M
          << OtherCritCount / SchedModel->getResourceFactor(OtherCritIdx)
2026
9.43M
          << " " << SchedModel->getResourceName(OtherCritIdx) << "\n");
2027
9.43M
  }
2028
17.5M
  return OtherCritCount;
2029
17.5M
}
2030
2031
27.0M
void SchedBoundary::releaseNode(SUnit *SU, unsigned ReadyCycle) {
2032
27.0M
  assert(SU->getInstr() && "Scheduled SUnit must have instr");
2033
27.0M
2034
#ifndef NDEBUG
2035
  // ReadyCycle has been bumped up to the CurrCycle when this node was
2036
  // scheduled, but CurrCycle may have been eagerly advanced immediately after
2037
  // scheduling, so may now be greater than ReadyCycle.
2038
  if (ReadyCycle > CurrCycle)
2039
    MaxObservedStall = std::max(ReadyCycle - CurrCycle, MaxObservedStall);
2040
#endif
2041
2042
27.0M
  if (ReadyCycle < MinReadyCycle)
2043
8.53M
    MinReadyCycle = ReadyCycle;
2044
27.0M
2045
27.0M
  // Check for interlocks first. For the purpose of other heuristics, an
2046
27.0M
  // instruction that cannot issue appears as if it's not in the ReadyQueue.
2047
27.0M
  bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0;
2048
27.0M
  if ((!IsBuffered && ReadyCycle > CurrCycle) || checkHazard(SU) ||
2049
26.9M
      Available.size() >= ReadyListLimit)
2050
173k
    Pending.push(SU);
2051
27.0M
  else
2052
26.9M
    Available.push(SU);
2053
27.0M
}
2054
2055
/// Move the boundary of scheduled code by one cycle.
2056
1.34M
void SchedBoundary::bumpCycle(unsigned NextCycle) {
2057
1.34M
  if (SchedModel->getMicroOpBufferSize() == 0) {
2058
165k
    assert(MinReadyCycle < std::numeric_limits<unsigned>::max() &&
2059
165k
           "MinReadyCycle uninitialized");
2060
165k
    if (MinReadyCycle > NextCycle)
2061
16.8k
      NextCycle = MinReadyCycle;
2062
165k
  }
2063
1.34M
  // Update the current micro-ops, which will issue in the next cycle.
2064
1.34M
  unsigned DecMOps = SchedModel->getIssueWidth() * (NextCycle - CurrCycle);
2065
1.34M
  CurrMOps = (CurrMOps <= DecMOps) ? 0 : CurrMOps - DecMOps;
2066
1.34M
2067
1.34M
  // Decrement DependentLatency based on the next cycle.
2068
1.34M
  if ((NextCycle - CurrCycle) > DependentLatency)
2069
169k
    DependentLatency = 0;
2070
1.34M
  else
2071
1.17M
    DependentLatency -= (NextCycle - CurrCycle);
2072
1.34M
2073
1.34M
  if (!HazardRec->isEnabled()) {
2074
1.31M
    // Bypass HazardRec virtual calls.
2075
1.31M
    CurrCycle = NextCycle;
2076
1.34M
  } else {
2077
24.4k
    // Bypass getHazardType calls in case of long latency.
2078
63.5k
    for (; CurrCycle != NextCycle; ++CurrCycle) {
2079
39.1k
      if (isTop())
2080
985
        HazardRec->AdvanceCycle();
2081
39.1k
      else
2082
38.1k
        HazardRec->RecedeCycle();
2083
39.1k
    }
2084
24.4k
  }
2085
1.34M
  CheckPending = true;
2086
1.34M
  unsigned LFactor = SchedModel->getLatencyFactor();
2087
1.34M
  IsResourceLimited =
2088
1.34M
    (int)(getCriticalCount() - (getScheduledLatency() * LFactor))
2089
1.34M
    > (int)LFactor;
2090
1.34M
2091
1.34M
  DEBUG(dbgs() << "Cycle: " << CurrCycle << ' ' << Available.getName() << '\n');
2092
1.34M
}
2093
2094
14.0M
void SchedBoundary::incExecutedResources(unsigned PIdx, unsigned Count) {
2095
14.0M
  ExecutedResCounts[PIdx] += Count;
2096
14.0M
  if (ExecutedResCounts[PIdx] > MaxExecutedResCount)
2097
9.95M
    MaxExecutedResCount = ExecutedResCounts[PIdx];
2098
14.0M
}
2099
2100
/// Add the given processor resource to this scheduled zone.
2101
///
2102
/// \param Cycles indicates the number of consecutive (non-pipelined) cycles
2103
/// during which this resource is consumed.
2104
///
2105
/// \return the next cycle at which the instruction may execute without
2106
/// oversubscribing resources.
2107
unsigned SchedBoundary::
2108
14.0M
countResource(unsigned PIdx, unsigned Cycles, unsigned NextCycle) {
2109
14.0M
  unsigned Factor = SchedModel->getResourceFactor(PIdx);
2110
14.0M
  unsigned Count = Factor * Cycles;
2111
14.0M
  DEBUG(dbgs() << "  " << SchedModel->getResourceName(PIdx)
2112
14.0M
        << " +" << Cycles << "x" << Factor << "u\n");
2113
14.0M
2114
14.0M
  // Update Executed resources counts.
2115
14.0M
  incExecutedResources(PIdx, Count);
2116
14.0M
  assert(Rem->RemainingCounts[PIdx] >= Count && "resource double counted");
2117
14.0M
  Rem->RemainingCounts[PIdx] -= Count;
2118
14.0M
2119
14.0M
  // Check if this resource exceeds the current critical resource. If so, it
2120
14.0M
  // becomes the critical resource.
2121
14.0M
  if (ZoneCritResIdx != PIdx && (getResourceCount(PIdx) > getCriticalCount())) {
2122
3.96M
    ZoneCritResIdx = PIdx;
2123
3.96M
    DEBUG(dbgs() << "  *** Critical resource "
2124
3.96M
          << SchedModel->getResourceName(PIdx) << ": "
2125
3.96M
          << getResourceCount(PIdx) / SchedModel->getLatencyFactor() << "c\n");
2126
3.96M
  }
2127
14.0M
  // For reserved resources, record the highest cycle using the resource.
2128
14.0M
  unsigned NextAvailable = getNextResourceCycle(PIdx, Cycles);
2129
14.0M
  if (NextAvailable > CurrCycle) {
2130
4
    DEBUG(dbgs() << "  Resource conflict: "
2131
4
          << SchedModel->getProcResource(PIdx)->Name << " reserved until @"
2132
4
          << NextAvailable << "\n");
2133
4
  }
2134
14.0M
  return NextAvailable;
2135
14.0M
}
2136
2137
/// Move the boundary of scheduled code by one SUnit.
2138
16.5M
void SchedBoundary::bumpNode(SUnit *SU) {
2139
16.5M
  // Update the reservation table.
2140
16.5M
  if (HazardRec->isEnabled()) {
2141
33.2k
    if (!isTop() && SU->isCall) {
2142
0
      // Calls are scheduled with their preceding instructions. For bottom-up
2143
0
      // scheduling, clear the pipeline state before emitting.
2144
0
      HazardRec->Reset();
2145
0
    }
2146
33.2k
    HazardRec->EmitInstruction(SU);
2147
33.2k
  }
2148
16.5M
  // checkHazard should prevent scheduling multiple instructions per cycle that
2149
16.5M
  // exceed the issue width.
2150
16.5M
  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
2151
16.5M
  unsigned IncMOps = SchedModel->getNumMicroOps(SU->getInstr());
2152
16.5M
  assert(
2153
16.5M
      (CurrMOps == 0 || (CurrMOps + IncMOps) <= SchedModel->getIssueWidth()) &&
2154
16.5M
      "Cannot schedule this instruction's MicroOps in the current cycle.");
2155
16.5M
2156
16.5M
  unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
2157
16.5M
  DEBUG(dbgs() << "  Ready @" << ReadyCycle << "c\n");
2158
16.5M
2159
16.5M
  unsigned NextCycle = CurrCycle;
2160
16.5M
  switch (SchedModel->getMicroOpBufferSize()) {
2161
197k
  case 0:
2162
197k
    assert(ReadyCycle <= CurrCycle && "Broken PendingQueue");
2163
197k
    break;
2164
202k
  case 1:
2165
202k
    if (ReadyCycle > NextCycle) {
2166
17.8k
      NextCycle = ReadyCycle;
2167
17.8k
      DEBUG(dbgs() << "  *** Stall until: " << ReadyCycle << "\n");
2168
17.8k
    }
2169
202k
    break;
2170
16.1M
  default:
2171
16.1M
    // We don't currently model the OOO reorder buffer, so consider all
2172
16.1M
    // scheduled MOps to be "retired". We do loosely model in-order resource
2173
16.1M
    // latency. If this instruction uses an in-order resource, account for any
2174
16.1M
    // likely stall cycles.
2175
16.1M
    if (SU->isUnbuffered && ReadyCycle > NextCycle)
2176
314
      NextCycle = ReadyCycle;
2177
16.1M
    break;
2178
16.5M
  }
2179
16.5M
  RetiredMOps += IncMOps;
2180
16.5M
2181
16.5M
  // Update resource counts and critical resource.
2182
16.5M
  if (SchedModel->hasInstrSchedModel()) {
2183
15.9M
    unsigned DecRemIssue = IncMOps * SchedModel->getMicroOpFactor();
2184
15.9M
    assert(Rem->RemIssueCount >= DecRemIssue && "MOps double counted");
2185
15.9M
    Rem->RemIssueCount -= DecRemIssue;
2186
15.9M
    if (ZoneCritResIdx) {
2187
7.02M
      // Scale scheduled micro-ops for comparing with the critical resource.
2188
7.02M
      unsigned ScaledMOps =
2189
7.02M
        RetiredMOps * SchedModel->getMicroOpFactor();
2190
7.02M
2191
7.02M
      // If scaled micro-ops are now more than the previous critical resource by
2192
7.02M
      // a full cycle, then micro-ops issue becomes critical.
2193
7.02M
      if ((int)(ScaledMOps - getResourceCount(ZoneCritResIdx))
2194
7.02M
          >= (int)SchedModel->getLatencyFactor()) {
2195
3.67k
        ZoneCritResIdx = 0;
2196
3.67k
        DEBUG(dbgs() << "  *** Critical resource NumMicroOps: "
2197
3.67k
              << ScaledMOps / SchedModel->getLatencyFactor() << "c\n");
2198
3.67k
      }
2199
7.02M
    }
2200
15.9M
    for (TargetSchedModel::ProcResIter
2201
15.9M
           PI = SchedModel->getWriteProcResBegin(SC),
2202
30.0M
           PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2203
14.0M
      unsigned RCycle =
2204
14.0M
        countResource(PI->ProcResourceIdx, PI->Cycles, NextCycle);
2205
14.0M
      if (RCycle > NextCycle)
2206
4
        NextCycle = RCycle;
2207
14.0M
    }
2208
15.9M
    if (SU->hasReservedResource) {
2209
589
      // For reserved resources, record the highest cycle using the resource.
2210
589
      // For top-down scheduling, this is the cycle in which we schedule this
2211
589
      // instruction plus the number of cycles the operations reserves the
2212
589
      // resource. For bottom-up it is simply the instruction's cycle.
2213
589
      for (TargetSchedModel::ProcResIter
2214
589
             PI = SchedModel->getWriteProcResBegin(SC),
2215
1.32k
             PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2216
737
        unsigned PIdx = PI->ProcResourceIdx;
2217
737
        if (SchedModel->getProcResource(PIdx)->BufferSize == 0) {
2218
589
          if (isTop()) {
2219
303
            ReservedCycles[PIdx] =
2220
303
              std::max(getNextResourceCycle(PIdx, 0), NextCycle + PI->Cycles);
2221
303
          }
2222
589
          else
2223
286
            ReservedCycles[PIdx] = NextCycle;
2224
589
        }
2225
737
      }
2226
589
    }
2227
15.9M
  }
2228
16.5M
  // Update ExpectedLatency and DependentLatency.
2229
16.5M
  unsigned &TopLatency = isTop() ? ExpectedLatency : DependentLatency;
2230
16.5M
  unsigned &BotLatency = isTop() ? DependentLatency : ExpectedLatency;
2231
16.5M
  if (SU->getDepth() > TopLatency) {
2232
2.77M
    TopLatency = SU->getDepth();
2233
2.77M
    DEBUG(dbgs() << "  " << Available.getName()
2234
2.77M
          << " TopLatency SU(" << SU->NodeNum << ") " << TopLatency << "c\n");
2235
2.77M
  }
2236
16.5M
  if (SU->getHeight() > BotLatency) {
2237
5.57M
    BotLatency = SU->getHeight();
2238
5.57M
    DEBUG(dbgs() << "  " << Available.getName()
2239
5.57M
          << " BotLatency SU(" << SU->NodeNum << ") " << BotLatency << "c\n");
2240
5.57M
  }
2241
16.5M
  // If we stall for any reason, bump the cycle.
2242
16.5M
  if (NextCycle > CurrCycle) {
2243
18.1k
    bumpCycle(NextCycle);
2244
16.5M
  } else {
2245
16.5M
    // After updating ZoneCritResIdx and ExpectedLatency, check if we're
2246
16.5M
    // resource limited. If a stall occurred, bumpCycle does this.
2247
16.5M
    unsigned LFactor = SchedModel->getLatencyFactor();
2248
16.5M
    IsResourceLimited =
2249
16.5M
      (int)(getCriticalCount() - (getScheduledLatency() * LFactor))
2250
16.5M
      > (int)LFactor;
2251
16.5M
  }
2252
16.5M
  // Update CurrMOps after calling bumpCycle to handle stalls, since bumpCycle
2253
16.5M
  // resets CurrMOps. Loop to handle instructions with more MOps than issue in
2254
16.5M
  // one cycle.  Since we commonly reach the max MOps here, opportunistically
2255
16.5M
  // bump the cycle to avoid uselessly checking everything in the readyQ.
2256
16.5M
  CurrMOps += IncMOps;
2257
16.5M
2258
16.5M
  // Bump the cycle count for issue group constraints.
2259
16.5M
  // This must be done after NextCycle has been adjusted for all other stalls.
2260
16.5M
  // Calling bumpCycle(X) will reduce CurrMOps by one issue group and set
2261
16.5M
  // currCycle to X.
2262
16.5M
  if ((isTop() && SchedModel->mustEndGroup(SU->getInstr())) ||
2263
16.5M
      (!isTop() && SchedModel->mustBeginGroup(SU->getInstr()))) {
2264
0
    DEBUG(dbgs() << "  Bump cycle to "
2265
0
                 << (isTop() ? "end" : "begin") << " group\n");
2266
0
    bumpCycle(++NextCycle);
2267
0
  }
2268
16.5M
2269
17.7M
  while (CurrMOps >= SchedModel->getIssueWidth()) {
2270
1.16M
    DEBUG(dbgs() << "  *** Max MOps " << CurrMOps
2271
1.16M
          << " at cycle " << CurrCycle << '\n');
2272
1.16M
    bumpCycle(++NextCycle);
2273
1.16M
  }
2274
16.5M
  DEBUG(dumpScheduledState());
2275
16.5M
}
2276
2277
/// Release pending ready nodes into the available queue. This makes them
2278
/// visible to heuristics.
2279
1.13M
void SchedBoundary::releasePending() {
2280
1.13M
  // If the available queue is empty, it is safe to reset MinReadyCycle.
2281
1.13M
  if (Available.empty())
2282
192k
    MinReadyCycle = std::numeric_limits<unsigned>::max();
2283
1.13M
2284
1.13M
  // Check to see if any of the pending instructions are ready to issue.  If
2285
1.13M
  // so, add them to the available queue.
2286
1.13M
  bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0;
2287
2.07M
  for (unsigned i = 0, e = Pending.size(); i != e; ++i) {
2288
936k
    SUnit *SU = *(Pending.begin()+i);
2289
936k
    unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;
2290
936k
2291
936k
    if (ReadyCycle < MinReadyCycle)
2292
203k
      MinReadyCycle = ReadyCycle;
2293
936k
2294
936k
    if (!IsBuffered && ReadyCycle > CurrCycle)
2295
54.2k
      continue;
2296
882k
2297
882k
    if (checkHazard(SU))
2298
244
      continue;
2299
881k
2300
881k
    if (Available.size() >= ReadyListLimit)
2301
284
      break;
2302
881k
2303
881k
    Available.push(SU);
2304
881k
    Pending.remove(Pending.begin()+i);
2305
881k
    --i; --e;
2306
881k
  }
2307
1.13M
  CheckPending = false;
2308
1.13M
}
2309
2310
/// Remove SU from the ready set for this boundary.
2311
27.0M
void SchedBoundary::removeReady(SUnit *SU) {
2312
27.0M
  if (Available.isInQueue(SU))
2313
27.0M
    Available.remove(Available.find(SU));
2314
20.4k
  else {
2315
20.4k
    assert(Pending.isInQueue(SU) && "bad ready count");
2316
20.4k
    Pending.remove(Pending.find(SU));
2317
20.4k
  }
2318
27.0M
}
2319
2320
/// If this queue only has one ready candidate, return it. As a side effect,
2321
/// defer any nodes that now hit a hazard, and advance the cycle until at least
2322
/// one node is ready. If multiple instructions are ready, return NULL.
2323
25.5M
SUnit *SchedBoundary::pickOnlyChoice() {
2324
25.5M
  if (CheckPending)
2325
981k
    releasePending();
2326
25.5M
2327
25.5M
  if (CurrMOps > 0) {
2328
11.9M
    // Defer any ready instrs that now have a hazard.
2329
63.3M
    for (ReadyQueue::iterator I = Available.begin(); I != Available.end();) {
2330
51.3M
      if (checkHazard(*I)) {
2331
728k
        Pending.push(*I);
2332
728k
        I = Available.remove(I);
2333
728k
        continue;
2334
728k
      }
2335
50.6M
      ++I;
2336
50.6M
    }
2337
11.9M
  }
2338
25.7M
  for (unsigned i = 0; Available.empty(); ++i) {
2339
157k
//  FIXME: Re-enable assert once PR20057 is resolved.
2340
157k
//    assert(i <= (HazardRec->getMaxLookAhead() + MaxObservedStall) &&
2341
157k
//           "permanent hazard");
2342
157k
    (void)i;
2343
157k
    bumpCycle(CurrCycle + 1);
2344
157k
    releasePending();
2345
157k
  }
2346
25.5M
2347
25.5M
  DEBUG(Pending.dump());
2348
25.5M
  DEBUG(Available.dump());
2349
25.5M
2350
25.5M
  if (Available.size() == 1)
2351
7.37M
    return *Available.begin();
2352
18.1M
  return nullptr;
2353
18.1M
}
2354
2355
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2356
// This is useful information to dump after bumpNode.
2357
// Note that the Queue contents are more useful before pickNodeFromQueue.
2358
LLVM_DUMP_METHOD void SchedBoundary::dumpScheduledState() const {
2359
  unsigned ResFactor;
2360
  unsigned ResCount;
2361
  if (ZoneCritResIdx) {
2362
    ResFactor = SchedModel->getResourceFactor(ZoneCritResIdx);
2363
    ResCount = getResourceCount(ZoneCritResIdx);
2364
  } else {
2365
    ResFactor = SchedModel->getMicroOpFactor();
2366
    ResCount = RetiredMOps * ResFactor;
2367
  }
2368
  unsigned LFactor = SchedModel->getLatencyFactor();
2369
  dbgs() << Available.getName() << " @" << CurrCycle << "c\n"
2370
         << "  Retired: " << RetiredMOps;
2371
  dbgs() << "\n  Executed: " << getExecutedCount() / LFactor << "c";
2372
  dbgs() << "\n  Critical: " << ResCount / LFactor << "c, "
2373
         << ResCount / ResFactor << " "
2374
         << SchedModel->getResourceName(ZoneCritResIdx)
2375
         << "\n  ExpectedLatency: " << ExpectedLatency << "c\n"
2376
         << (IsResourceLimited ? "  - Resource" : "  - Latency")
2377
         << " limited.\n";
2378
}
2379
#endif
2380
2381
//===----------------------------------------------------------------------===//
2382
// GenericScheduler - Generic implementation of MachineSchedStrategy.
2383
//===----------------------------------------------------------------------===//
2384
2385
void GenericSchedulerBase::SchedCandidate::
2386
initResourceDelta(const ScheduleDAGMI *DAG,
2387
82.0M
                  const TargetSchedModel *SchedModel) {
2388
82.0M
  if (!Policy.ReduceResIdx && !Policy.DemandResIdx)
2389
58.6M
    return;
2390
23.3M
2391
23.3M
  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
2392
23.3M
  for (TargetSchedModel::ProcResIter
2393
23.3M
         PI = SchedModel->getWriteProcResBegin(SC),
2394
48.3M
         PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2395
25.0M
    if (PI->ProcResourceIdx == Policy.ReduceResIdx)
2396
2.05M
      ResDelta.CritResources += PI->Cycles;
2397
25.0M
    if (PI->ProcResourceIdx == Policy.DemandResIdx)
2398
16.1M
      ResDelta.DemandedResources += PI->Cycles;
2399
25.0M
  }
2400
82.0M
}
2401
2402
/// Set the CandPolicy given a scheduling zone given the current resources and
2403
/// latencies inside and outside the zone.
2404
void GenericSchedulerBase::setPolicy(CandPolicy &Policy, bool IsPostRA,
2405
                                     SchedBoundary &CurrZone,
2406
17.6M
                                     SchedBoundary *OtherZone) {
2407
17.6M
  // Apply preemptive heuristics based on the total latency and resources
2408
17.6M
  // inside and outside this zone. Potential stalls should be considered before
2409
17.6M
  // following this policy.
2410
17.6M
2411
17.6M
  // Compute remaining latency. We need this both to determine whether the
2412
17.6M
  // overall schedule has become latency-limited and whether the instructions
2413
17.6M
  // outside this zone are resource or latency limited.
2414
17.6M
  //
2415
17.6M
  // The "dependent" latency is updated incrementally during scheduling as the
2416
17.6M
  // max height/depth of scheduled nodes minus the cycles since it was
2417
17.6M
  // scheduled:
2418
17.6M
  //   DLat = max (N.depth - (CurrCycle - N.ReadyCycle) for N in Zone
2419
17.6M
  //
2420
17.6M
  // The "independent" latency is the max ready queue depth:
2421
17.6M
  //   ILat = max N.depth for N in Available|Pending
2422
17.6M
  //
2423
17.6M
  // RemainingLatency is the greater of independent and dependent latency.
2424
17.6M
  unsigned RemLatency = CurrZone.getDependentLatency();
2425
17.6M
  RemLatency = std::max(RemLatency,
2426
17.6M
                        CurrZone.findMaxLatency(CurrZone.Available.elements()));
2427
17.6M
  RemLatency = std::max(RemLatency,
2428
17.6M
                        CurrZone.findMaxLatency(CurrZone.Pending.elements()));
2429
17.6M
2430
17.6M
  // Compute the critical resource outside the zone.
2431
17.6M
  unsigned OtherCritIdx = 0;
2432
17.6M
  unsigned OtherCount =
2433
17.6M
    OtherZone ? OtherZone->getOtherResourceCount(OtherCritIdx) : 0;
2434
17.6M
2435
17.6M
  bool OtherResLimited = false;
2436
17.6M
  if (SchedModel->hasInstrSchedModel()) {
2437
17.4M
    unsigned LFactor = SchedModel->getLatencyFactor();
2438
17.4M
    OtherResLimited = (int)(OtherCount - (RemLatency * LFactor)) > (int)LFactor;
2439
17.4M
  }
2440
17.6M
  // Schedule aggressively for latency in PostRA mode. We don't check for
2441
17.6M
  // acyclic latency during PostRA, and highly out-of-order processors will
2442
17.6M
  // skip PostRA scheduling.
2443
17.6M
  if (!OtherResLimited) {
2444
15.2M
    if (IsPostRA || (RemLatency + CurrZone.getCurrCycle() > Rem.CriticalPath)) {
2445
790k
      Policy.ReduceLatency |= true;
2446
790k
      DEBUG(dbgs() << "  " << CurrZone.Available.getName()
2447
790k
            << " RemainingLatency " << RemLatency << " + "
2448
790k
            << CurrZone.getCurrCycle() << "c > CritPath "
2449
790k
            << Rem.CriticalPath << "\n");
2450
790k
    }
2451
15.2M
  }
2452
17.6M
  // If the same resource is limiting inside and outside the zone, do nothing.
2453
17.6M
  if (CurrZone.getZoneCritResIdx() == OtherCritIdx)
2454
10.5M
    return;
2455
7.10M
2456
7.10M
  DEBUG(
2457
7.10M
    if (CurrZone.isResourceLimited()) {
2458
7.10M
      dbgs() << "  " << CurrZone.Available.getName() << " ResourceLimited: "
2459
7.10M
             << SchedModel->getResourceName(CurrZone.getZoneCritResIdx())
2460
7.10M
             << "\n";
2461
7.10M
    }
2462
7.10M
    if (OtherResLimited)
2463
7.10M
      dbgs() << "  RemainingLimit: "
2464
7.10M
             << SchedModel->getResourceName(OtherCritIdx) << "\n";
2465
7.10M
    if (!CurrZone.isResourceLimited() && !OtherResLimited)
2466
7.10M
      dbgs() << "  Latency limited both directions.\n");
2467
7.10M
2468
7.10M
  if (CurrZone.isResourceLimited() && !Policy.ReduceResIdx)
2469
158k
    Policy.ReduceResIdx = CurrZone.getZoneCritResIdx();
2470
7.10M
2471
7.10M
  if (OtherResLimited)
2472
1.30M
    Policy.DemandResIdx = OtherCritIdx;
2473
17.6M
}
2474
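
Restating the two decisions above as formulas (a sketch; symbols follow the code, with LFactor the scheduling model's latency factor and UnscheduledLatency as computed by findMaxLatency):

    \mathrm{RemLatency} = \max\Bigl(\mathrm{DependentLatency},\;
        \max_{N \in \mathrm{Available}\,\cup\,\mathrm{Pending}} \mathrm{UnscheduledLatency}(N)\Bigr)

    \mathrm{OtherResLimited} \iff \mathrm{OtherCount} - \mathrm{RemLatency}\cdot\mathrm{LFactor} > \mathrm{LFactor}
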
2475
#ifndef NDEBUG
2476
const char *GenericSchedulerBase::getReasonStr(
2477
  GenericSchedulerBase::CandReason Reason) {
2478
  switch (Reason) {
2479
  case NoCand:         return "NOCAND    ";
2480
  case Only1:          return "ONLY1     ";
2481
  case PhysRegCopy:    return "PREG-COPY ";
2482
  case RegExcess:      return "REG-EXCESS";
2483
  case RegCritical:    return "REG-CRIT  ";
2484
  case Stall:          return "STALL     ";
2485
  case Cluster:        return "CLUSTER   ";
2486
  case Weak:           return "WEAK      ";
2487
  case RegMax:         return "REG-MAX   ";
2488
  case ResourceReduce: return "RES-REDUCE";
2489
  case ResourceDemand: return "RES-DEMAND";
2490
  case TopDepthReduce: return "TOP-DEPTH ";
2491
  case TopPathReduce:  return "TOP-PATH  ";
2492
  case BotHeightReduce:return "BOT-HEIGHT";
2493
  case BotPathReduce:  return "BOT-PATH  ";
2494
  case NextDefUse:     return "DEF-USE   ";
2495
  case NodeOrder:      return "ORDER     ";
2496
  };
2497
  llvm_unreachable("Unknown reason!");
2498
}
2499
2500
void GenericSchedulerBase::traceCandidate(const SchedCandidate &Cand) {
2501
  PressureChange P;
2502
  unsigned ResIdx = 0;
2503
  unsigned Latency = 0;
2504
  switch (Cand.Reason) {
2505
  default:
2506
    break;
2507
  case RegExcess:
2508
    P = Cand.RPDelta.Excess;
2509
    break;
2510
  case RegCritical:
2511
    P = Cand.RPDelta.CriticalMax;
2512
    break;
2513
  case RegMax:
2514
    P = Cand.RPDelta.CurrentMax;
2515
    break;
2516
  case ResourceReduce:
2517
    ResIdx = Cand.Policy.ReduceResIdx;
2518
    break;
2519
  case ResourceDemand:
2520
    ResIdx = Cand.Policy.DemandResIdx;
2521
    break;
2522
  case TopDepthReduce:
2523
    Latency = Cand.SU->getDepth();
2524
    break;
2525
  case TopPathReduce:
2526
    Latency = Cand.SU->getHeight();
2527
    break;
2528
  case BotHeightReduce:
2529
    Latency = Cand.SU->getHeight();
2530
    break;
2531
  case BotPathReduce:
2532
    Latency = Cand.SU->getDepth();
2533
    break;
2534
  }
2535
  dbgs() << "  Cand SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason);
2536
  if (P.isValid())
2537
    dbgs() << " " << TRI->getRegPressureSetName(P.getPSet())
2538
           << ":" << P.getUnitInc() << " ";
2539
  else
2540
    dbgs() << "      ";
2541
  if (ResIdx)
2542
    dbgs() << " " << SchedModel->getProcResource(ResIdx)->Name << " ";
2543
  else
2544
    dbgs() << "         ";
2545
  if (Latency)
2546
    dbgs() << " " << Latency << " cycles ";
2547
  else
2548
    dbgs() << "          ";
2549
  dbgs() << '\n';
2550
}
2551
#endif
2552
2553
/// Return true if this heuristic determines order.
2554
static bool tryLess(int TryVal, int CandVal,
2555
                    GenericSchedulerBase::SchedCandidate &TryCand,
2556
                    GenericSchedulerBase::SchedCandidate &Cand,
2557
323M
                    GenericSchedulerBase::CandReason Reason) {
2558
323M
  if (TryVal < CandVal) {
2559
506k
    TryCand.Reason = Reason;
2560
506k
    return true;
2561
506k
  }
2562
322M
  
  if (TryVal > CandVal) {
2563
6.56M
    if (Cand.Reason > Reason)
2564
2.57M
      Cand.Reason = Reason;
2565
6.56M
    return true;
2566
6.56M
  }
2567
315M
  return false;
2568
315M
}
2569
2570
static bool tryGreater(int TryVal, int CandVal,
2571
                       GenericSchedulerBase::SchedCandidate &TryCand,
2572
                       GenericSchedulerBase::SchedCandidate &Cand,
2573
363M
                       GenericSchedulerBase::CandReason Reason) {
2574
363M
  if (TryVal > CandVal) {
2575
4.28M
    TryCand.Reason = Reason;
2576
4.28M
    return true;
2577
4.28M
  }
2578
359M
  
  if (TryVal < CandVal) {
2579
12.9M
    if (Cand.Reason > Reason)
2580
4.11M
      Cand.Reason = Reason;
2581
12.9M
    return true;
2582
12.9M
  }
2583
346M
  return false;
2584
346M
}
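tryLess and tryGreater implement the scheduler's tie-breaking protocol: the first heuristic that finds any difference between the two candidates decides the comparison, and the losing candidate's reason is only ever lowered toward the stronger heuristic, never raised. A minimal standalone sketch of that protocol, using a hypothetical Candidate struct rather than the real SchedCandidate:

  #include <iostream>

  // Hypothetical stand-ins for the scheduler's candidate bookkeeping (illustration only).
  struct Candidate {
    int Value = 0;
    int Why = 99;  // smaller value = stronger (earlier) heuristic decided it
  };

  // Mirrors tryLess: decide as soon as the two values differ; a tie falls through
  // so the caller can consult the next, lower-priority heuristic.
  static bool preferSmaller(int TryVal, int CandVal, Candidate &Try, Candidate &Cand, int R) {
    if (TryVal < CandVal) { Try.Why = R; return true; }
    if (TryVal > CandVal) { if (Cand.Why > R) Cand.Why = R; return true; }
    return false;
  }

  int main() {
    Candidate Try, Cand;
    Try.Value = 3;
    Cand.Value = 5;
    if (preferSmaller(Try.Value, Cand.Value, Try, Cand, 1))
      std::cout << "decided, reason " << Try.Why << '\n';  // prints "decided, reason 1"
  }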
2585
2586
static bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand,
2587
                       GenericSchedulerBase::SchedCandidate &Cand,
2588
9.75M
                       SchedBoundary &Zone) {
2589
9.75M
  if (Zone.isTop()) {
2590
7.79M
    if (Cand.SU->getDepth() > Zone.getScheduledLatency()) {
2591
7.55M
      if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(),
2592
7.55M
                  TryCand, Cand, GenericSchedulerBase::TopDepthReduce))
2593
127
        return true;
2594
7.79M
    }
2595
7.79M
    
    if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(),
2596
7.79M
                   TryCand, Cand, GenericSchedulerBase::TopPathReduce))
2597
112k
      return true;
2598
1.96M
  } else {
2599
1.96M
    if (Cand.SU->getHeight() > Zone.getScheduledLatency()) {
2600
11.7k
      if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(),
2601
11.7k
                  TryCand, Cand, GenericSchedulerBase::BotHeightReduce))
2602
5.54k
        return true;
2603
1.95M
    }
2604
1.95M
    
    if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(),
2605
1.95M
                   TryCand, Cand, GenericSchedulerBase::BotPathReduce))
2606
318k
      return true;
2607
9.32M
  }
2608
9.32M
  return false;
2609
9.32M
}
2610
2611
15.9M
static void tracePick(GenericSchedulerBase::CandReason Reason, bool IsTop) {
2612
15.9M
  DEBUG(dbgs() << "Pick " << (IsTop ? "Top " : "Bot ")
2613
15.9M
        << GenericSchedulerBase::getReasonStr(Reason) << '\n');
2614
15.9M
}
2615
2616
8.97M
static void tracePick(const GenericSchedulerBase::SchedCandidate &Cand) {
2617
8.97M
  tracePick(Cand.Reason, Cand.AtTop);
2618
8.97M
}
2619
2620
4.07M
void GenericScheduler::initialize(ScheduleDAGMI *dag) {
2621
4.07M
  assert(dag->hasVRegLiveness() &&
2622
4.07M
         "(PreRA)GenericScheduler needs vreg liveness");
2623
4.07M
  DAG = static_cast<ScheduleDAGMILive*>(dag);
2624
4.07M
  SchedModel = DAG->getSchedModel();
2625
4.07M
  TRI = DAG->TRI;
2626
4.07M
2627
4.07M
  Rem.init(DAG, SchedModel);
2628
4.07M
  Top.init(DAG, SchedModel, &Rem);
2629
4.07M
  Bot.init(DAG, SchedModel, &Rem);
2630
4.07M
2631
4.07M
  // Initialize resource counts.
2632
4.07M
2633
4.07M
  // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or
2634
4.07M
  // are disabled, then these HazardRecs will be disabled.
2635
4.07M
  const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
2636
4.07M
  if (!Top.HazardRec) {
2637
519k
    Top.HazardRec =
2638
519k
        DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
2639
519k
            Itin, DAG);
2640
519k
  }
2641
4.07M
  if (!Bot.HazardRec) {
2642
519k
    Bot.HazardRec =
2643
519k
        DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
2644
519k
            Itin, DAG);
2645
519k
  }
2646
4.07M
  TopCand.SU = nullptr;
2647
4.07M
  BotCand.SU = nullptr;
2648
4.07M
}
2649
2650
/// Initialize the per-region scheduling policy.
2651
void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin,
2652
                                  MachineBasicBlock::iterator End,
2653
11.7M
                                  unsigned NumRegionInstrs) {
2654
11.7M
  const MachineFunction &MF = *Begin->getParent()->getParent();
2655
11.7M
  const TargetLowering *TLI = MF.getSubtarget().getTargetLowering();
2656
11.7M
2657
11.7M
  // Avoid setting up the register pressure tracker for small regions to save
2658
11.7M
  // compile time. As a rough heuristic, only track pressure when the number of
2659
11.7M
  // schedulable instructions exceeds half the integer register file.
2660
11.7M
  RegionPolicy.ShouldTrackPressure = true;
2661
47.1M
  for (unsigned VT = MVT::i32; VT > (unsigned)MVT::i1; --VT) {
2662
35.3M
    MVT::SimpleValueType LegalIntVT = (MVT::SimpleValueType)VT;
2663
35.3M
    if (TLI->isTypeLegal(LegalIntVT)) {
2664
12.3M
      unsigned NIntRegs = Context->RegClassInfo->getNumAllocatableRegs(
2665
12.3M
        TLI->getRegClassFor(LegalIntVT));
2666
12.3M
      RegionPolicy.ShouldTrackPressure = NumRegionInstrs > (NIntRegs / 2);
2667
12.3M
    }
2668
35.3M
  }
2669
11.7M
2670
11.7M
  // For generic targets, we default to bottom-up, because it's simpler and more
2671
11.7M
  // compile-time optimizations have been implemented in that direction.
2672
11.7M
  RegionPolicy.OnlyBottomUp = true;
2673
11.7M
2674
11.7M
  // Allow the subtarget to override default policy.
2675
11.7M
  MF.getSubtarget().overrideSchedPolicy(RegionPolicy, NumRegionInstrs);
2676
11.7M
2677
11.7M
  // After subtarget overrides, apply command line options.
2678
11.7M
  if (!EnableRegPressure)
2679
0
    RegionPolicy.ShouldTrackPressure = false;
2680
11.7M
2681
11.7M
  // Check -misched-topdown/bottomup can force or unforce scheduling direction.
2682
11.7M
  // e.g. -misched-bottomup=false allows scheduling in both directions.
2683
11.7M
  assert((!ForceTopDown || !ForceBottomUp) &&
2684
11.7M
         "-misched-topdown incompatible with -misched-bottomup");
2685
11.7M
  if (ForceBottomUp.getNumOccurrences() > 0) {
2686
0
    RegionPolicy.OnlyBottomUp = ForceBottomUp;
2687
0
    if (RegionPolicy.OnlyBottomUp)
2688
0
      RegionPolicy.OnlyTopDown = false;
2689
0
  }
2690
11.7M
  if (ForceTopDown.getNumOccurrences() > 0) {
2691
4
    RegionPolicy.OnlyTopDown = ForceTopDown;
2692
4
    if (RegionPolicy.OnlyTopDown)
2693
4
      RegionPolicy.OnlyBottomUp = false;
2694
4
  }
2695
11.7M
}
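As a concrete instance of the pressure-tracking threshold above: on a target whose widest legal integer register class has 32 allocatable registers, NIntRegs / 2 is 16, so a 20-instruction region gets a pressure tracker while a 10-instruction region does not. A standalone sketch with those assumed numbers (no real subtarget is queried here):

  #include <iostream>

  // Assumed values standing in for RegClassInfo and the region size.
  static bool shouldTrackPressure(unsigned NumRegionInstrs, unsigned NIntRegs) {
    return NumRegionInstrs > NIntRegs / 2;  // same rule used for RegionPolicy.ShouldTrackPressure
  }

  int main() {
    std::cout << shouldTrackPressure(20, 32) << ' '    // 1: large region, set up the tracker
              << shouldTrackPressure(10, 32) << '\n';  // 0: small region, skip it
  }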
2696
2697
0
void GenericScheduler::dumpPolicy() const {
2698
0
  // Cannot completely remove virtual function even in release mode.
2699
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2700
  dbgs() << "GenericScheduler RegionPolicy: "
2701
         << " ShouldTrackPressure=" << RegionPolicy.ShouldTrackPressure
2702
         << " OnlyTopDown=" << RegionPolicy.OnlyTopDown
2703
         << " OnlyBottomUp=" << RegionPolicy.OnlyBottomUp
2704
         << "\n";
2705
#endif
2706
}
2707
2708
/// Set IsAcyclicLatencyLimited if the acyclic path is longer than the cyclic
2709
/// critical path by more cycles than it takes to drain the instruction buffer.
2710
/// We estimate an upper bound on in-flight instructions as:
2711
///
2712
/// CyclesPerIteration = max( CyclicPath, Loop-Resource-Height )
2713
/// InFlightIterations = AcyclicPath / CyclesPerIteration
2714
/// InFlightResources = InFlightIterations * LoopResources
2715
///
2716
/// TODO: Check execution resources in addition to IssueCount.
2717
4.05M
void GenericScheduler::checkAcyclicLatency() {
2718
4.05M
  if (Rem.CyclicCritPath == 0 || Rem.CyclicCritPath >= Rem.CriticalPath)
2719
4.04M
    return;
2720
9.35k
2721
9.35k
  // Scaled number of cycles per loop iteration.
2722
9.35k
  unsigned IterCount =
2723
9.35k
    std::max(Rem.CyclicCritPath * SchedModel->getLatencyFactor(),
2724
9.35k
             Rem.RemIssueCount);
2725
9.35k
  // Scaled acyclic critical path.
2726
9.35k
  unsigned AcyclicCount = Rem.CriticalPath * SchedModel->getLatencyFactor();
2727
9.35k
  // InFlightCount = (AcyclicPath / IterCycles) * InstrPerLoop
2728
9.35k
  unsigned InFlightCount =
2729
9.35k
    (AcyclicCount * Rem.RemIssueCount + IterCount-1) / IterCount;
2730
9.35k
  unsigned BufferLimit =
2731
9.35k
    SchedModel->getMicroOpBufferSize() * SchedModel->getMicroOpFactor();
2732
9.35k
2733
9.35k
  Rem.IsAcyclicLatencyLimited = InFlightCount > BufferLimit;
2734
9.35k
2735
9.35k
  DEBUG(dbgs() << "IssueCycles="
2736
4.05M
        << Rem.RemIssueCount / SchedModel->getLatencyFactor() << "c "
2737
4.05M
        << "IterCycles=" << IterCount / SchedModel->getLatencyFactor()
2738
4.05M
        << "c NumIters=" << (AcyclicCount + IterCount-1) / IterCount
2739
4.05M
        << " InFlight=" << InFlightCount / SchedModel->getMicroOpFactor()
2740
4.05M
        << "m BufferLim=" << SchedModel->getMicroOpBufferSize() << "m\n";
2741
4.05M
        if (Rem.IsAcyclicLatencyLimited)
2742
4.05M
          dbgs() << "  ACYCLIC LATENCY LIMIT\n");
2743
4.05M
}
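To see the estimate in action with small assumed numbers (the latency and micro-op scaling factors are folded away for clarity): a loop with a cyclic critical path of 10 cycles and an acyclic critical path of 40 cycles keeps about 4 iterations in flight; at 8 micro-ops per iteration that is 32 in-flight micro-ops, which exceeds a 20-entry micro-op buffer, so the region would be flagged acyclic-latency limited:

  #include <iostream>

  int main() {
    unsigned IterCycles   = 10;  // max(CyclicCritPath, loop resource height), assumed
    unsigned AcyclicPath  = 40;  // acyclic critical path of one iteration, assumed
    unsigned OpsPerIter   = 8;   // micro-ops issued per iteration, assumed
    unsigned BufferLimit  = 20;  // micro-op buffer size, assumed
    unsigned InFlightIters = (AcyclicPath + IterCycles - 1) / IterCycles;  // round up
    unsigned InFlightOps   = InFlightIters * OpsPerIter;
    std::cout << "in-flight micro-ops: " << InFlightOps
              << (InFlightOps > BufferLimit ? "  -> acyclic latency limited\n" : "  -> ok\n");
  }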
2744
2745
4.07M
void GenericScheduler::registerRoots() {
2746
4.07M
  Rem.CriticalPath = DAG->ExitSU.getDepth();
2747
4.07M
2748
4.07M
  // Some roots may not feed into ExitSU. Check all of them in case.
2749
9.65M
  for (const SUnit *SU : Bot.Available) {
2750
9.65M
    if (SU->getDepth() > Rem.CriticalPath)
2751
582k
      Rem.CriticalPath = SU->getDepth();
2752
9.65M
  }
2753
4.07M
  DEBUG(dbgs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << '\n');
2754
4.07M
  if (DumpCriticalPathLength) {
2755
0
    errs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << " \n";
2756
0
  }
2757
4.07M
2758
4.07M
  if (EnableCyclicPath && SchedModel->getMicroOpBufferSize() > 0) {
2759
4.05M
    Rem.CyclicCritPath = DAG->computeCyclicCriticalPath();
2760
4.05M
    checkAcyclicLatency();
2761
4.05M
  }
2762
4.07M
}
2763
2764
static bool tryPressure(const PressureChange &TryP,
2765
                        const PressureChange &CandP,
2766
                        GenericSchedulerBase::SchedCandidate &TryCand,
2767
                        GenericSchedulerBase::SchedCandidate &Cand,
2768
                        GenericSchedulerBase::CandReason Reason,
2769
                        const TargetRegisterInfo *TRI,
2770
146M
                        const MachineFunction &MF) {
2771
146M
  // If one candidate decreases and the other increases, go with it.
2772
146M
  // Invalid candidates have UnitInc==0.
2773
146M
  if (tryGreater(TryP.getUnitInc() < 0, CandP.getUnitInc() < 0, TryCand, Cand,
2774
146M
                 Reason)) {
2775
499k
    return true;
2776
499k
  }
2777
146M
  // Do not compare the magnitude of pressure changes between top and bottom
2778
146M
  // boundary.
2779
146M
  
  if (Cand.AtTop != TryCand.AtTop)
2780
4.69M
    return false;
2781
141M
2782
141M
  // If both candidates affect the same set in the same boundary, go with the
2783
141M
  // smallest increase.
2784
141M
  unsigned TryPSet = TryP.getPSetOrMax();
2785
141M
  unsigned CandPSet = CandP.getPSetOrMax();
2786
141M
  if (TryPSet == CandPSet) {
2787
137M
    return tryLess(TryP.getUnitInc(), CandP.getUnitInc(), TryCand, Cand,
2788
137M
                   Reason);
2789
137M
  }
2790
4.32M
2791
4.32M
  
  int TryRank = TryP.isValid() ? TRI->getRegPressureSetScore(MF, TryPSet) :
2792
123k
                                 std::numeric_limits<int>::max();
2793
4.32M
2794
1.09M
  int CandRank = CandP.isValid() ? TRI->getRegPressureSetScore(MF, CandPSet) :
2795
3.22M
                                   std::numeric_limits<int>::max();
2796
4.32M
2797
4.32M
  // If the candidates are decreasing pressure, reverse priority.
2798
4.32M
  if (TryP.getUnitInc() < 0)
2799
370
    std::swap(TryRank, CandRank);
2800
146M
  return tryGreater(TryRank, CandRank, TryCand, Cand, Reason);
2801
146M
}
2802
2803
123M
static unsigned getWeakLeft(const SUnit *SU, bool isTop) {
2804
123M
  return (isTop) ? SU->WeakPredsLeft : SU->WeakSuccsLeft;
2805
123M
}
2806
2807
/// Minimize physical register live ranges. Regalloc wants them adjacent to
2808
/// their physreg def/use.
2809
///
2810
/// FIXME: This is an unnecessary check on the critical path. Most are root/leaf
2811
/// copies which can be prescheduled. The rest (e.g. x86 MUL) could be bundled
2812
/// with the operation that produces or consumes the physreg. We'll do this when
2813
/// regalloc has support for parallel copies.
2814
162M
static int biasPhysRegCopy(const SUnit *SU, bool isTop) {
2815
162M
  const MachineInstr *MI = SU->getInstr();
2816
162M
  if (!MI->isCopy())
2817
132M
    return 0;
2818
30.5M
2819
30.5M
  
  unsigned ScheduledOper = isTop ? 1 : 0;
2820
30.5M
  unsigned UnscheduledOper = isTop ? 0 : 1;
2821
30.5M
  // If we have already scheduled the physreg produce/consumer, immediately
2822
30.5M
  // schedule the copy.
2823
30.5M
  if (TargetRegisterInfo::isPhysicalRegister(
2824
30.5M
        MI->getOperand(ScheduledOper).getReg()))
2825
20.3M
    return 1;
2826
10.2M
  // If the physreg is at the boundary, defer it. Otherwise schedule it
2827
10.2M
  // immediately to free the dependent. We can hoist the copy later.
2828
10.2M
  
  bool AtBoundary = isTop ? !SU->NumSuccsLeft : !SU->NumPredsLeft;
2829
10.2M
  if (TargetRegisterInfo::isPhysicalRegister(
2830
10.2M
        MI->getOperand(UnscheduledOper).getReg()))
2831
8.34M
    
    return AtBoundary ? -1 : 1;
2832
1.89M
  return 0;
2833
1.89M
}
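The bias reduces to a three-way vote per copy: +1 when the physreg side of the copy has already been scheduled (place the copy next to it now), -1 when the physreg side is still unscheduled and the copy sits at the region boundary (defer it), and 0 otherwise. A toy restatement of that decision with the inputs made explicit (the parameter names are illustrative, not the real API):

  #include <iostream>

  // PhysRegOnScheduledSide: the operand already placed by the scheduler is a physical register.
  // PhysRegOnUnscheduledSide: the not-yet-placed operand is a physical register.
  // AtBoundary: the copy has no remaining successors (top) or predecessors (bottom).
  static int copyBias(bool PhysRegOnScheduledSide, bool PhysRegOnUnscheduledSide, bool AtBoundary) {
    if (PhysRegOnScheduledSide)
      return 1;                    // schedule the copy immediately, next to its physreg
    if (PhysRegOnUnscheduledSide)
      return AtBoundary ? -1 : 1;  // defer at the boundary, otherwise free the dependent
    return 0;                      // no physreg constraint on this copy
  }

  int main() {
    std::cout << copyBias(true, false, false) << ' '   // prints 1
              << copyBias(false, true, true)  << '\n'; // prints -1
  }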
2834
2835
void GenericScheduler::initCandidate(SchedCandidate &Cand, SUnit *SU,
2836
                                     bool AtTop,
2837
                                     const RegPressureTracker &RPTracker,
2838
83.2M
                                     RegPressureTracker &TempTracker) {
2839
83.2M
  Cand.SU = SU;
2840
83.2M
  Cand.AtTop = AtTop;
2841
83.2M
  if (DAG->isTrackingPressure()) {
2842
52.0M
    if (AtTop) {
2843
21.6M
      TempTracker.getMaxDownwardPressureDelta(
2844
21.6M
        Cand.SU->getInstr(),
2845
21.6M
        Cand.RPDelta,
2846
21.6M
        DAG->getRegionCriticalPSets(),
2847
21.6M
        DAG->getRegPressure().MaxSetPressure);
2848
52.0M
    } else {
2849
30.3M
      if (VerifyScheduling) {
2850
19
        TempTracker.getMaxUpwardPressureDelta(
2851
19
          Cand.SU->getInstr(),
2852
19
          &DAG->getPressureDiff(Cand.SU),
2853
19
          Cand.RPDelta,
2854
19
          DAG->getRegionCriticalPSets(),
2855
19
          DAG->getRegPressure().MaxSetPressure);
2856
30.3M
      } else {
2857
30.3M
        RPTracker.getUpwardPressureDelta(
2858
30.3M
          Cand.SU->getInstr(),
2859
30.3M
          DAG->getPressureDiff(Cand.SU),
2860
30.3M
          Cand.RPDelta,
2861
30.3M
          DAG->getRegionCriticalPSets(),
2862
30.3M
          DAG->getRegPressure().MaxSetPressure);
2863
30.3M
      }
2864
30.3M
    }
2865
52.0M
  }
2866
83.2M
  DEBUG(if (Cand.RPDelta.Excess.isValid())
2867
83.2M
          dbgs() << "  Try  SU(" << Cand.SU->NodeNum << ") "
2868
83.2M
                 << TRI->getRegPressureSetName(Cand.RPDelta.Excess.getPSet())
2869
83.2M
                 << ":" << Cand.RPDelta.Excess.getUnitInc() << "\n");
2870
83.2M
}
2871
2872
/// Apply a set of heuristics to a new candidate. Heuristics are currently
2873
/// hierarchical. This may be more efficient than a graduated cost model because
2874
/// we don't need to evaluate all aspects of the model for each node in the
2875
/// queue. But it's really done to make the heuristics easier to debug and
2876
/// statistically analyze.
2877
///
2878
/// \param Cand provides the policy and current best candidate.
2879
/// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
2880
/// \param Zone describes the scheduled zone that we are extending, or nullptr
2881
//              if Cand is from a different zone than TryCand.
2882
void GenericScheduler::tryCandidate(SchedCandidate &Cand,
2883
                                    SchedCandidate &TryCand,
2884
94.7M
                                    SchedBoundary *Zone) {
2885
94.7M
  // Initialize the candidate if needed.
2886
94.7M
  if (!Cand.isValid()) {
2887
13.3M
    TryCand.Reason = NodeOrder;
2888
13.3M
    return;
2889
13.3M
  }
2890
81.3M
2891
81.3M
  
  if (tryGreater(biasPhysRegCopy(TryCand.SU, TryCand.AtTop),
2892
81.3M
                 biasPhysRegCopy(Cand.SU, Cand.AtTop),
2893
81.3M
                 TryCand, Cand, PhysRegCopy))
2894
9.92M
    return;
2895
71.4M
2896
71.4M
  // Avoid exceeding the target's limit.
2897
71.4M
  
  if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.Excess,
2898
53.1M
                                               Cand.RPDelta.Excess,
2899
53.1M
                                               TryCand, Cand, RegExcess, TRI,
2900
53.1M
                                               DAG->MF))
2901
2.04M
    return;
2902
69.4M
2903
69.4M
  // Avoid increasing the max critical pressure in the scheduled region.
2904
69.4M
  
  if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CriticalMax,
2905
51.1M
                                               Cand.RPDelta.CriticalMax,
2906
51.1M
                                               TryCand, Cand, RegCritical, TRI,
2907
51.1M
                                               DAG->MF))
2908
1.94M
    return;
2909
67.4M
2910
67.4M
  // We only compare a subset of features when comparing nodes between
2911
67.4M
  // Top and Bottom boundary. Some properties are simply incomparable, in many
2912
67.4M
  // other instances we should only override the other boundary if something
2913
67.4M
  // is a clear good pick on one boundary. Skip heuristics that are more
2914
67.4M
  // "tie-breaking" in nature.
2915
67.4M
  bool SameBoundary = Zone != nullptr;
2916
67.4M
  if (SameBoundary) {
2917
62.7M
    // For loops that are acyclic path limited, aggressively schedule for
2918
62.7M
    // latency. Within a single cycle, whenever CurrMOps > 0, allow normal
2919
62.7M
    // heuristics to take precedence.
2920
62.7M
    if (Rem.IsAcyclicLatencyLimited && !Zone->getCurrMOps() &&
2921
8.99M
        tryLatency(TryCand, Cand, *Zone))
2922
150k
      return;
2923
62.5M
2924
62.5M
    // Prioritize instructions that read unbuffered resources by stall cycles.
2925
62.5M
    
    if (tryLess(Zone->getLatencyStallCycles(TryCand.SU),
2926
62.5M
                Zone->getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
2927
23.1k
      return;
2928
67.3M
  }
2929
67.3M
2930
67.3M
  // Keep clustered nodes together to encourage downstream peephole
2931
67.3M
  // optimizations which may reduce resource requirements.
2932
67.3M
  //
2933
67.3M
  // This is a best effort to set things up for a post-RA pass. Optimizations
2934
67.3M
  // like generating loads of multiple registers should ideally be done within
2935
67.3M
  // the scheduler pass by combining the loads during DAG postprocessing.
2936
67.3M
  const SUnit *CandNextClusterSU =
2937
67.3M
    Cand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
2938
67.3M
  const SUnit *TryCandNextClusterSU =
2939
67.3M
    TryCand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
2940
67.3M
  if (tryGreater(TryCand.SU == TryCandNextClusterSU,
2941
67.3M
                 Cand.SU == CandNextClusterSU,
2942
67.3M
                 TryCand, Cand, Cluster))
2943
1.30M
    return;
2944
66.0M
2945
66.0M
  
  if (SameBoundary) {
2946
61.6M
    // Weak edges are for clustering and other constraints.
2947
61.6M
    if (tryLess(getWeakLeft(TryCand.SU, TryCand.AtTop),
2948
61.6M
                getWeakLeft(Cand.SU, Cand.AtTop),
2949
61.6M
                TryCand, Cand, Weak))
2950
6.67M
      return;
2951
59.3M
  }
2952
59.3M
2953
59.3M
  // Avoid increasing the max pressure of the entire region.
2954
59.3M
  
  if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CurrentMax,
2955
42.5M
                                               Cand.RPDelta.CurrentMax,
2956
42.5M
                                               TryCand, Cand, RegMax, TRI,
2957
42.5M
                                               DAG->MF))
2958
1.01M
    return;
2959
58.3M
2960
58.3M
  
  if (SameBoundary) {
2961
53.9M
    // Avoid critical resource consumption and balance the schedule.
2962
53.9M
    TryCand.initResourceDelta(DAG, SchedModel);
2963
53.9M
    if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
2964
53.9M
                TryCand, Cand, ResourceReduce))
2965
195k
      return;
2966
53.7M
    
    if (tryGreater(TryCand.ResDelta.DemandedResources,
2967
53.7M
                   Cand.ResDelta.DemandedResources,
2968
53.7M
                   TryCand, Cand, ResourceDemand))
2969
797k
      return;
2970
52.9M
2971
52.9M
    // Avoid serializing long latency dependence chains.
2972
52.9M
    // For acyclic path limited loops, latency was already checked above.
2973
52.9M
    
    if (!RegionPolicy.DisableLatencyHeuristic && TryCand.Policy.ReduceLatency &&
2974
52.9M
        
        !Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, *Zone))
2975
277k
      return;
2976
52.6M
2977
52.6M
    // Fall through to original instruction order.
2978
52.6M
    
    if ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum)
2979
52.6M
        || (!Zone->isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
2980
12.4M
      TryCand.Reason = NodeOrder;
2981
12.4M
    }
2982
53.9M
  }
2983
94.7M
}
2984
2985
/// Pick the best candidate from the queue.
2986
///
2987
/// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during
2988
/// DAG building. To adjust for the current scheduling location we need to
2989
/// maintain the number of vreg uses remaining to be top-scheduled.
2990
void GenericScheduler::pickNodeFromQueue(SchedBoundary &Zone,
2991
                                         const CandPolicy &ZonePolicy,
2992
                                         const RegPressureTracker &RPTracker,
2993
13.1M
                                         SchedCandidate &Cand) {
2994
13.1M
  // getMaxPressureDelta temporarily modifies the tracker.
2995
13.1M
  RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);
2996
13.1M
2997
13.1M
  ReadyQueue &Q = Zone.Available;
2998
83.2M
  for (SUnit *SU : Q) {
2999
83.2M
3000
83.2M
    SchedCandidate TryCand(ZonePolicy);
3001
83.2M
    initCandidate(TryCand, SU, Zone.isTop(), RPTracker, TempTracker);
3002
83.2M
    // Pass SchedBoundary only when comparing nodes from the same boundary.
3003
83.2M
    SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr;
3004
83.2M
    tryCandidate(Cand, TryCand, ZoneArg);
3005
83.2M
    if (TryCand.Reason != NoCand) {
3006
28.5M
      // Initialize resource delta if needed in case future heuristics query it.
3007
28.5M
      if (TryCand.ResDelta == SchedResourceDelta())
3008
27.0M
        TryCand.initResourceDelta(DAG, SchedModel);
3009
28.5M
      Cand.setBest(TryCand);
3010
28.5M
      DEBUG(traceCandidate(Cand));
3011
28.5M
    }
3012
83.2M
  }
3013
13.1M
}
3014
3015
/// Pick the best candidate node from either the top or bottom queue.
3016
15.5M
SUnit *GenericScheduler::pickNodeBidirectional(bool &IsTopNode) {
3017
15.5M
  // Schedule as far as possible in the direction of no choice. This is most
3018
15.5M
  // efficient, but also provides the best heuristics for CriticalPSets.
3019
15.5M
  if (SUnit *SU = Bot.pickOnlyChoice()) {
3020
6.72M
    IsTopNode = false;
3021
6.72M
    tracePick(Only1, false);
3022
6.72M
    return SU;
3023
6.72M
  }
3024
8.78M
  
  if (SUnit *SU = Top.pickOnlyChoice()) {
3025
198k
    IsTopNode = true;
3026
198k
    tracePick(Only1, true);
3027
198k
    return SU;
3028
198k
  }
3029
8.58M
  // Set the bottom-up policy based on the state of the current bottom zone and
3030
8.58M
  // the instructions outside the zone, including the top zone.
3031
8.58M
  CandPolicy BotPolicy;
3032
8.58M
  setPolicy(BotPolicy, /*IsPostRA=*/false, Bot, &Top);
3033
8.58M
  // Set the top-down policy based on the state of the current top zone and
3034
8.58M
  // the instructions outside the zone, including the bottom zone.
3035
8.58M
  CandPolicy TopPolicy;
3036
8.58M
  setPolicy(TopPolicy, /*IsPostRA=*/false, Top, &Bot);
3037
8.58M
3038
8.58M
  // See if BotCand is still valid (because we previously scheduled from Top).
3039
8.58M
  DEBUG(dbgs() << "Picking from Bot:\n");
3040
8.58M
  if (!BotCand.isValid() || BotCand.SU->isScheduled ||
3041
8.58M
      
      BotCand.Policy != BotPolicy) {
3042
8.17M
    BotCand.reset(CandPolicy());
3043
8.17M
    pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand);
3044
8.17M
    assert(BotCand.Reason != NoCand && "failed to find the first candidate");
3045
8.58M
  } else {
3046
409k
    DEBUG(traceCandidate(BotCand));
3047
#ifndef NDEBUG
3048
    if (VerifyScheduling) {
3049
      SchedCandidate TCand;
3050
      TCand.reset(CandPolicy());
3051
      pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), TCand);
3052
      assert(TCand.SU == BotCand.SU &&
3053
             "Last pick result should correspond to re-picking right now");
3054
    }
3055
#endif
3056
  }
3057
8.58M
3058
8.58M
  // Check if the top Q has a better candidate.
3059
8.58M
  DEBUG(dbgs() << "Picking from Top:\n");
3060
8.58M
  if (!TopCand.isValid() || TopCand.SU->isScheduled ||
3061
8.58M
      
      TopCand.Policy != TopPolicy) {
3062
4.55M
    TopCand.reset(CandPolicy());
3063
4.55M
    pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand);
3064
4.55M
    assert(TopCand.Reason != NoCand && "failed to find the first candidate");
3065
8.58M
  } else {
3066
4.02M
    DEBUG(traceCandidate(TopCand));
3067
#ifndef NDEBUG
3068
    if (VerifyScheduling) {
3069
      SchedCandidate TCand;
3070
      TCand.reset(CandPolicy());
3071
      pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TCand);
3072
      assert(TCand.SU == TopCand.SU &&
3073
           "Last pick result should correspond to re-picking right now");
3074
    }
3075
#endif
3076
  }
3077
8.58M
3078
8.58M
  // Pick best from BotCand and TopCand.
3079
8.58M
  assert(BotCand.isValid());
3080
8.58M
  assert(TopCand.isValid());
3081
8.58M
  SchedCandidate Cand = BotCand;
3082
8.58M
  TopCand.Reason = NoCand;
3083
8.58M
  tryCandidate(Cand, TopCand, nullptr);
3084
8.58M
  if (TopCand.Reason != NoCand) {
3085
1.09M
    Cand.setBest(TopCand);
3086
1.09M
    DEBUG(traceCandidate(Cand));
3087
1.09M
  }
3088
15.5M
3089
15.5M
  IsTopNode = Cand.AtTop;
3090
15.5M
  tracePick(Cand);
3091
15.5M
  return Cand.SU;
3092
15.5M
}
3093
3094
/// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
3095
20.3M
SUnit *GenericScheduler::pickNode(bool &IsTopNode) {
3096
20.3M
  if (DAG->top() == DAG->bottom()) {
3097
4.05M
    assert(Top.Available.empty() && Top.Pending.empty() &&
3098
4.05M
           Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
3099
4.05M
    return nullptr;
3100
4.05M
  }
3101
16.2M
  SUnit *SU;
3102
16.2M
  do {
3103
16.2M
    if (RegionPolicy.OnlyTopDown) {
3104
54
      SU = Top.pickOnlyChoice();
3105
54
      if (!SU) {
3106
46
        CandPolicy NoPolicy;
3107
46
        TopCand.reset(NoPolicy);
3108
46
        pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand);
3109
46
        assert(TopCand.Reason != NoCand && "failed to find a candidate");
3110
46
        tracePick(TopCand);
3111
46
        SU = TopCand.SU;
3112
46
      }
3113
54
      IsTopNode = true;
3114
16.2M
    } else if (RegionPolicy.OnlyBottomUp) {
3115
756k
      SU = Bot.pickOnlyChoice();
3116
756k
      if (!SU) {
3117
380k
        CandPolicy NoPolicy;
3118
380k
        BotCand.reset(NoPolicy);
3119
380k
        pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand);
3120
380k
        assert(BotCand.Reason != NoCand && "failed to find a candidate");
3121
380k
        tracePick(BotCand);
3122
380k
        SU = BotCand.SU;
3123
380k
      }
3124
756k
      IsTopNode = false;
3125
16.2M
    } else {
3126
15.5M
      SU = pickNodeBidirectional(IsTopNode);
3127
15.5M
    }
3128
16.2M
  } while (SU->isScheduled);
3129
16.2M
3130
16.2M
  if (SU->isTopReady())
3131
10.7M
    Top.removeReady(SU);
3132
16.2M
  if (SU->isBottomReady())
3133
15.9M
    Bot.removeReady(SU);
3134
16.2M
3135
16.2M
  DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") " << *SU->getInstr());
3136
20.3M
  return SU;
3137
20.3M
}
3138
3139
1.34M
void GenericScheduler::reschedulePhysRegCopies(SUnit *SU, bool isTop) {
3140
1.34M
  MachineBasicBlock::iterator InsertPos = SU->getInstr();
3141
1.34M
  if (!isTop)
3142
279k
    ++InsertPos;
3143
1.34M
  SmallVectorImpl<SDep> &Deps = isTop ? SU->Preds : SU->Succs;
3144
1.34M
3145
1.34M
  // Find already scheduled copies with a single physreg dependence and move
3146
1.34M
  // them just above the scheduled instruction.
3147
557k
  for (SDep &Dep : Deps) {
3148
557k
    if (Dep.getKind() != SDep::Data || !TRI->isPhysicalRegister(Dep.getReg()))
3149
248k
      continue;
3150
309k
    SUnit *DepSU = Dep.getSUnit();
3151
309k
    if (isTop ? DepSU->Succs.size() > 1 : DepSU->Preds.size() > 1)
3152
187k
      continue;
3153
122k
    MachineInstr *Copy = DepSU->getInstr();
3154
122k
    if (!Copy->isCopy())
3155
118k
      continue;
3156
3.97k
    
    DEBUG(dbgs() << " Rescheduling physreg copy ";
3157
3.97k
          Dep.getSUnit()->dump(DAG));
3158
3.97k
    DAG->moveInstruction(Copy, InsertPos);
3159
3.97k
  }
3160
1.34M
}
3161
3162
/// Update the scheduler's state after scheduling a node. This is the same node
3163
/// that was just returned by pickNode(). However, ScheduleDAGMILive needs to
3164
/// update its state based on the current cycle before MachineSchedStrategy
3165
/// does.
3166
///
3167
/// FIXME: Eventually, we may bundle physreg copies rather than rescheduling
3168
/// them here. See comments in biasPhysRegCopy.
3169
16.5M
void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
3170
16.5M
  if (IsTopNode) {
3171
1.34M
    SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
3172
1.34M
    Top.bumpNode(SU);
3173
1.34M
    if (SU->hasPhysRegUses)
3174
1.06M
      reschedulePhysRegCopies(SU, true);
3175
16.5M
  } else {
3176
15.1M
    SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.getCurrCycle());
3177
15.1M
    Bot.bumpNode(SU);
3178
15.1M
    if (SU->hasPhysRegDefs)
3179
279k
      reschedulePhysRegCopies(SU, false);
3180
15.1M
  }
3181
16.5M
}
3182
3183
/// Create the standard converging machine scheduler. This will be used as the
3184
/// default scheduler if the target does not set a default.
3185
534k
ScheduleDAGMILive *llvm::createGenericSchedLive(MachineSchedContext *C) {
3186
534k
  ScheduleDAGMILive *DAG =
3187
534k
      new ScheduleDAGMILive(C, llvm::make_unique<GenericScheduler>(C));
3188
534k
  // Register DAG post-processors.
3189
534k
  //
3190
534k
  // FIXME: extend the mutation API to allow earlier mutations to instantiate
3191
534k
  // data and pass it to later mutations. Have a single mutation that gathers
3192
534k
  // the interesting nodes in one pass.
3193
534k
  DAG->addMutation(createCopyConstrainDAGMutation(DAG->TII, DAG->TRI));
3194
534k
  return DAG;
3195
534k
}
3196
3197
0
static ScheduleDAGInstrs *createConveringSched(MachineSchedContext *C) {
3198
0
  return createGenericSchedLive(C);
3199
0
}
3200
3201
static MachineSchedRegistry
3202
GenericSchedRegistry("converge", "Standard converging scheduler.",
3203
                     createConveringSched);
3204
3205
//===----------------------------------------------------------------------===//
3206
// PostGenericScheduler - Generic PostRA implementation of MachineSchedStrategy.
3207
//===----------------------------------------------------------------------===//
3208
3209
6.12k
void PostGenericScheduler::initialize(ScheduleDAGMI *Dag) {
3210
6.12k
  DAG = Dag;
3211
6.12k
  SchedModel = DAG->getSchedModel();
3212
6.12k
  TRI = DAG->TRI;
3213
6.12k
3214
6.12k
  Rem.init(DAG, SchedModel);
3215
6.12k
  Top.init(DAG, SchedModel, &Rem);
3216
6.12k
  BotRoots.clear();
3217
6.12k
3218
6.12k
  // Initialize the HazardRecognizers. If itineraries don't exist, are empty,
3219
6.12k
  // or are disabled, then these HazardRecs will be disabled.
3220
6.12k
  const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
3221
6.12k
  if (!Top.HazardRec) {
3222
5.18k
    Top.HazardRec =
3223
5.18k
        DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
3224
5.18k
            Itin, DAG);
3225
5.18k
  }
3226
6.12k
}
3227
3228
6.12k
void PostGenericScheduler::registerRoots() {
3229
6.12k
  Rem.CriticalPath = DAG->ExitSU.getDepth();
3230
6.12k
3231
6.12k
  // Some roots may not feed into ExitSU. Check all of them in case.
3232
8.07k
  for (const SUnit *SU : BotRoots) {
3233
8.07k
    if (SU->getDepth() > Rem.CriticalPath)
3234
986
      Rem.CriticalPath = SU->getDepth();
3235
8.07k
  }
3236
6.12k
  DEBUG(dbgs() << "Critical Path: (PGS-RR) " << Rem.CriticalPath << '\n');
3237
6.12k
  if (DumpCriticalPathLength) {
3238
0
    errs() << "Critical Path(PGS-RR ): " << Rem.CriticalPath << " \n";
3239
0
  }
3240
6.12k
}
3241
3242
/// Apply a set of heuristics to a new candidate for PostRA scheduling.
3243
///
3244
/// \param Cand provides the policy and current best candidate.
3245
/// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
3246
void PostGenericScheduler::tryCandidate(SchedCandidate &Cand,
3247
30.8k
                                        SchedCandidate &TryCand) {
3248
30.8k
  // Initialize the candidate if needed.
3249
30.8k
  if (!Cand.isValid()) {
3250
8.89k
    TryCand.Reason = NodeOrder;
3251
8.89k
    return;
3252
8.89k
  }
3253
21.9k
3254
21.9k
  // Prioritize instructions that read unbuffered resources by stall cycles.
3255
21.9k
  
  if (tryLess(Top.getLatencyStallCycles(TryCand.SU),
3256
21.9k
              Top.getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
3257
25
    return;
3258
21.9k
3259
21.9k
  // Keep clustered nodes together.
3260
21.9k
  
  if (tryGreater(TryCand.SU == DAG->getNextClusterSucc(),
3261
21.9k
                 Cand.SU == DAG->getNextClusterSucc(),
3262
21.9k
                 TryCand, Cand, Cluster))
3263
117
    return;
3264
21.7k
3265
21.7k
  // Avoid critical resource consumption and balance the schedule.
3266
21.7k
  
  if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
3267
21.7k
              TryCand, Cand, ResourceReduce))
3268
35
    return;
3269
21.7k
  
  if (tryGreater(TryCand.ResDelta.DemandedResources,
3270
21.7k
                 Cand.ResDelta.DemandedResources,
3271
21.7k
                 TryCand, Cand, ResourceDemand))
3272
0
    return;
3273
21.7k
3274
21.7k
  // Avoid serializing long latency dependence chains.
3275
21.7k
  
  if (Cand.Policy.ReduceLatency && tryLatency(TryCand, Cand, Top)) {
3276
8.52k
    return;
3277
8.52k
  }
3278
13.2k
3279
13.2k
  // Fall through to original instruction order.
3280
13.2k
  
  if (TryCand.SU->NodeNum < Cand.SU->NodeNum)
3281
5.04k
    TryCand.Reason = NodeOrder;
3282
30.8k
}
3283
3284
8.89k
void PostGenericScheduler::pickNodeFromQueue(SchedCandidate &Cand) {
3285
8.89k
  ReadyQueue &Q = Top.Available;
3286
30.8k
  for (SUnit *SU : Q) {
3287
30.8k
    SchedCandidate TryCand(Cand.Policy);
3288
30.8k
    TryCand.SU = SU;
3289
30.8k
    TryCand.AtTop = true;
3290
30.8k
    TryCand.initResourceDelta(DAG, SchedModel);
3291
30.8k
    tryCandidate(Cand, TryCand);
3292
30.8k
    if (TryCand.Reason != NoCand) {
3293
16.9k
      Cand.setBest(TryCand);
3294
16.9k
      DEBUG(traceCandidate(Cand));
3295
16.9k
    }
3296
30.8k
  }
3297
8.89k
}
3298
3299
/// Pick the next node to schedule.
3300
32.6k
SUnit *PostGenericScheduler::pickNode(bool &IsTopNode) {
3301
32.6k
  if (DAG->top() == DAG->bottom()) {
3302
6.12k
    assert(Top.Available.empty() && Top.Pending.empty() && "ReadyQ garbage");
3303
6.12k
    return nullptr;
3304
6.12k
  }
3305
26.5k
  SUnit *SU;
3306
26.5k
  do {
3307
26.5k
    SU = Top.pickOnlyChoice();
3308
26.5k
    if (SU) {
3309
17.6k
      tracePick(Only1, true);
3310
26.5k
    } else {
3311
8.89k
      CandPolicy NoPolicy;
3312
8.89k
      SchedCandidate TopCand(NoPolicy);
3313
8.89k
      // Set the top-down policy based on the state of the current top zone and
3314
8.89k
      // the instructions outside the zone, including the bottom zone.
3315
8.89k
      setPolicy(TopCand.Policy, /*IsPostRA=*/true, Top, nullptr);
3316
8.89k
      pickNodeFromQueue(TopCand);
3317
8.89k
      assert(TopCand.Reason != NoCand && "failed to find a candidate");
3318
8.89k
      tracePick(TopCand);
3319
8.89k
      SU = TopCand.SU;
3320
8.89k
    }
3321
26.5k
  } while (SU->isScheduled);
3322
26.5k
3323
26.5k
  IsTopNode = true;
3324
26.5k
  Top.removeReady(SU);
3325
26.5k
3326
26.5k
  DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") " << *SU->getInstr());
3327
32.6k
  return SU;
3328
32.6k
}
3329
3330
/// Called after ScheduleDAGMI has scheduled an instruction and updated
3331
/// scheduled/remaining flags in the DAG nodes.
3332
26.5k
void PostGenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
3333
26.5k
  SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
3334
26.5k
  Top.bumpNode(SU);
3335
26.5k
}
3336
3337
8.60k
ScheduleDAGMI *llvm::createGenericSchedPostRA(MachineSchedContext *C) {
3338
8.60k
  return new ScheduleDAGMI(C, llvm::make_unique<PostGenericScheduler>(C),
3339
8.60k
                           /*RemoveKillFlags=*/true);
3340
8.60k
}
3341
3342
//===----------------------------------------------------------------------===//
3343
// ILP Scheduler. Currently for experimental analysis of heuristics.
3344
//===----------------------------------------------------------------------===//
3345
3346
namespace {
3347
3348
/// \brief Order nodes by the ILP metric.
3349
struct ILPOrder {
3350
  const SchedDFSResult *DFSResult = nullptr;
3351
  const BitVector *ScheduledTrees = nullptr;
3352
  bool MaximizeILP;
3353
3354
8
  ILPOrder(bool MaxILP) : MaximizeILP(MaxILP) {}
3355
3356
  /// \brief Apply a less-than relation on node priority.
3357
  ///
3358
  /// (Return true if A comes after B in the Q.)
3359
326
  bool operator()(const SUnit *A, const SUnit *B) const {
3360
326
    unsigned SchedTreeA = DFSResult->getSubtreeID(A);
3361
326
    unsigned SchedTreeB = DFSResult->getSubtreeID(B);
3362
326
    if (SchedTreeA != SchedTreeB) {
3363
174
      // Unscheduled trees have lower priority.
3364
174
      if (ScheduledTrees->test(SchedTreeA) != ScheduledTrees->test(SchedTreeB))
3365
113
        return ScheduledTrees->test(SchedTreeB);
3366
61
3367
61
      // Trees with shallower connections have lower priority.
3368
61
      
      if (DFSResult->getSubtreeLevel(SchedTreeA)
3369
61
          != DFSResult->getSubtreeLevel(SchedTreeB)) {
3370
0
        return DFSResult->getSubtreeLevel(SchedTreeA)
3371
0
          < DFSResult->getSubtreeLevel(SchedTreeB);
3372
0
      }
3373
213
    }
3374
213
    
    if (MaximizeILP)
3375
125
      return DFSResult->getILP(A) < DFSResult->getILP(B);
3376
213
    else
3377
88
      return DFSResult->getILP(A) > DFSResult->getILP(B);
3378
0
  }
3379
};
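The ILP scheduler below keeps its ready queue as a plain std::vector maintained as a heap with this comparator, calling std::make_heap again whenever the DFS results or the set of scheduled trees change. The same pattern in miniature, with integers standing in for SUnits and their ILP values:

  #include <algorithm>
  #include <iostream>
  #include <vector>

  int main() {
    // "Less" on priority: the heap keeps the highest-priority element at the front.
    auto Cmp = [](int A, int B) { return A < B; };
    std::vector<int> ReadyQ = {3, 9, 1};
    std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
    std::pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);  // move the best pick to the back
    std::cout << "picked " << ReadyQ.back() << '\n';   // prints "picked 9"
    ReadyQ.pop_back();
  }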
3380
3381
/// \brief Schedule based on the ILP metric.
3382
class ILPScheduler : public MachineSchedStrategy {
3383
  ScheduleDAGMILive *DAG = nullptr;
3384
  ILPOrder Cmp;
3385
3386
  std::vector<SUnit*> ReadyQ;
3387
3388
public:
3389
8
  ILPScheduler(bool MaximizeILP) : Cmp(MaximizeILP) {}
3390
3391
10
  void initialize(ScheduleDAGMI *dag) override {
3392
10
    assert(dag->hasVRegLiveness() && "ILPScheduler needs vreg liveness");
3393
10
    DAG = static_cast<ScheduleDAGMILive*>(dag);
3394
10
    DAG->computeDFSResult();
3395
10
    Cmp.DFSResult = DAG->getDFSResult();
3396
10
    Cmp.ScheduledTrees = &DAG->getScheduledTrees();
3397
10
    ReadyQ.clear();
3398
10
  }
3399
3400
10
  void registerRoots() override {
3401
10
    // Restore the heap in ReadyQ with the updated DFS results.
3402
10
    std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
3403
10
  }
3404
3405
  /// Implement MachineSchedStrategy interface.
3406
  /// -----------------------------------------
3407
3408
  /// Callback to select the highest priority node from the ready Q.
3409
186
  SUnit *pickNode(bool &IsTopNode) override {
3410
186
    if (ReadyQ.empty()) return nullptr;
3411
176
    std::pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
3412
176
    SUnit *SU = ReadyQ.back();
3413
176
    ReadyQ.pop_back();
3414
176
    IsTopNode = false;
3415
176
    DEBUG(dbgs() << "Pick node " << "SU(" << SU->NodeNum << ") "
3416
186
          << " ILP: " << DAG->getDFSResult()->getILP(SU)
3417
186
          << " Tree: " << DAG->getDFSResult()->getSubtreeID(SU) << " @"
3418
186
          << DAG->getDFSResult()->getSubtreeLevel(
3419
186
            DAG->getDFSResult()->getSubtreeID(SU)) << '\n'
3420
186
          << "Scheduling " << *SU->getInstr());
3421
186
    return SU;
3422
186
  }
3423
3424
  /// \brief Scheduler callback to notify that a new subtree is scheduled.
3425
38
  void scheduleTree(unsigned SubtreeID) override {
3426
38
    std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
3427
38
  }
3428
3429
  /// Callback after a node is scheduled. Mark a newly scheduled tree, notify
3430
  /// DFSResults, and resort the priority Q.
3431
176
  void schedNode(SUnit *SU, bool IsTopNode) override {
3432
176
    assert(!IsTopNode && "SchedDFSResult needs bottom-up");
3433
176
  }
3434
3435
64
  void releaseTopNode(SUnit *) override { /*only called for top roots*/ }
3436
3437
176
  void releaseBottomNode(SUnit *SU) override {
3438
176
    ReadyQ.push_back(SU);
3439
176
    std::push_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
3440
176
  }
3441
};
3442
3443
} // end anonymous namespace
3444
3445
6
static ScheduleDAGInstrs *createILPMaxScheduler(MachineSchedContext *C) {
3446
6
  return new ScheduleDAGMILive(C, llvm::make_unique<ILPScheduler>(true));
3447
6
}
3448
2
static ScheduleDAGInstrs *createILPMinScheduler(MachineSchedContext *C) {
3449
2
  return new ScheduleDAGMILive(C, llvm::make_unique<ILPScheduler>(false));
3450
2
}
3451
3452
static MachineSchedRegistry ILPMaxRegistry(
3453
  "ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler);
3454
static MachineSchedRegistry ILPMinRegistry(
3455
  "ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler);
3456
3457
//===----------------------------------------------------------------------===//
3458
// Machine Instruction Shuffler for Correctness Testing
3459
//===----------------------------------------------------------------------===//
3460
3461
#ifndef NDEBUG
3462
namespace {
3463
3464
/// Apply a less-than relation on the node order, which corresponds to the
3465
/// instruction order prior to scheduling. IsReverse implements greater-than.
3466
template<bool IsReverse>
3467
struct SUnitOrder {
3468
  bool operator()(SUnit *A, SUnit *B) const {
3469
    if (IsReverse)
3470
      return A->NodeNum > B->NodeNum;
3471
    else
3472
      return A->NodeNum < B->NodeNum;
3473
  }
3474
};
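Because TopQ uses the less-than ordering, the queue's top element is the SUnit with the largest NodeNum, which is what lets the shuffler emit the latest instructions first when scheduling top-down. The same effect with std::priority_queue and plain integers:

  #include <iostream>
  #include <queue>

  int main() {
    // Default std::less comparison: top() is the largest element pushed so far.
    std::priority_queue<int> TopQ;
    TopQ.push(1);
    TopQ.push(4);
    TopQ.push(2);
    std::cout << TopQ.top() << '\n';  // prints 4
  }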
3475
3476
/// Reorder instructions as much as possible.
3477
class InstructionShuffler : public MachineSchedStrategy {
3478
  bool IsAlternating;
3479
  bool IsTopDown;
3480
3481
  // Using a less-than relation (SUnitOrder<false>) for the TopQ priority
3482
  // gives nodes with a higher number higher priority causing the latest
3483
  // instructions to be scheduled first.
3484
  PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<false>>
3485
    TopQ;
3486
3487
  // When scheduling bottom-up, use greater-than as the queue priority.
3488
  PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<true>>
3489
    BottomQ;
3490
3491
public:
3492
  InstructionShuffler(bool alternate, bool topdown)
3493
    : IsAlternating(alternate), IsTopDown(topdown) {}
3494
3495
  void initialize(ScheduleDAGMI*) override {
3496
    TopQ.clear();
3497
    BottomQ.clear();
3498
  }
3499
3500
  /// Implement MachineSchedStrategy interface.
3501
  /// -----------------------------------------
3502
3503
  SUnit *pickNode(bool &IsTopNode) override {
3504
    SUnit *SU;
3505
    if (IsTopDown) {
3506
      do {
3507
        if (TopQ.empty()) return nullptr;
3508
        SU = TopQ.top();
3509
        TopQ.pop();
3510
      } while (SU->isScheduled);
3511
      IsTopNode = true;
3512
    } else {
3513
      do {
3514
        if (BottomQ.empty()) return nullptr;
3515
        SU = BottomQ.top();
3516
        BottomQ.pop();
3517
      } while (SU->isScheduled);
3518
      IsTopNode = false;
3519
    }
3520
    if (IsAlternating)
3521
      IsTopDown = !IsTopDown;
3522
    return SU;
3523
  }
3524
3525
  void schedNode(SUnit *SU, bool IsTopNode) override {}
3526
3527
  void releaseTopNode(SUnit *SU) override {
3528
    TopQ.push(SU);
3529
  }
3530
  void releaseBottomNode(SUnit *SU) override {
3531
    BottomQ.push(SU);
3532
  }
3533
};
3534
3535
} // end anonymous namespace
3536
3537
static ScheduleDAGInstrs *createInstructionShuffler(MachineSchedContext *C) {
3538
  bool Alternate = !ForceTopDown && !ForceBottomUp;
3539
  bool TopDown = !ForceBottomUp;
3540
  assert((TopDown || !ForceTopDown) &&
3541
         "-misched-topdown incompatible with -misched-bottomup");
3542
  return new ScheduleDAGMILive(
3543
      C, llvm::make_unique<InstructionShuffler>(Alternate, TopDown));
3544
}
3545
3546
static MachineSchedRegistry ShufflerRegistry(
3547
  "shuffle", "Shuffle machine instructions alternating directions",
3548
  createInstructionShuffler);
3549
#endif // !NDEBUG
3550
3551
//===----------------------------------------------------------------------===//
3552
// GraphWriter support for ScheduleDAGMILive.
3553
//===----------------------------------------------------------------------===//
3554
3555
#ifndef NDEBUG
3556
namespace llvm {
3557
3558
template<> struct GraphTraits<
3559
  ScheduleDAGMI*> : public GraphTraits<ScheduleDAG*> {};
3560
3561
template<>
3562
struct DOTGraphTraits<ScheduleDAGMI*> : public DefaultDOTGraphTraits {
3563
  DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {}
3564
3565
  static std::string getGraphName(const ScheduleDAG *G) {
3566
    return G->MF.getName();
3567
  }
3568
3569
  static bool renderGraphFromBottomUp() {
3570
    return true;
3571
  }
3572
3573
  static bool isNodeHidden(const SUnit *Node) {
3574
    if (ViewMISchedCutoff == 0)
3575
      return false;
3576
    return (Node->Preds.size() > ViewMISchedCutoff
3577
         || Node->Succs.size() > ViewMISchedCutoff);
3578
  }
3579
3580
  /// If you want to override the dot attributes printed for a particular
3581
  /// edge, override this method.
3582
  static std::string getEdgeAttributes(const SUnit *Node,
3583
                                       SUnitIterator EI,
3584
                                       const ScheduleDAG *Graph) {
3585
    if (EI.isArtificialDep())
3586
      return "color=cyan,style=dashed";
3587
    if (EI.isCtrlDep())
3588
      return "color=blue,style=dashed";
3589
    return "";
3590
  }
3591
3592
  static std::string getNodeLabel(const SUnit *SU, const ScheduleDAG *G) {
3593
    std::string Str;
3594
    raw_string_ostream SS(Str);
3595
    const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G);
3596
    const SchedDFSResult *DFS = DAG->hasVRegLiveness() ?
3597
      static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr;
3598
    SS << "SU:" << SU->NodeNum;
3599
    if (DFS)
3600
      SS << " I:" << DFS->getNumInstrs(SU);
3601
    return SS.str();
3602
  }
3603
3604
  static std::string getNodeDescription(const SUnit *SU, const ScheduleDAG *G) {
3605
    return G->getGraphNodeLabel(SU);
3606
  }
3607
3608
  static std::string getNodeAttributes(const SUnit *N, const ScheduleDAG *G) {
3609
    std::string Str("shape=Mrecord");
3610
    const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G);
3611
    const SchedDFSResult *DFS = DAG->hasVRegLiveness() ?
3612
      static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr;
3613
    if (DFS) {
3614
      Str += ",style=filled,fillcolor=\"#";
3615
      Str += DOT::getColorString(DFS->getSubtreeID(N));
3616
      Str += '"';
3617
    }
3618
    return Str;
3619
  }
3620
};
3621
3622
} // end namespace llvm
3623
#endif // NDEBUG
3624
3625
/// viewGraph - Pop up a ghostview window with the reachable parts of the DAG
3626
/// rendered using 'dot'.
3627
0
void ScheduleDAGMI::viewGraph(const Twine &Name, const Twine &Title) {
3628
#ifndef NDEBUG
3629
  ViewGraph(this, Name, false, Title);
3630
#else
3631
  errs() << "ScheduleDAGMI::viewGraph is only available in debug builds on "
3632
0
         << "systems with Graphviz or gv!\n";
3633
0
#endif  // NDEBUG
3634
0
}
3635
3636
/// Out-of-line implementation with no arguments is handy for gdb.
3637
0
void ScheduleDAGMI::viewGraph() {
3638
0
  viewGraph(getDAGName(), "Scheduling-Units Graph for " + getDAGName());
3639
0
}