Coverage Report

Created: 2017-10-03 07:32

/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp

//===----- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler --===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This implements bottom-up and top-down register pressure reduction list
// schedulers, using standard algorithms.  The basic approach uses a priority
// queue of available nodes to schedule.  One at a time, nodes are taken from
// the priority queue (thus in priority order), checked for legality to
// schedule, and emitted if legal.
//
//===----------------------------------------------------------------------===//
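
// A minimal sketch (not part of the original file) of the list-scheduling
// loop described above, reduced to its essentials. All names in this snippet
// (MiniNode, ByPriority, isLegalToSchedule, defer, emit) are hypothetical;
// the real loop is ListScheduleBottomUp() further down in this file.
//
//   std::priority_queue<MiniNode*, std::vector<MiniNode*>, ByPriority> Avail;
//   while (!Avail.empty()) {
//     MiniNode *N = Avail.top();          // take nodes in priority order
//     Avail.pop();
//     if (!isLegalToSchedule(N)) {        // e.g. physreg interference
//       defer(N);                         // retry once the conflict clears
//       continue;
//     }
//     emit(N);                            // append to the schedule
//     for (MiniNode *Pred : N->Preds)     // bottom-up: release predecessors
//       if (--Pred->UnscheduledSuccs == 0)
//         Avail.push(Pred);
//   }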

#include "ScheduleDAGSDNodes.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include <climits>

using namespace llvm;

#define DEBUG_TYPE "pre-RA-sched"

STATISTIC(NumBacktracks, "Number of times scheduler backtracked");
STATISTIC(NumUnfolds,    "Number of nodes unfolded");
STATISTIC(NumDups,       "Number of duplicated nodes");
STATISTIC(NumPRCopies,   "Number of physical register copies");

static RegisterScheduler
  burrListDAGScheduler("list-burr",
                       "Bottom-up register reduction list scheduling",
                       createBURRListDAGScheduler);
static RegisterScheduler
  sourceListDAGScheduler("source",
                         "Similar to list-burr but schedules in source "
                         "order when possible",
                         createSourceListDAGScheduler);

static RegisterScheduler
  hybridListDAGScheduler("list-hybrid",
                         "Bottom-up register pressure aware list scheduling "
                         "which tries to balance latency and register pressure",
                         createHybridListDAGScheduler);

static RegisterScheduler
  ILPListDAGScheduler("list-ilp",
                      "Bottom-up register pressure aware list scheduling "
                      "which tries to balance ILP and register pressure",
                      createILPListDAGScheduler);

static cl::opt<bool> DisableSchedCycles(
  "disable-sched-cycles", cl::Hidden, cl::init(false),
  cl::desc("Disable cycle-level precision during preRA scheduling"));

// Temporary sched=list-ilp flags until the heuristics are robust.
// Some options are also available under sched=list-hybrid.
static cl::opt<bool> DisableSchedRegPressure(
  "disable-sched-reg-pressure", cl::Hidden, cl::init(false),
  cl::desc("Disable regpressure priority in sched=list-ilp"));
static cl::opt<bool> DisableSchedLiveUses(
  "disable-sched-live-uses", cl::Hidden, cl::init(true),
  cl::desc("Disable live use priority in sched=list-ilp"));
static cl::opt<bool> DisableSchedVRegCycle(
  "disable-sched-vrcycle", cl::Hidden, cl::init(false),
  cl::desc("Disable virtual register cycle interference checks"));
static cl::opt<bool> DisableSchedPhysRegJoin(
  "disable-sched-physreg-join", cl::Hidden, cl::init(false),
  cl::desc("Disable physreg def-use affinity"));
static cl::opt<bool> DisableSchedStalls(
  "disable-sched-stalls", cl::Hidden, cl::init(true),
  cl::desc("Disable no-stall priority in sched=list-ilp"));
static cl::opt<bool> DisableSchedCriticalPath(
  "disable-sched-critical-path", cl::Hidden, cl::init(false),
  cl::desc("Disable critical path priority in sched=list-ilp"));
static cl::opt<bool> DisableSchedHeight(
  "disable-sched-height", cl::Hidden, cl::init(false),
  cl::desc("Disable scheduled-height priority in sched=list-ilp"));
static cl::opt<bool> Disable2AddrHack(
  "disable-2addr-hack", cl::Hidden, cl::init(true),
  cl::desc("Disable scheduler's two-address hack"));

static cl::opt<int> MaxReorderWindow(
  "max-sched-reorder", cl::Hidden, cl::init(6),
  cl::desc("Number of instructions to allow ahead of the critical path "
           "in sched=list-ilp"));

static cl::opt<unsigned> AvgIPC(
  "sched-avg-ipc", cl::Hidden, cl::init(1),
  cl::desc("Average inst/cycle when no target itinerary exists."));
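
// Usage sketch (an illustration, assuming llc is invoked directly): the
// schedulers registered above are selected with llc's -pre-RA-sched flag,
// and the cl::opt flags here tune the list-ilp heuristics, e.g.:
//
//   llc -pre-RA-sched=list-ilp -max-sched-reorder=4 \
//       -disable-sched-reg-pressure input.ll -o out.s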

namespace {
//===----------------------------------------------------------------------===//
/// ScheduleDAGRRList - The actual register reduction list scheduler
/// implementation.  This supports both top-down and bottom-up scheduling.
///
class ScheduleDAGRRList : public ScheduleDAGSDNodes {
private:
  /// NeedLatency - True if the scheduler will make use of latency information.
  ///
  bool NeedLatency;

  /// AvailableQueue - The priority queue to use for the available SUnits.
  SchedulingPriorityQueue *AvailableQueue;

  /// PendingQueue - This contains all of the instructions whose operands have
  /// been issued, but their results are not ready yet (due to the latency of
  /// the operation).  Once the operands become available, the instruction is
  /// added to the AvailableQueue.
  std::vector<SUnit*> PendingQueue;

  /// HazardRec - The hazard recognizer to use.
  ScheduleHazardRecognizer *HazardRec;

  /// CurCycle - The current scheduler state corresponds to this cycle.
  unsigned CurCycle;

  /// MinAvailableCycle - Cycle of the soonest available instruction.
  unsigned MinAvailableCycle;

  /// IssueCount - Count of instructions issued in this cycle.
  /// Currently valid only for bottom-up scheduling.
  unsigned IssueCount;

  /// LiveRegDefs - A set of physical registers and their definitions
  /// that are "live". These nodes must be scheduled before any other nodes
  /// that modify the registers can be scheduled.
  unsigned NumLiveRegs;
  std::unique_ptr<SUnit*[]> LiveRegDefs;
  std::unique_ptr<SUnit*[]> LiveRegGens;

  // Collect interferences between physical register use/defs.
  // Each interference is an SUnit and set of physical registers.
  SmallVector<SUnit*, 4> Interferences;
  typedef DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMapT;
  LRegsMapT LRegsMap;

  /// Topo - A topological ordering for SUnits which permits fast IsReachable
  /// and similar queries.
  ScheduleDAGTopologicalSort Topo;

  // Hack to keep track of the inverse of FindCallSeqStart without more crazy
  // DAG crawling.
  DenseMap<SUnit*, SUnit*> CallSeqEndForStart;

public:
  ScheduleDAGRRList(MachineFunction &mf, bool needlatency,
                    SchedulingPriorityQueue *availqueue,
                    CodeGenOpt::Level OptLevel)
    : ScheduleDAGSDNodes(mf),
      NeedLatency(needlatency), AvailableQueue(availqueue), CurCycle(0),
      Topo(SUnits, nullptr) {
    const TargetSubtargetInfo &STI = mf.getSubtarget();
    if (DisableSchedCycles || !NeedLatency)
      HazardRec = new ScheduleHazardRecognizer();
    else
      HazardRec = STI.getInstrInfo()->CreateTargetHazardRecognizer(&STI, this);
  }

  ~ScheduleDAGRRList() override {
    delete HazardRec;
    delete AvailableQueue;
  }

  void Schedule() override;

  ScheduleHazardRecognizer *getHazardRec() { return HazardRec; }

  /// IsReachable - Checks if SU is reachable from TargetSU.
  bool IsReachable(const SUnit *SU, const SUnit *TargetSU) {
    return Topo.IsReachable(SU, TargetSU);
  }

  /// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will
  /// create a cycle.
  bool WillCreateCycle(SUnit *SU, SUnit *TargetSU) {
    return Topo.WillCreateCycle(SU, TargetSU);
  }

  /// AddPred - adds a predecessor edge to SUnit SU.
  /// This returns true if this is a new predecessor.
  /// Updates the topological ordering if required.
  void AddPred(SUnit *SU, const SDep &D) {
    Topo.AddPred(SU, D.getSUnit());
    SU->addPred(D);
  }

  /// RemovePred - removes a predecessor edge from SUnit SU.
  /// This returns true if an edge was removed.
  /// Updates the topological ordering if required.
  void RemovePred(SUnit *SU, const SDep &D) {
    Topo.RemovePred(SU, D.getSUnit());
    SU->removePred(D);
  }

private:
  bool isReady(SUnit *SU) {
    return DisableSchedCycles || !AvailableQueue->hasReadyFilter() ||
      AvailableQueue->isReady(SU);
  }

  void ReleasePred(SUnit *SU, const SDep *PredEdge);
  void ReleasePredecessors(SUnit *SU);
  void ReleasePending();
  void AdvanceToCycle(unsigned NextCycle);
  void AdvancePastStalls(SUnit *SU);
  void EmitNode(SUnit *SU);
  void ScheduleNodeBottomUp(SUnit*);
  void CapturePred(SDep *PredEdge);
  void UnscheduleNodeBottomUp(SUnit*);
  void RestoreHazardCheckerBottomUp();
  void BacktrackBottomUp(SUnit*, SUnit*);
  SUnit *TryUnfoldSU(SUnit *);
  SUnit *CopyAndMoveSuccessors(SUnit*);
  void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
                                const TargetRegisterClass*,
                                const TargetRegisterClass*,
                                SmallVectorImpl<SUnit*>&);
  bool DelayForLiveRegsBottomUp(SUnit*, SmallVectorImpl<unsigned>&);

  void releaseInterferences(unsigned Reg = 0);

  SUnit *PickNodeToScheduleBottomUp();
  void ListScheduleBottomUp();

  /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it.
  /// Updates the topological ordering if required.
  SUnit *CreateNewSUnit(SDNode *N) {
    unsigned NumSUnits = SUnits.size();
    SUnit *NewNode = newSUnit(N);
    // Update the topological ordering.
    if (NewNode->NodeNum >= NumSUnits)
      Topo.InitDAGTopologicalSorting();
    return NewNode;
  }

  /// CreateClone - Creates a new SUnit from an existing one.
  /// Updates the topological ordering if required.
  SUnit *CreateClone(SUnit *N) {
    unsigned NumSUnits = SUnits.size();
    SUnit *NewNode = Clone(N);
    // Update the topological ordering.
    if (NewNode->NodeNum >= NumSUnits)
      Topo.InitDAGTopologicalSorting();
    return NewNode;
  }

  /// forceUnitLatencies - Register-pressure-reducing scheduling doesn't
  /// need actual latency information but the hybrid scheduler does.
  bool forceUnitLatencies() const override {
    return !NeedLatency;
  }
};
}  // end anonymous namespace
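
// Usage sketch (not from the original file): the Topo ordering is what makes
// DAG mutation safe while scheduling is in progress. A guarded edge
// insertion through the wrappers above might look like:
//
//   if (!WillCreateCycle(SU, TargetSU))               // cheap Topo query
//     AddPred(SU, SDep(TargetSU, SDep::Artificial));  // keeps Topo in sync
//
// Calling SUnit::addPred() directly would bypass Topo and invalidate later
// IsReachable()/WillCreateCycle() answers.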

/// GetCostForDef - Looks up the register class and cost for a given definition.
/// Typically this just means looking up the representative register class,
/// but for untyped values (MVT::Untyped) it means inspecting the node's
/// opcode to determine what register class is being generated.
static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos,
                          const TargetLowering *TLI,
                          const TargetInstrInfo *TII,
                          const TargetRegisterInfo *TRI,
                          unsigned &RegClass, unsigned &Cost,
                          const MachineFunction &MF) {
  MVT VT = RegDefPos.GetValue();

  // Special handling for untyped values.  These values can only come from
  // the expansion of custom DAG-to-DAG patterns.
  if (VT == MVT::Untyped) {
    const SDNode *Node = RegDefPos.GetNode();

    // Special handling for CopyFromReg of untyped values.
    if (!Node->isMachineOpcode() && Node->getOpcode() == ISD::CopyFromReg) {
      unsigned Reg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
      const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(Reg);
      RegClass = RC->getID();
      Cost = 1;
      return;
    }

    unsigned Opcode = Node->getMachineOpcode();
    if (Opcode == TargetOpcode::REG_SEQUENCE) {
      unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
      const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx);
      RegClass = RC->getID();
      Cost = 1;
      return;
    }

    unsigned Idx = RegDefPos.GetIdx();
    const MCInstrDesc Desc = TII->get(Opcode);
    const TargetRegisterClass *RC = TII->getRegClass(Desc, Idx, TRI, MF);
    RegClass = RC->getID();
    // FIXME: Cost arbitrarily set to 1 because there doesn't seem to be a
    // better way to determine it.
    Cost = 1;
  } else {
    RegClass = TLI->getRepRegClassFor(VT)->getID();
    Cost = TLI->getRepRegClassCostFor(VT);
  }
}

/// Schedule - Schedule the DAG using list scheduling.
void ScheduleDAGRRList::Schedule() {
  DEBUG(dbgs()
        << "********** List Scheduling BB#" << BB->getNumber()
        << " '" << BB->getName() << "' **********\n");

  CurCycle = 0;
  IssueCount = 0;
  MinAvailableCycle = DisableSchedCycles ? 0 : UINT_MAX;
  NumLiveRegs = 0;
  // Allocate slots for each physical register, plus one for a special register
  // to track the virtual resource of a calling sequence.
  LiveRegDefs.reset(new SUnit*[TRI->getNumRegs() + 1]());
  LiveRegGens.reset(new SUnit*[TRI->getNumRegs() + 1]());
  CallSeqEndForStart.clear();
  assert(Interferences.empty() && LRegsMap.empty() && "stale Interferences");

  // Build the scheduling graph.
  BuildSchedGraph(nullptr);

  DEBUG(for (SUnit &SU : SUnits)
          SU.dumpAll(this));
  Topo.InitDAGTopologicalSorting();

  AvailableQueue->initNodes(SUnits);

  HazardRec->Reset();

  // Execute the actual scheduling loop.
  ListScheduleBottomUp();

  AvailableQueue->releaseState();

  DEBUG({
      dbgs() << "*** Final schedule ***\n";
      dumpSchedule();
      dbgs() << '\n';
    });
}

//===----------------------------------------------------------------------===//
//  Bottom-Up Scheduling
//===----------------------------------------------------------------------===//

/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) {
  SUnit *PredSU = PredEdge->getSUnit();

#ifndef NDEBUG
  if (PredSU->NumSuccsLeft == 0) {
    dbgs() << "*** Scheduling failed! ***\n";
    PredSU->dump(this);
    dbgs() << " has been released too many times!\n";
    llvm_unreachable(nullptr);
  }
#endif
  --PredSU->NumSuccsLeft;

  if (!forceUnitLatencies()) {
    // Updating predecessor's height. This is now the cycle when the
    // predecessor can be scheduled without causing a pipeline stall.
    PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge->getLatency());
  }

  // If all the node's successors are scheduled, this node is ready
  // to be scheduled. Ignore the special EntrySU node.
  if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) {
    PredSU->isAvailable = true;

    unsigned Height = PredSU->getHeight();
    if (Height < MinAvailableCycle)
      MinAvailableCycle = Height;

    if (isReady(PredSU)) {
      AvailableQueue->push(PredSU);
    }
    // CapturePred and others may have left the node in the pending queue, avoid
    // adding it twice.
    else if (!PredSU->isPending) {
      PredSU->isPending = true;
      PendingQueue.push_back(PredSU);
    }
  }
}
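
// Worked example: given a chain A -> B -> C scheduled bottom-up, C is
// emitted first. ReleasePred(C, <edge to B>) drops B->NumSuccsLeft from 1
// to 0, so B becomes available and is pushed onto AvailableQueue; A stays
// unavailable until B is scheduled and releases it in turn.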

/// IsChainDependent - Test if Outer is reachable from Inner through
/// chain dependencies.
static bool IsChainDependent(SDNode *Outer, SDNode *Inner,
                             unsigned NestLevel,
                             const TargetInstrInfo *TII) {
  SDNode *N = Outer;
  for (;;) {
    if (N == Inner)
      return true;
    // For a TokenFactor, examine each operand. There may be multiple ways
    // to get to the CALLSEQ_BEGIN, but we need to find the path with the
    // most nesting in order to ensure that we find the corresponding match.
    if (N->getOpcode() == ISD::TokenFactor) {
      for (const SDValue &Op : N->op_values())
        if (IsChainDependent(Op.getNode(), Inner, NestLevel, TII))
          return true;
      return false;
    }
    // Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END.
    if (N->isMachineOpcode()) {
      if (N->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
        ++NestLevel;
      } else if (N->getMachineOpcode() == TII->getCallFrameSetupOpcode()) {
        if (NestLevel == 0)
          return false;
        --NestLevel;
      }
    }
    // Otherwise, find the chain and continue climbing.
    for (const SDValue &Op : N->op_values())
      if (Op.getValueType() == MVT::Other) {
        N = Op.getNode();
        goto found_chain_operand;
      }
    return false;
  found_chain_operand:;
    if (N->getOpcode() == ISD::EntryToken)
      return false;
  }
}

/// FindCallSeqStart - Starting from the (lowered) CALLSEQ_END node, locate
/// the corresponding (lowered) CALLSEQ_BEGIN node.
///
/// NestLevel and MaxNest are used in recursion to indicate the current level
/// of nesting of CALLSEQ_BEGIN and CALLSEQ_END pairs, as well as the maximum
/// level seen so far.
///
/// TODO: It would be better to give CALLSEQ_END an explicit operand to point
/// to the corresponding CALLSEQ_BEGIN to avoid needing to search for it.
static SDNode *
FindCallSeqStart(SDNode *N, unsigned &NestLevel, unsigned &MaxNest,
                 const TargetInstrInfo *TII) {
  for (;;) {
    // For a TokenFactor, examine each operand. There may be multiple ways
    // to get to the CALLSEQ_BEGIN, but we need to find the path with the
    // most nesting in order to ensure that we find the corresponding match.
    if (N->getOpcode() == ISD::TokenFactor) {
      SDNode *Best = nullptr;
      unsigned BestMaxNest = MaxNest;
      for (const SDValue &Op : N->op_values()) {
        unsigned MyNestLevel = NestLevel;
        unsigned MyMaxNest = MaxNest;
        if (SDNode *New = FindCallSeqStart(Op.getNode(),
                                           MyNestLevel, MyMaxNest, TII))
          if (!Best || (MyMaxNest > BestMaxNest)) {
            Best = New;
            BestMaxNest = MyMaxNest;
          }
      }
      assert(Best);
      MaxNest = BestMaxNest;
      return Best;
    }
    // Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END.
    if (N->isMachineOpcode()) {
      if (N->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
        ++NestLevel;
        MaxNest = std::max(MaxNest, NestLevel);
      } else if (N->getMachineOpcode() == TII->getCallFrameSetupOpcode()) {
        assert(NestLevel != 0);
        --NestLevel;
        if (NestLevel == 0)
          return N;
      }
    }
    // Otherwise, find the chain and continue climbing.
    for (const SDValue &Op : N->op_values())
      if (Op.getValueType() == MVT::Other) {
        N = Op.getNode();
        goto found_chain_operand;
      }
    return nullptr;
  found_chain_operand:;
    if (N->getOpcode() == ISD::EntryToken)
      return nullptr;
  }
}
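
// Worked example: for a nested call sequence
//
//   CALLSEQ_BEGIN(outer) CALLSEQ_BEGIN(inner) CALLSEQ_END(inner) CALLSEQ_END(outer)
//
// starting the walk at the lowered CALLSEQ_END(outer), NestLevel goes
// 1 (outer END), 2 (inner END), 1 (inner BEGIN), 0 (outer BEGIN), so the
// outer CALLSEQ_BEGIN is returned rather than the inner one. MaxNest records
// the deepest level seen (2 here) and is used above to pick the most deeply
// nested path through a TokenFactor.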

/// Call ReleasePred for each predecessor, then update register live def/gen.
/// Always update LiveRegDefs for a register dependence even if the current SU
/// also defines the register. This effectively creates one large live range
/// across a sequence of two-address nodes. This is important because the
/// entire chain must be scheduled together. Example:
///
/// flags = (3) add
/// flags = (2) addc flags
/// flags = (1) addc flags
///
/// results in
///
/// LiveRegDefs[flags] = 3
/// LiveRegGens[flags] = 1
///
/// If (2) addc is unscheduled, then (1) addc must also be unscheduled to avoid
/// interference on flags.
void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU) {
  // Bottom up: release predecessors.
  for (SDep &Pred : SU->Preds) {
    ReleasePred(SU, &Pred);
    if (Pred.isAssignedRegDep()) {
      // This is a physical register dependency and it's impossible or
      // expensive to copy the register. Make sure nothing that can
      // clobber the register is scheduled between the predecessor and
      // this node.
      SUnit *RegDef = LiveRegDefs[Pred.getReg()]; (void)RegDef;
      assert((!RegDef || RegDef == SU || RegDef == Pred.getSUnit()) &&
             "interference on register dependence");
      LiveRegDefs[Pred.getReg()] = Pred.getSUnit();
      if (!LiveRegGens[Pred.getReg()]) {
        ++NumLiveRegs;
        LiveRegGens[Pred.getReg()] = SU;
      }
    }
  }

  // If we're scheduling a lowered CALLSEQ_END, find the corresponding
  // CALLSEQ_BEGIN. Inject an artificial physical register dependence between
  // these nodes, to prevent other calls from being interscheduled with them.
  unsigned CallResource = TRI->getNumRegs();
  if (!LiveRegDefs[CallResource])
    for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode())
      if (Node->isMachineOpcode() &&
          Node->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
        unsigned NestLevel = 0;
        unsigned MaxNest = 0;
        SDNode *N = FindCallSeqStart(Node, NestLevel, MaxNest, TII);
        assert(N && "Must find call sequence start");

        SUnit *Def = &SUnits[N->getNodeId()];
        CallSeqEndForStart[Def] = SU;

        ++NumLiveRegs;
        LiveRegDefs[CallResource] = Def;
        LiveRegGens[CallResource] = SU;
        break;
      }
}

/// Check to see if any of the pending instructions are ready to issue.  If
/// so, add them to the available queue.
void ScheduleDAGRRList::ReleasePending() {
  if (DisableSchedCycles) {
    assert(PendingQueue.empty() && "pending instrs not allowed in this mode");
    return;
  }

  // If the available queue is empty, it is safe to reset MinAvailableCycle.
  if (AvailableQueue->empty())
    MinAvailableCycle = UINT_MAX;

  // Check to see if any of the pending instructions are ready to issue.  If
  // so, add them to the available queue.
  for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
    unsigned ReadyCycle = PendingQueue[i]->getHeight();
    if (ReadyCycle < MinAvailableCycle)
      MinAvailableCycle = ReadyCycle;

    if (PendingQueue[i]->isAvailable) {
      if (!isReady(PendingQueue[i]))
        continue;
      AvailableQueue->push(PendingQueue[i]);
    }
    PendingQueue[i]->isPending = false;
    PendingQueue[i] = PendingQueue.back();
    PendingQueue.pop_back();
    --i; --e;
  }
}

/// Move the scheduler state forward by the specified number of Cycles.
void ScheduleDAGRRList::AdvanceToCycle(unsigned NextCycle) {
  if (NextCycle <= CurCycle)
    return;

  IssueCount = 0;
  AvailableQueue->setCurCycle(NextCycle);
  if (!HazardRec->isEnabled()) {
    // Bypass lots of virtual calls in case of long latency.
    CurCycle = NextCycle;
  }
  else {
    for (; CurCycle != NextCycle; ++CurCycle) {
      HazardRec->RecedeCycle();
    }
  }
  // FIXME: Instead of visiting the pending Q each time, set a dirty flag on the
  // available Q to release pending nodes at least once before popping.
  ReleasePending();
}

/// Move the scheduler state forward until the specified node's dependents are
/// ready and can be scheduled with no resource conflicts.
void ScheduleDAGRRList::AdvancePastStalls(SUnit *SU) {
  if (DisableSchedCycles)
    return;

  // FIXME: Nodes such as CopyFromReg probably should not advance the current
  // cycle. Otherwise, we can wrongly mask real stalls. If the non-machine node
  // has predecessors the cycle will be advanced when they are scheduled.
  // But given the crude nature of modeling latency through such nodes, we
  // currently need to treat these nodes like real instructions.
  // if (!SU->getNode() || !SU->getNode()->isMachineOpcode()) return;

  unsigned ReadyCycle = SU->getHeight();

  // Bump CurCycle to account for latency. We assume the latency of other
  // available instructions may be hidden by the stall (not a full pipe stall).
  // This updates the hazard recognizer's cycle before reserving resources for
  // this instruction.
  AdvanceToCycle(ReadyCycle);

  // Calls are scheduled in their preceding cycle, so don't conflict with
  // hazards from instructions after the call. EmitNode will reset the
  // scoreboard state before emitting the call.
  if (SU->isCall)
    return;

  // FIXME: For resource conflicts in very long non-pipelined stages, we
  // should probably skip ahead here to avoid useless scoreboard checks.
  int Stalls = 0;
  while (true) {
    ScheduleHazardRecognizer::HazardType HT =
      HazardRec->getHazardType(SU, -Stalls);

    if (HT == ScheduleHazardRecognizer::NoHazard)
      break;

    ++Stalls;
  }
  AdvanceToCycle(CurCycle + Stalls);
}
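
// Illustrative sketch (an assumption, not code from this file): the probing
// loop above needs only the ScheduleHazardRecognizer interface. A toy
// recognizer that forces a one-cycle gap between consecutive loads could
// look like this; isLoad() is a hypothetical helper.
//
//   struct ToyHazardRec : public ScheduleHazardRecognizer {
//     bool LastWasLoad = false;
//     HazardType getHazardType(SUnit *SU, int /*Stalls*/) override {
//       return (LastWasLoad && isLoad(SU)) ? Hazard : NoHazard;
//     }
//     void EmitInstruction(SUnit *SU) override { LastWasLoad = isLoad(SU); }
//     void RecedeCycle() override { LastWasLoad = false; }
//     void Reset() override { LastWasLoad = false; }
//   };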

/// Record this SUnit in the HazardRecognizer.
/// Does not update CurCycle.
void ScheduleDAGRRList::EmitNode(SUnit *SU) {
  if (!HazardRec->isEnabled())
    return;

  // Check for phys reg copy.
  if (!SU->getNode())
    return;

  switch (SU->getNode()->getOpcode()) {
  default:
    assert(SU->getNode()->isMachineOpcode() &&
           "This target-independent node should not be scheduled.");
    break;
  case ISD::MERGE_VALUES:
  case ISD::TokenFactor:
  case ISD::LIFETIME_START:
  case ISD::LIFETIME_END:
  case ISD::CopyToReg:
  case ISD::CopyFromReg:
  case ISD::EH_LABEL:
    // Noops don't affect the scoreboard state. Copies are likely to be
    // removed.
    return;
  case ISD::INLINEASM:
    // For inline asm, clear the pipeline state.
    HazardRec->Reset();
    return;
  }
  if (SU->isCall) {
    // Calls are scheduled with their preceding instructions. For bottom-up
    // scheduling, clear the pipeline state before emitting.
    HazardRec->Reset();
  }

  HazardRec->EmitInstruction(SU);
}

static void resetVRegCycle(SUnit *SU);

/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
/// count of its predecessors. If a predecessor pending count is zero, add it to
/// the Available queue.
void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {
  DEBUG(dbgs() << "\n*** Scheduling [" << CurCycle << "]: ");
  DEBUG(SU->dump(this));

#ifndef NDEBUG
  if (CurCycle < SU->getHeight())
    DEBUG(dbgs() << "   Height [" << SU->getHeight()
          << "] pipeline stall!\n");
#endif

  // FIXME: Do not modify node height. It may interfere with
  // backtracking. Instead add a "ready cycle" to SUnit. Before scheduling the
  // node its ready cycle can aid heuristics, and after scheduling it can
  // indicate the scheduled cycle.
  SU->setHeightToAtLeast(CurCycle);

  // Reserve resources for the scheduled instruction.
  EmitNode(SU);

  Sequence.push_back(SU);

  AvailableQueue->scheduledNode(SU);

  // If HazardRec is disabled, and each inst counts as one cycle, then
  // advance CurCycle before ReleasePredecessors to avoid useless pushes to
  // PendingQueue for schedulers that implement HasReadyFilter.
  if (!HazardRec->isEnabled() && AvgIPC < 2)
    AdvanceToCycle(CurCycle + 1);

  // Update liveness of predecessors before successors to avoid treating a
  // two-address node as a live range def.
  ReleasePredecessors(SU);

  // Release all the implicit physical register defs that are live.
  for (SDep &Succ : SU->Succs) {
    // LiveRegDefs[Succ.getReg()] != SU when SU is a two-address node.
    if (Succ.isAssignedRegDep() && LiveRegDefs[Succ.getReg()] == SU) {
      assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
      --NumLiveRegs;
      LiveRegDefs[Succ.getReg()] = nullptr;
      LiveRegGens[Succ.getReg()] = nullptr;
      releaseInterferences(Succ.getReg());
    }
  }
  // Release the special call resource dependence, if this is the beginning
  // of a call.
  unsigned CallResource = TRI->getNumRegs();
  if (LiveRegDefs[CallResource] == SU)
    for (const SDNode *SUNode = SU->getNode(); SUNode;
         SUNode = SUNode->getGluedNode()) {
      if (SUNode->isMachineOpcode() &&
          SUNode->getMachineOpcode() == TII->getCallFrameSetupOpcode()) {
        assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
        --NumLiveRegs;
        LiveRegDefs[CallResource] = nullptr;
        LiveRegGens[CallResource] = nullptr;
        releaseInterferences(CallResource);
      }
    }

  resetVRegCycle(SU);

  SU->isScheduled = true;

  // Conditions under which the scheduler should eagerly advance the cycle:
  // (1) No available instructions
  // (2) All pipelines full, so available instructions must have hazards.
  //
  // If HazardRec is disabled, the cycle was pre-advanced before calling
  // ReleasePredecessors. In that case, IssueCount should remain 0.
  //
  // Check AvailableQueue after ReleasePredecessors in case of zero latency.
  if (HazardRec->isEnabled() || AvgIPC > 1) {
    if (SU->getNode() && SU->getNode()->isMachineOpcode())
      ++IssueCount;
    if ((HazardRec->isEnabled() && HazardRec->atIssueLimit())
        || (!HazardRec->isEnabled() && IssueCount == AvgIPC))
      AdvanceToCycle(CurCycle + 1);
  }
}

/// CapturePred - This does the opposite of ReleasePred. Since SU is being
/// unscheduled, increase the succ left count of its predecessors. Remove
/// them from AvailableQueue if necessary.
void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {
  SUnit *PredSU = PredEdge->getSUnit();
  if (PredSU->isAvailable) {
    PredSU->isAvailable = false;
    if (!PredSU->isPending)
      AvailableQueue->remove(PredSU);
  }

  assert(PredSU->NumSuccsLeft < UINT_MAX && "NumSuccsLeft will overflow!");
  ++PredSU->NumSuccsLeft;
}

/// UnscheduleNodeBottomUp - Remove the node from the schedule, and update its
/// state and that of its predecessors to reflect the change.
void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
  DEBUG(dbgs() << "*** Unscheduling [" << SU->getHeight() << "]: ");
  DEBUG(SU->dump(this));

  for (SDep &Pred : SU->Preds) {
    CapturePred(&Pred);
    if (Pred.isAssignedRegDep() && SU == LiveRegGens[Pred.getReg()]) {
      assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
      assert(LiveRegDefs[Pred.getReg()] == Pred.getSUnit() &&
             "Physical register dependency violated?");
      --NumLiveRegs;
      LiveRegDefs[Pred.getReg()] = nullptr;
      LiveRegGens[Pred.getReg()] = nullptr;
      releaseInterferences(Pred.getReg());
    }
  }

  // Reclaim the special call resource dependence, if this is the beginning
  // of a call.
  unsigned CallResource = TRI->getNumRegs();
  for (const SDNode *SUNode = SU->getNode(); SUNode;
       SUNode = SUNode->getGluedNode()) {
    if (SUNode->isMachineOpcode() &&
        SUNode->getMachineOpcode() == TII->getCallFrameSetupOpcode()) {
      SUnit *SeqEnd = CallSeqEndForStart[SU];
      assert(SeqEnd && "Call sequence start/end must be known");
      assert(!LiveRegDefs[CallResource]);
      assert(!LiveRegGens[CallResource]);
      ++NumLiveRegs;
      LiveRegDefs[CallResource] = SU;
      LiveRegGens[CallResource] = SeqEnd;
    }
  }

  // Release the special call resource dependence, if this is the end
  // of a call.
  if (LiveRegGens[CallResource] == SU)
    for (const SDNode *SUNode = SU->getNode(); SUNode;
         SUNode = SUNode->getGluedNode()) {
      if (SUNode->isMachineOpcode() &&
          SUNode->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
        assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
        assert(LiveRegDefs[CallResource]);
        assert(LiveRegGens[CallResource]);
        --NumLiveRegs;
        LiveRegDefs[CallResource] = nullptr;
        LiveRegGens[CallResource] = nullptr;
        releaseInterferences(CallResource);
      }
    }

  for (auto &Succ : SU->Succs) {
    if (Succ.isAssignedRegDep()) {
      auto Reg = Succ.getReg();
      if (!LiveRegDefs[Reg])
        ++NumLiveRegs;
      // This becomes the nearest def. Note that an earlier def may still be
      // pending if this is a two-address node.
      LiveRegDefs[Reg] = SU;

      // Update LiveRegGens only if it was empty before this unscheduling.
      // This avoids incorrectly overwriting an entry set by a previous run.
      if (!LiveRegGens[Reg]) {
        // Find the successor with the lowest height.
        LiveRegGens[Reg] = Succ.getSUnit();
        for (auto &Succ2 : SU->Succs) {
          if (Succ2.isAssignedRegDep() && Succ2.getReg() == Reg &&
              Succ2.getSUnit()->getHeight() < LiveRegGens[Reg]->getHeight())
            LiveRegGens[Reg] = Succ2.getSUnit();
        }
      }
    }
  }
  if (SU->getHeight() < MinAvailableCycle)
    MinAvailableCycle = SU->getHeight();

  SU->setHeightDirty();
  SU->isScheduled = false;
  SU->isAvailable = true;
  if (!DisableSchedCycles && AvailableQueue->hasReadyFilter()) {
    // Don't make available until backtracking is complete.
    SU->isPending = true;
    PendingQueue.push_back(SU);
  }
  else {
    AvailableQueue->push(SU);
  }
  AvailableQueue->unscheduledNode(SU);
}

/// After backtracking, the hazard checker needs to be restored to a state
/// corresponding to the current cycle.
void ScheduleDAGRRList::RestoreHazardCheckerBottomUp() {
  HazardRec->Reset();

  unsigned LookAhead = std::min((unsigned)Sequence.size(),
                                HazardRec->getMaxLookAhead());
  if (LookAhead == 0)
    return;

  std::vector<SUnit*>::const_iterator I = (Sequence.end() - LookAhead);
  unsigned HazardCycle = (*I)->getHeight();
  for (auto E = Sequence.end(); I != E; ++I) {
    SUnit *SU = *I;
    for (; SU->getHeight() > HazardCycle; ++HazardCycle) {
      HazardRec->RecedeCycle();
    }
    EmitNode(SU);
  }
}

/// BacktrackBottomUp - Backtrack scheduling to an earlier cycle in order to
/// schedule a specific node, unscheduling nodes from the end of the sequence
/// until BtSU is reached.
void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, SUnit *BtSU) {
  SUnit *OldSU = Sequence.back();
  while (true) {
    Sequence.pop_back();
    // FIXME: use ready cycle instead of height
    CurCycle = OldSU->getHeight();
    UnscheduleNodeBottomUp(OldSU);
    AvailableQueue->setCurCycle(CurCycle);
    if (OldSU == BtSU)
      break;
    OldSU = Sequence.back();
  }

  assert(!SU->isSucc(OldSU) && "Something is wrong!");

  RestoreHazardCheckerBottomUp();

  ReleasePending();

  ++NumBacktracks;
}

static bool isOperandOf(const SUnit *SU, SDNode *N) {
  for (const SDNode *SUNode = SU->getNode(); SUNode;
       SUNode = SUNode->getGluedNode()) {
    if (SUNode->isOperandOf(N))
      return true;
  }
  return false;
}

/// TryUnfoldSU - Attempt to unfold a node that was folded with a memory
/// operand (e.g. a load folded into an ALU op), splitting it into separate
/// load and operation SUnits.
SUnit *ScheduleDAGRRList::TryUnfoldSU(SUnit *SU) {
  SDNode *N = SU->getNode();
  SmallVector<SDNode *, 2> NewNodes;
  if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
    return nullptr;

  // Unfolding an x86 DEC64m operation results in a store, dec, and load,
  // which can't be handled here, so quit.
  if (NewNodes.size() == 3)
    return nullptr;

  assert(NewNodes.size() == 2 && "Expected a load folding node!");

  N = NewNodes[1];
  SDNode *LoadNode = NewNodes[0];
  unsigned NumVals = N->getNumValues();
  unsigned OldNumVals = SU->getNode()->getNumValues();

  // LoadNode may already exist. This can happen when there is another
  // load from the same location and producing the same type of value
  // but it has different alignment or volatileness.
  bool isNewLoad = true;
  SUnit *LoadSU;
  if (LoadNode->getNodeId() != -1) {
    LoadSU = &SUnits[LoadNode->getNodeId()];
    // If LoadSU has already been scheduled, we should clone it but
    // this would negate the benefit to unfolding so just return SU.
    if (LoadSU->isScheduled)
      return SU;
    isNewLoad = false;
  } else {
    LoadSU = CreateNewSUnit(LoadNode);
    LoadNode->setNodeId(LoadSU->NodeNum);

    InitNumRegDefsLeft(LoadSU);
    computeLatency(LoadSU);
  }

  DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n");

  // Now that we are committed to unfolding replace DAG Uses.
  for (unsigned i = 0; i != NumVals; ++i)
    DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i));
  DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals - 1),
                                 SDValue(LoadNode, 1));

  SUnit *NewSU = CreateNewSUnit(N);
  assert(N->getNodeId() == -1 && "Node already inserted!");
  N->setNodeId(NewSU->NodeNum);

  const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
  for (unsigned i = 0; i != MCID.getNumOperands(); ++i) {
    if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) {
      NewSU->isTwoAddress = true;
      break;
    }
  }
  if (MCID.isCommutable())
    NewSU->isCommutable = true;

  InitNumRegDefsLeft(NewSU);
  computeLatency(NewSU);

  // Record all the edges to and from the old SU, by category.
  SmallVector<SDep, 4> ChainPreds;
  SmallVector<SDep, 4> ChainSuccs;
  SmallVector<SDep, 4> LoadPreds;
  SmallVector<SDep, 4> NodePreds;
  SmallVector<SDep, 4> NodeSuccs;
  for (SDep &Pred : SU->Preds) {
    if (Pred.isCtrl())
      ChainPreds.push_back(Pred);
    else if (isOperandOf(Pred.getSUnit(), LoadNode))
      LoadPreds.push_back(Pred);
    else
      NodePreds.push_back(Pred);
  }
  for (SDep &Succ : SU->Succs) {
    if (Succ.isCtrl())
      ChainSuccs.push_back(Succ);
    else
      NodeSuccs.push_back(Succ);
  }

  // Now assign edges to the newly-created nodes.
  for (const SDep &Pred : ChainPreds) {
    RemovePred(SU, Pred);
    if (isNewLoad)
      AddPred(LoadSU, Pred);
  }
  for (const SDep &Pred : LoadPreds) {
    RemovePred(SU, Pred);
    if (isNewLoad)
      AddPred(LoadSU, Pred);
  }
  for (const SDep &Pred : NodePreds) {
    RemovePred(SU, Pred);
    AddPred(NewSU, Pred);
  }
  for (SDep D : NodeSuccs) {
    SUnit *SuccDep = D.getSUnit();
    D.setSUnit(SU);
    RemovePred(SuccDep, D);
    D.setSUnit(NewSU);
    AddPred(SuccDep, D);
    // Balance register pressure.
    if (AvailableQueue->tracksRegPressure() && SuccDep->isScheduled &&
        !D.isCtrl() && NewSU->NumRegDefsLeft > 0)
      --NewSU->NumRegDefsLeft;
  }
  for (SDep D : ChainSuccs) {
    SUnit *SuccDep = D.getSUnit();
    D.setSUnit(SU);
    RemovePred(SuccDep, D);
    if (isNewLoad) {
      D.setSUnit(LoadSU);
      AddPred(SuccDep, D);
    }
  }

  // Add a data dependency to reflect that NewSU reads the value defined
  // by LoadSU.
  SDep D(LoadSU, SDep::Data, 0);
  D.setLatency(LoadSU->Latency);
  AddPred(NewSU, D);

  if (isNewLoad)
    AvailableQueue->addNode(LoadSU);
  AvailableQueue->addNode(NewSU);

  ++NumUnfolds;

  if (NewSU->NumSuccsLeft == 0)
    NewSU->isAvailable = true;

  return NewSU;
}
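
// Worked example (x86, as an illustration): a node with a folded load such as
//
//   t0 = ADD32rm %r, [mem]        // add with a memory operand
//
// is split by unfoldMemoryOperand() into
//
//   t1 = MOV32rm [mem]            // becomes LoadSU
//   t0 = ADD32rr %r, t1           // becomes NewSU, fed by LoadSU
//
// letting the scheduler move the load away from the add, e.g. to hide load
// latency or to clone just the cheap register-only half.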

/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
/// successors to the newly created node.
SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
  SDNode *N = SU->getNode();
  if (!N)
    return nullptr;

  if (SU->getNode()->getGluedNode())
    return nullptr;

  SUnit *NewSU;
  bool TryUnfold = false;
  for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
    MVT VT = N->getSimpleValueType(i);
    if (VT == MVT::Glue)
      return nullptr;
    else if (VT == MVT::Other)
      TryUnfold = true;
  }
  for (const SDValue &Op : N->op_values()) {
    MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo());
    if (VT == MVT::Glue)
      return nullptr;
  }

  // If possible unfold instruction.
  if (TryUnfold) {
    SUnit *UnfoldSU = TryUnfoldSU(SU);
    if (!UnfoldSU)
      return nullptr;
    SU = UnfoldSU;
    N = SU->getNode();
    // If this can be scheduled don't bother duplicating and just return.
    if (SU->NumSuccsLeft == 0)
      return SU;
  }

  DEBUG(dbgs() << "    Duplicating SU #" << SU->NodeNum << "\n");
  NewSU = CreateClone(SU);

  // New SUnit has the exact same predecessors.
  for (SDep &Pred : SU->Preds)
    if (!Pred.isArtificial())
      AddPred(NewSU, Pred);

  // Only copy scheduled successors. Cut them from old node's successor
  // list and move them over.
  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
  for (SDep &Succ : SU->Succs) {
    if (Succ.isArtificial())
      continue;
    SUnit *SuccSU = Succ.getSUnit();
    if (SuccSU->isScheduled) {
      SDep D = Succ;
      D.setSUnit(NewSU);
      AddPred(SuccSU, D);
      D.setSUnit(SU);
      DelDeps.push_back(std::make_pair(SuccSU, D));
    }
  }
  for (auto &DelDep : DelDeps)
    RemovePred(DelDep.first, DelDep.second);

  AvailableQueue->updateNode(SU);
  AvailableQueue->addNode(NewSU);

  ++NumDups;
  return NewSU;
}

/// InsertCopiesAndMoveSuccs - Insert register copies and move all
/// scheduled successors of the given SUnit to the last copy.
void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
                                              const TargetRegisterClass *DestRC,
                                              const TargetRegisterClass *SrcRC,
                                              SmallVectorImpl<SUnit*> &Copies) {
  SUnit *CopyFromSU = CreateNewSUnit(nullptr);
  CopyFromSU->CopySrcRC = SrcRC;
  CopyFromSU->CopyDstRC = DestRC;

  SUnit *CopyToSU = CreateNewSUnit(nullptr);
  CopyToSU->CopySrcRC = DestRC;
  CopyToSU->CopyDstRC = SrcRC;

  // Only copy scheduled successors. Cut them from old node's successor
  // list and move them over.
  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
  for (SDep &Succ : SU->Succs) {
    if (Succ.isArtificial())
      continue;
    SUnit *SuccSU = Succ.getSUnit();
    if (SuccSU->isScheduled) {
      SDep D = Succ;
      D.setSUnit(CopyToSU);
      AddPred(SuccSU, D);
      DelDeps.push_back(std::make_pair(SuccSU, Succ));
    }
    else {
      // Avoid scheduling the def-side copy before other successors. Otherwise
      // we could introduce another physreg interference on the copy and
      // continue inserting copies indefinitely.
      AddPred(SuccSU, SDep(CopyFromSU, SDep::Artificial));
    }
  }
  for (auto &DelDep : DelDeps)
    RemovePred(DelDep.first, DelDep.second);

  SDep FromDep(SU, SDep::Data, Reg);
  FromDep.setLatency(SU->Latency);
  AddPred(CopyFromSU, FromDep);
  SDep ToDep(CopyFromSU, SDep::Data, 0);
  ToDep.setLatency(CopyFromSU->Latency);
  AddPred(CopyToSU, ToDep);

  AvailableQueue->updateNode(SU);
  AvailableQueue->addNode(CopyFromSU);
  AvailableQueue->addNode(CopyToSU);
  Copies.push_back(CopyFromSU);
  Copies.push_back(CopyToSU);

  ++NumPRCopies;
}
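
// Resulting edge structure, sketched: scheduled successors of SU are rewired
// to read from the copy pair instead of from SU directly:
//
//   before:  SU --data--> {scheduled successors}
//   after:   SU --data--> CopyFromSU --data--> CopyToSU --data--> {scheduled successors}
//
// Unscheduled successors only get an artificial edge from CopyFromSU, so the
// def-side copy cannot be scheduled ahead of them and reintroduce the same
// physreg interference.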

/// getPhysicalRegisterVT - Returns the ValueType of the physical register
/// definition of the specified node.
/// FIXME: Move to SelectionDAG?
static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
                                 const TargetInstrInfo *TII) {
  unsigned NumRes;
  if (N->getOpcode() == ISD::CopyFromReg) {
    // CopyFromReg has: "chain, Val, glue" so operand 1 gives the type.
    NumRes = 1;
  } else {
    const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
    assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!");
    NumRes = MCID.getNumDefs();
    for (const MCPhysReg *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) {
      if (Reg == *ImpDef)
        break;
      ++NumRes;
    }
  }
  return N->getSimpleValueType(NumRes);
}

/// CheckForLiveRegDef - Update the live register vector LRegs if the
/// specified register def of the specified SUnit clobbers any "live"
/// registers.
static void CheckForLiveRegDef(SUnit *SU, unsigned Reg,
                               SUnit **LiveRegDefs,
                               SmallSet<unsigned, 4> &RegAdded,
                               SmallVectorImpl<unsigned> &LRegs,
                               const TargetRegisterInfo *TRI) {
  for (MCRegAliasIterator AliasI(Reg, TRI, true); AliasI.isValid(); ++AliasI) {

    // Check if this register alias is live.
    if (!LiveRegDefs[*AliasI]) continue;

    // Allow multiple uses of the same def.
    if (LiveRegDefs[*AliasI] == SU) continue;

    // Add Reg to the set of interfering live regs.
    if (RegAdded.insert(*AliasI).second) {
      LRegs.push_back(*AliasI);
    }
  }
}
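
// Example (x86, as an illustration): with Reg = EAX, MCRegAliasIterator
// (with IncludeSelf = true) visits EAX plus its aliases such as AL, AH, AX,
// and RAX. If any of those has a live definition in LiveRegDefs from a
// different SUnit, it is appended to LRegs, meaning SU cannot be scheduled
// yet without clobbering a live value.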

/// CheckForLiveRegDefMasked - Check for any live physregs that are clobbered
/// by RegMask, and add them to LRegs.
static void CheckForLiveRegDefMasked(SUnit *SU, const uint32_t *RegMask,
                                     ArrayRef<SUnit*> LiveRegDefs,
                                     SmallSet<unsigned, 4> &RegAdded,
                                     SmallVectorImpl<unsigned> &LRegs) {
  // Look at all live registers. Skip Reg0 and the special CallResource.
  for (unsigned i = 1, e = LiveRegDefs.size()-1; i != e; ++i) {
    if (!LiveRegDefs[i]) continue;
    if (LiveRegDefs[i] == SU) continue;
    if (!MachineOperand::clobbersPhysReg(RegMask, i)) continue;
    if (RegAdded.insert(i).second)
      LRegs.push_back(i);
  }
}

/// getNodeRegMask - Returns the register mask attached to an SDNode, if any.
static const uint32_t *getNodeRegMask(const SDNode *N) {
  for (const SDValue &Op : N->op_values())
    if (const auto *RegOp = dyn_cast<RegisterMaskSDNode>(Op.getNode()))
      return RegOp->getRegMask();
  return nullptr;
}
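
// Usage sketch (hypothetical caller; SomeReg is an assumption): a register
// mask is a bit vector describing which registers survive a call-like node.
// Callers pair this helper with MachineOperand::clobbersPhysReg, as
// DelayForLiveRegsBottomUp does below:
//
//   if (const uint32_t *RM = getNodeRegMask(Node)) {
//     bool Clobbered = MachineOperand::clobbersPhysReg(RM, SomeReg);
//     // ... treat SomeReg as clobbered across the call if true.
//   }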
1275
1276
/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
1277
/// scheduling of the given node to satisfy live physical register dependencies.
1278
/// If the specific node is the last one that's available to schedule, do
1279
/// whatever is necessary (i.e. backtracking or cloning) to make it possible.
1280
bool ScheduleDAGRRList::
1281
29.6M
DelayForLiveRegsBottomUp(SUnit *SU, SmallVectorImpl<unsigned> &LRegs) {
1282
29.6M
  if (NumLiveRegs == 0)
1283
24.5M
    return false;
1284
5.10M
1285
5.10M
  SmallSet<unsigned, 4> RegAdded;
1286
5.10M
  // If this node would clobber any "live" register, then it's not ready.
1287
5.10M
  //
1288
5.10M
  // If SU is the currently live definition of the same register that it uses,
1289
5.10M
  // then we are free to schedule it.
1290
4.15M
  for (SDep &Pred : SU->Preds) {
1291
4.15M
    if (
Pred.isAssignedRegDep() && 4.15M
LiveRegDefs[Pred.getReg()] != SU50.0k
)
1292
28.8k
      CheckForLiveRegDef(Pred.getSUnit(), Pred.getReg(), LiveRegDefs.get(),
1293
28.8k
                         RegAdded, LRegs, TRI);
1294
4.15M
  }
1295
5.10M
1296
10.2M
  for (SDNode *Node = SU->getNode(); 
Node10.2M
;
Node = Node->getGluedNode()5.17M
) {
1297
5.17M
    if (
Node->getOpcode() == ISD::INLINEASM5.17M
) {
1298
33
      // Inline asm can clobber physical defs.
1299
33
      unsigned NumOps = Node->getNumOperands();
1300
33
      if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
1301
0
        --NumOps;  // Ignore the glue operand.
1302
33
1303
71
      for (unsigned i = InlineAsm::Op_FirstOperand; 
i != NumOps71
;) {
1304
38
        unsigned Flags =
1305
38
          cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
1306
38
        unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
1307
38
1308
38
        ++i; // Skip the ID value.
1309
38
        if (InlineAsm::isRegDefKind(Flags) ||
1310
35
            InlineAsm::isRegDefEarlyClobberKind(Flags) ||
1311
38
            InlineAsm::isClobberKind(Flags)) {
1312
36
          // Check for def of register or earlyclobber register.
1313
72
          for (; NumVals; --NumVals, ++i) {
1314
36
            unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
1315
36
            if (TargetRegisterInfo::isPhysicalRegister(Reg))
1316
33
              CheckForLiveRegDef(SU, Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI);
1317
36
          }
1318
36
        } else
1319
2
          i += NumVals;
1320
38
      }
1321
33
      continue;
1322
33
    }
1323
5.17M
1324
5.17M
    if (!Node->isMachineOpcode())
1325
1.33M
      continue;
1326
3.83M
    // If we're in the middle of scheduling a call, don't begin scheduling
1327
3.83M
    // another call. Also, don't allow any physical registers to be live across
1328
3.83M
    // the call.
1329
3.83M
    if (Node->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
1330
5.45k
      // Check the special calling-sequence resource.
1331
5.45k
      unsigned CallResource = TRI->getNumRegs();
1332
5.45k
      if (LiveRegDefs[CallResource]) {
1333
5.21k
        SDNode *Gen = LiveRegGens[CallResource]->getNode();
1334
11.8k
        while (SDNode *Glued = Gen->getGluedNode())
1335
6.63k
          Gen = Glued;
1336
5.21k
        if (!IsChainDependent(Gen, Node, 0, TII) &&
1337
5.21k
            RegAdded.insert(CallResource).second)
1338
5.21k
          LRegs.push_back(CallResource);
1339
5.21k
      }
1340
5.45k
    }
1341
3.83M
    if (const uint32_t *RegMask = getNodeRegMask(Node))
1342
5.44k
      CheckForLiveRegDefMasked(SU, RegMask,
1343
5.44k
                               makeArrayRef(LiveRegDefs.get(), TRI->getNumRegs()),
1344
5.44k
                               RegAdded, LRegs);
1345
3.83M
1346
3.83M
    const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode());
1347
3.83M
    if (MCID.hasOptionalDef()) {
1348
13.0k
      // Most ARM instructions have an OptionalDef for CPSR, to model the S-bit.
1349
13.0k
      // This operand can be either a def of CPSR, if the S bit is set; or a use
1350
13.0k
      // of %noreg.  When the OptionalDef is set to a valid register, we need to
1351
13.0k
      // handle it in the same way as an ImplicitDef.
1352
26.6k
      for (unsigned i = 0; i < MCID.getNumDefs(); ++i)
1353
13.6k
        if (MCID.OpInfo[i].isOptionalDef()) {
1354
641
          const SDValue &OptionalDef = Node->getOperand(i - Node->getNumValues());
1355
641
          unsigned Reg = cast<RegisterSDNode>(OptionalDef)->getReg();
1356
641
          CheckForLiveRegDef(SU, Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI);
1357
641
        }
1358
13.0k
    }
1359
3.83M
    if (!MCID.ImplicitDefs)
1360
1.25M
      continue;
1361
5.19M
    for (const MCPhysReg *Reg = MCID.getImplicitDefs(); *Reg; ++Reg)
1362
2.61M
      CheckForLiveRegDef(SU, *Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI);
1363
5.17M
  }
1364
29.6M
1365
29.6M
  return !LRegs.empty();
1366
29.6M
}
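The core test in DelayForLiveRegsBottomUp reduces to: a node must wait while
any physical register it would write is live with a different node as its
definition. A toy model of just that test (ToyNode and mustDelay are
hypothetical names, not the LLVM API; the real function additionally handles
inline asm operands, the call-sequence resource, register masks, and
implicit defs):

#include <vector>

struct ToyNode {
  std::vector<unsigned> DefRegs; // physical registers this node defines
};

// LiveRegDefs[Reg] points at the node currently defining Reg, or is
// nullptr when the register is free.
bool mustDelay(const ToyNode &N,
               const std::vector<const ToyNode *> &LiveRegDefs) {
  for (unsigned Reg : N.DefRegs)
    if (LiveRegDefs[Reg] && LiveRegDefs[Reg] != &N)
      return true; // would clobber another node's live definition
  return false;
}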
1367
1368
2.55M
void ScheduleDAGRRList::releaseInterferences(unsigned Reg) {
1369
2.55M
  // Add the nodes that aren't ready back onto the available list.
1370
3.58M
  for (unsigned i = Interferences.size(); i > 0; --i) {
1371
1.03M
    SUnit *SU = Interferences[i-1];
1372
1.03M
    LRegsMapT::iterator LRegsPos = LRegsMap.find(SU);
1373
1.03M
    if (Reg) {
1374
1.03M
      SmallVectorImpl<unsigned> &LRegs = LRegsPos->second;
1375
1.03M
      if (!is_contained(LRegs, Reg))
1376
1.01M
        continue;
1377
19.0k
    }
1378
19.0k
    SU->isPending = false;
1379
19.0k
    // The interfering node may no longer be available due to backtracking.
1380
19.0k
    // Furthermore, it may have been made available again, in which case it is
1381
19.0k
    // now already in the AvailableQueue.
1382
19.0k
    if (SU->isAvailable && !SU->NodeQueueId) {
1383
8.76k
      DEBUG(dbgs() << "    Repushing SU #" << SU->NodeNum << '\n');
1384
8.76k
      AvailableQueue->push(SU);
1385
8.76k
    }
1386
19.0k
    if (i < Interferences.size())
1387
0
      Interferences[i-1] = Interferences.back();
1388
1.03M
    Interferences.pop_back();
1389
1.03M
    LRegsMap.erase(LRegsPos);
1390
1.03M
  }
1391
2.55M
}
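releaseInterferences erases from the middle of the vector while scanning it,
using the swap-with-back-then-pop idiom: each erasure is O(1), and walking
the indices downward guarantees the element moved in from the back has
already been examined. The same pattern in generic form (a sketch, not LLVM
code):

#include <utility>
#include <vector>

template <typename T, typename Pred>
void eraseUnordered(std::vector<T> &V, Pred ShouldErase) {
  for (unsigned i = V.size(); i > 0; --i) {
    if (!ShouldErase(V[i - 1]))
      continue;
    if (i < V.size())                 // not already the last element
      V[i - 1] = std::move(V.back()); // fill the hole with the back
    V.pop_back();                     // element order is not preserved
  }
}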
1392
1393
/// Return a node that can be scheduled in this cycle. Requirements:
1394
/// (1) Ready: latency has been satisfied
1395
/// (2) No Hazards: resources are available
1396
/// (3) No Interferences: may unschedule to break register interferences.
1397
29.6M
SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
1398
29.6M
  SUnit *CurSU = AvailableQueue->empty() ? nullptr : AvailableQueue->pop();
1399
29.6M
  auto FindAvailableNode = [&]() {
1400
29.6M
    while (CurSU) {
1401
29.6M
      SmallVector<unsigned, 4> LRegs;
1402
29.6M
      if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
1403
29.6M
        break;
1404
19.0k
      DEBUG(dbgs() << " Interfering reg " <<
1405
19.0k
            (LRegs[0] == TRI->getNumRegs() ? "CallResource"
1406
19.0k
             : TRI->getName(LRegs[0]))
1407
19.0k
             << " SU #" << CurSU->NodeNum << '\n');
1408
19.0k
      std::pair<LRegsMapT::iterator, bool> LRegsPair =
1409
19.0k
        LRegsMap.insert(std::make_pair(CurSU, LRegs));
1410
19.0k
      if (LRegsPair.second) {
1411
19.0k
        CurSU->isPending = true;  // This SU is not in AvailableQueue right now.
1412
19.0k
        Interferences.push_back(CurSU);
1413
19.0k
      }
1414
18.4E
      else {
1415
18.4E
        assert(CurSU->isPending && "Interferences are pending");
1416
18.4E
        // Update the interference with current live regs.
1417
18.4E
        LRegsPair.first->second = LRegs;
1418
18.4E
      }
1419
29.6M
      CurSU = AvailableQueue->pop();
1420
29.6M
    }
1421
29.6M
  };
1422
29.6M
  FindAvailableNode();
1423
29.6M
  if (CurSU)
1424
29.6M
    return CurSU;
1425
2.30k
1426
2.30k
  // All candidates are delayed due to live physical reg dependencies.
1427
2.30k
  // Try backtracking, code duplication, or inserting cross class copies
1428
2.30k
  // to resolve it.
1429
2.30k
  for (SUnit *TrySU : Interferences) {
1430
3.85k
    SmallVectorImpl<unsigned> &LRegs = LRegsMap[TrySU];
1431
3.85k
1432
3.85k
    // Try unscheduling up to the point where it's safe to schedule
1433
3.85k
    // this node.
1434
3.85k
    SUnit *BtSU = nullptr;
1435
3.85k
    unsigned LiveCycle = UINT_MAX;
1436
3.85k
    for (unsigned Reg : LRegs) {
1437
3.85k
      if (LiveRegGens[Reg]->getHeight() < LiveCycle) {
1438
3.85k
        BtSU = LiveRegGens[Reg];
1439
3.85k
        LiveCycle = BtSU->getHeight();
1440
3.85k
      }
1441
3.85k
    }
1442
3.85k
    if (!WillCreateCycle(TrySU, BtSU)) {
1443
1.72k
      // BacktrackBottomUp mutates Interferences!
1444
1.72k
      BacktrackBottomUp(TrySU, BtSU);
1445
1.72k
1446
1.72k
      // Force the current node to be scheduled before the node that
1447
1.72k
      // requires the physical reg dep.
1448
1.72k
      if (BtSU->isAvailable) {
1449
1.72k
        BtSU->isAvailable = false;
1450
1.72k
        if (!BtSU->isPending)
1451
1.72k
          AvailableQueue->remove(BtSU);
1452
1.72k
      }
1453
1.72k
      DEBUG(dbgs() << "ARTIFICIAL edge from SU(" << BtSU->NodeNum << ") to SU("
1454
1.72k
            << TrySU->NodeNum << ")\n");
1455
1.72k
      AddPred(TrySU, SDep(BtSU, SDep::Artificial));
1456
1.72k
1457
1.72k
      // If one or more successors has been unscheduled, then the current
1458
1.72k
      // node is no longer available.
1459
1.72k
      if (!TrySU->isAvailable || !TrySU->NodeQueueId) {
1460
1.14k
        DEBUG(dbgs() << "TrySU not available; choosing node from queue\n");
1461
1.14k
        CurSU = AvailableQueue->pop();
1462
1.72k
      } else {
1463
578
        DEBUG(dbgs() << "TrySU available\n");
1464
578
        // Available and in AvailableQueue
1465
578
        AvailableQueue->remove(TrySU);
1466
578
        CurSU = TrySU;
1467
578
      }
1468
1.72k
      FindAvailableNode();
1469
1.72k
      // Interferences has been mutated. We must break.
1470
1.72k
      break;
1471
1.72k
    }
1472
2.30k
  }
1473
2.30k
1474
2.30k
  if (!CurSU) {
1475
584
    // Can't backtrack. If it's too expensive to copy the value, then try
1476
584
    // duplicating the nodes that produce these "too expensive to copy"
1477
584
    // values to break the dependency. In case even that doesn't work,
1478
584
    // insert cross class copies.
1479
584
    // If it's not too expensive, i.e. cost != -1, issue copies.
1480
584
    SUnit *TrySU = Interferences[0];
1481
584
    SmallVectorImpl<unsigned> &LRegs = LRegsMap[TrySU];
1482
584
    assert(LRegs.size() == 1 && "Can't handle this yet!");
1483
584
    unsigned Reg = LRegs[0];
1484
584
    SUnit *LRDef = LiveRegDefs[Reg];
1485
584
    MVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
1486
584
    const TargetRegisterClass *RC =
1487
584
      TRI->getMinimalPhysRegClass(Reg, VT);
1488
584
    const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
1489
584
1490
584
    // If cross copy register class is the same as RC, then it must be possible
1491
584
    // to copy the value directly. Do not try duplicating the def.
1492
584
    // If cross copy register class is not the same as RC, then it's possible to
1493
584
    // copy the value, but it requires cross register class copies and it is
1494
584
    // expensive.
1495
584
    // If cross copy register class is null, then it's not possible to copy
1496
584
    // the value at all.
1497
584
    SUnit *NewDef = nullptr;
1498
584
    if (DestRC != RC) {
1499
584
      NewDef = CopyAndMoveSuccessors(LRDef);
1500
584
      if (!DestRC && !NewDef)
1501
0
        report_fatal_error("Can't handle live physical register dependency!");
1502
584
    }
1503
584
    if (!NewDef) {
1504
52
      // Issue copies, these can be expensive cross register class copies.
1505
52
      SmallVector<SUnit*, 2> Copies;
1506
52
      InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
1507
52
      DEBUG(dbgs() << "    Adding an edge from SU #" << TrySU->NodeNum
1508
52
            << " to SU #" << Copies.front()->NodeNum << "\n");
1509
52
      AddPred(TrySU, SDep(Copies.front(), SDep::Artificial));
1510
52
      NewDef = Copies.back();
1511
52
    }
1512
584
1513
584
    DEBUG(dbgs() << "    Adding an edge from SU #" << NewDef->NodeNum
1514
584
          << " to SU #" << TrySU->NodeNum << "\n");
1515
584
    LiveRegDefs[Reg] = NewDef;
1516
584
    AddPred(NewDef, SDep(TrySU, SDep::Artificial));
1517
584
    TrySU->isAvailable = false;
1518
584
    CurSU = NewDef;
1519
584
  }
1520
2.30k
  assert(CurSU && "Unable to resolve live physical register dependencies!");
1521
2.30k
  return CurSU;
1522
29.6M
}
1523
1524
/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
1525
/// schedulers.
1526
3.42M
void ScheduleDAGRRList::ListScheduleBottomUp() {
1527
3.42M
  // Release any predecessors of the special Exit node.
1528
3.42M
  ReleasePredecessors(&ExitSU);
1529
3.42M
1530
3.42M
  // Add root to Available queue.
1531
3.42M
  if (!SUnits.empty()) {
1532
3.33M
    SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()];
1533
3.33M
    assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!");
1534
3.33M
    RootSU->isAvailable = true;
1535
3.33M
    AvailableQueue->push(RootSU);
1536
3.33M
  }
1537
3.42M
1538
3.42M
  // While Available queue is not empty, grab the node with the highest
1539
3.42M
  // priority. If it is not ready put it back.  Schedule the node.
1540
3.42M
  Sequence.reserve(SUnits.size());
1541
33.1M
  while (!AvailableQueue->empty() || !Interferences.empty()) {
1542
29.6M
    DEBUG(dbgs() << "\nExamining Available:\n";
1543
29.6M
          AvailableQueue->dump(this));
1544
29.6M
1545
29.6M
    // Pick the best node to schedule taking all constraints into
1546
29.6M
    // consideration.
1547
29.6M
    SUnit *SU = PickNodeToScheduleBottomUp();
1548
29.6M
1549
29.6M
    AdvancePastStalls(SU);
1550
29.6M
1551
29.6M
    ScheduleNodeBottomUp(SU);
1552
29.6M
1553
29.6M
    while (AvailableQueue->empty() && !PendingQueue.empty()) {
1554
0
      // Advance the cycle to free resources. Skip ahead to the next ready SU.
1555
0
      assert(MinAvailableCycle < UINT_MAX && "MinAvailableCycle uninitialized");
1556
0
      AdvanceToCycle(std::max(CurCycle + 1, MinAvailableCycle));
1557
0
    }
1558
29.6M
  }
1559
3.42M
1560
3.42M
  // Reverse the order if it is bottom up.
1561
3.42M
  std::reverse(Sequence.begin(), Sequence.end());
1562
3.42M
1563
#ifndef NDEBUG
1564
  VerifyScheduledSequence(/*isBottomUp=*/true);
1565
#endif
1566
}
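ListScheduleBottomUp is an instance of the classic list-scheduling loop: seed
the ready set with the graph roots, repeatedly extract the highest-priority
ready node, release the nodes it depends on, and reverse the sequence at the
end because nodes were picked bottom-up. A self-contained sketch of that
skeleton (hypothetical Node type; stalls, hazards, and the interference
resolution above are omitted):

#include <algorithm>
#include <vector>

struct Node {
  int Priority = 0;
  std::vector<Node *> Preds; // operands; they must run before this node
  unsigned SuccsLeft = 0;    // unscheduled uses; 0 means ready
};

std::vector<Node *> listScheduleBottomUp(std::vector<Node *> Roots) {
  std::vector<Node *> Ready = std::move(Roots), Sequence;
  while (!Ready.empty()) {
    auto Best = std::max_element(
        Ready.begin(), Ready.end(),
        [](Node *A, Node *B) { return A->Priority < B->Priority; });
    Node *N = *Best;
    std::swap(*Best, Ready.back());
    Ready.pop_back();
    Sequence.push_back(N);
    for (Node *P : N->Preds) // scheduling N may make its operands ready
      if (--P->SuccsLeft == 0)
        Ready.push_back(P);
  }
  std::reverse(Sequence.begin(), Sequence.end()); // bottom-up to top-down
  return Sequence;
}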
1567
1568
//===----------------------------------------------------------------------===//
1569
//                RegReductionPriorityQueue Definition
1570
//===----------------------------------------------------------------------===//
1571
//
1572
// This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers
1573
// to reduce register pressure.
1574
//
1575
namespace {
1576
class RegReductionPQBase;
1577
1578
struct queue_sort {
1579
0
  bool isReady(SUnit* SU, unsigned CurCycle) const { return true; }
1580
};
1581
1582
#ifndef NDEBUG
1583
template<class SF>
1584
struct reverse_sort : public queue_sort {
1585
  SF &SortFunc;
1586
  reverse_sort(SF &sf) : SortFunc(sf) {}
1587
1588
  bool operator()(SUnit* left, SUnit* right) const {
1589
    // reverse left/right rather than simply !SortFunc(left, right)
1590
    // to expose different paths in the comparison logic.
1591
    return SortFunc(right, left);
1592
  }
1593
};
1594
#endif // NDEBUG
1595
1596
/// bu_ls_rr_sort - Priority function for bottom up register pressure
1597
// reduction scheduler.
1598
struct bu_ls_rr_sort : public queue_sort {
1599
  enum {
1600
    IsBottomUp = true,
1601
    HasReadyFilter = false
1602
  };
1603
1604
  RegReductionPQBase *SPQ;
1605
17.5k
  bu_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
1606
1607
  bool operator()(SUnit* left, SUnit* right) const;
1608
};
1609
1610
// src_ls_rr_sort - Priority function for source order scheduler.
1611
struct src_ls_rr_sort : public queue_sort {
1612
  enum {
1613
    IsBottomUp = true,
1614
    HasReadyFilter = false
1615
  };
1616
1617
  RegReductionPQBase *SPQ;
1618
  src_ls_rr_sort(RegReductionPQBase *spq)
1619
3.35M
    : SPQ(spq) {}
1620
1621
  bool operator()(SUnit* left, SUnit* right) const;
1622
};
1623
1624
// hybrid_ls_rr_sort - Priority function for hybrid scheduler.
1625
struct hybrid_ls_rr_sort : public queue_sort {
1626
  enum {
1627
    IsBottomUp = true,
1628
    HasReadyFilter = false
1629
  };
1630
1631
  RegReductionPQBase *SPQ;
1632
  hybrid_ls_rr_sort(RegReductionPQBase *spq)
1633
37.1k
    : SPQ(spq) {}
1634
1635
  bool isReady(SUnit *SU, unsigned CurCycle) const;
1636
1637
  bool operator()(SUnit* left, SUnit* right) const;
1638
};
1639
1640
// ilp_ls_rr_sort - Priority function for ILP (instruction level parallelism)
1641
// scheduler.
1642
struct ilp_ls_rr_sort : public queue_sort {
1643
  enum {
1644
    IsBottomUp = true,
1645
    HasReadyFilter = false
1646
  };
1647
1648
  RegReductionPQBase *SPQ;
1649
  ilp_ls_rr_sort(RegReductionPQBase *spq)
1650
15.3k
    : SPQ(spq) {}
1651
1652
  bool isReady(SUnit *SU, unsigned CurCycle) const;
1653
1654
  bool operator()(SUnit* left, SUnit* right) const;
1655
};
1656
1657
class RegReductionPQBase : public SchedulingPriorityQueue {
1658
protected:
1659
  std::vector<SUnit*> Queue;
1660
  unsigned CurQueueId;
1661
  bool TracksRegPressure;
1662
  bool SrcOrder;
1663
1664
  // SUnits - The SUnits for the current graph.
1665
  std::vector<SUnit> *SUnits;
1666
1667
  MachineFunction &MF;
1668
  const TargetInstrInfo *TII;
1669
  const TargetRegisterInfo *TRI;
1670
  const TargetLowering *TLI;
1671
  ScheduleDAGRRList *scheduleDAG;
1672
1673
  // SethiUllmanNumbers - The SethiUllman number for each node.
1674
  std::vector<unsigned> SethiUllmanNumbers;
1675
1676
  /// RegPressure - Tracking current reg pressure per register class.
1677
  ///
1678
  std::vector<unsigned> RegPressure;
1679
1680
  /// RegLimit - Tracking the number of allocatable registers per register
1681
  /// class.
1682
  std::vector<unsigned> RegLimit;
1683
1684
public:
1685
  RegReductionPQBase(MachineFunction &mf,
1686
                     bool hasReadyFilter,
1687
                     bool tracksrp,
1688
                     bool srcorder,
1689
                     const TargetInstrInfo *tii,
1690
                     const TargetRegisterInfo *tri,
1691
                     const TargetLowering *tli)
1692
    : SchedulingPriorityQueue(hasReadyFilter),
1693
      CurQueueId(0), TracksRegPressure(tracksrp), SrcOrder(srcorder),
1694
3.42M
      MF(mf), TII(tii), TRI(tri), TLI(tli), scheduleDAG(nullptr) {
1695
3.42M
    if (TracksRegPressure) {
1696
52.4k
      unsigned NumRC = TRI->getNumRegClasses();
1697
52.4k
      RegLimit.resize(NumRC);
1698
52.4k
      RegPressure.resize(NumRC);
1699
52.4k
      std::fill(RegLimit.begin(), RegLimit.end(), 0);
1700
52.4k
      std::fill(RegPressure.begin(), RegPressure.end(), 0);
1701
52.4k
      for (const TargetRegisterClass *RC : TRI->regclasses())
1702
4.74M
        RegLimit[RC->getID()] = tri->getRegPressureLimit(RC, MF);
1703
52.4k
    }
1704
3.42M
  }
1705
1706
3.42M
  void setScheduleDAG(ScheduleDAGRRList *scheduleDag) {
1707
3.42M
    scheduleDAG = scheduleDag;
1708
3.42M
  }
1709
1710
16.3M
  ScheduleHazardRecognizer* getHazardRec() {
1711
16.3M
    return scheduleDAG->getHazardRec();
1712
16.3M
  }
1713
1714
  void initNodes(std::vector<SUnit> &sunits) override;
1715
1716
  void addNode(const SUnit *SU) override;
1717
1718
  void updateNode(const SUnit *SU) override;
1719
1720
3.42M
  void releaseState() override {
1721
3.42M
    SUnits = nullptr;
1722
3.42M
    SethiUllmanNumbers.clear();
1723
3.42M
    std::fill(RegPressure.begin(), RegPressure.end(), 0);
1724
3.42M
  }
1725
1726
  unsigned getNodePriority(const SUnit *SU) const;
1727
1728
92.5M
  unsigned getNodeOrdering(const SUnit *SU) const {
1729
92.5M
    if (!SU->getNode()) return 0;
1730
92.5M
1731
92.5M
    return SU->getNode()->getIROrder();
1732
92.5M
  }
1733
1734
122M
  bool empty() const override { return Queue.empty(); }
1735
1736
29.7M
  void push(SUnit *U) override {
1737
29.7M
    assert(!U->NodeQueueId && "Node in the queue already");
1738
29.7M
    U->NodeQueueId = ++CurQueueId;
1739
29.7M
    Queue.push_back(U);
1740
29.7M
  }
1741
1742
47.7k
  void remove(SUnit *SU) override {
1743
47.7k
    assert(!Queue.empty() && "Queue is empty!");
1744
47.7k
    assert(SU->NodeQueueId != 0 && "Not in queue!");
1745
47.7k
    std::vector<SUnit *>::iterator I = find(Queue, SU);
1746
47.7k
    if (I != std::prev(Queue.end()))
1747
11.9k
      std::swap(*I, Queue.back());
1748
47.7k
    Queue.pop_back();
1749
47.7k
    SU->NodeQueueId = 0;
1750
47.7k
  }
1751
1752
14
  bool tracksRegPressure() const override { return TracksRegPressure; }
1753
1754
  void dumpRegPressure() const;
1755
1756
  bool HighRegPressure(const SUnit *SU) const;
1757
1758
  bool MayReduceRegPressure(SUnit *SU) const;
1759
1760
  int RegPressureDiff(SUnit *SU, unsigned &LiveUses) const;
1761
1762
  void scheduledNode(SUnit *SU) override;
1763
1764
  void unscheduledNode(SUnit *SU) override;
1765
1766
protected:
1767
  bool canClobber(const SUnit *SU, const SUnit *Op);
1768
  void AddPseudoTwoAddrDeps();
1769
  void PrescheduleNodesWithMultipleUses();
1770
  void CalculateSethiUllmanNumbers();
1771
};
1772
1773
template<class SF>
1774
29.6M
static SUnit *popFromQueueImpl(std::vector<SUnit*> &Q, SF &Picker) {
1775
29.6M
  std::vector<SUnit *>::iterator Best = Q.begin();
1776
77.1M
  for (auto I = std::next(Q.begin()), E = Q.end(); I != E; ++I)
1777
47.4M
    if (Picker(*Best, *I))
1778
23.0M
      Best = I;
1779
29.6M
  SUnit *V = *Best;
1780
29.6M
  if (Best != std::prev(Q.end()))
1781
6.04M
    std::swap(*Best, Q.back());
1782
29.6M
  Q.pop_back();
1783
29.6M
  return V;
1784
29.6M
}
ScheduleDAGRRList.cpp:llvm::SUnit* (anonymous namespace)::popFromQueueImpl<(anonymous namespace)::bu_ls_rr_sort>(std::__1::vector<llvm::SUnit*, std::__1::allocator<llvm::SUnit*> >&, (anonymous namespace)::bu_ls_rr_sort&)
Line
Count
Source
1774
130k
static SUnit *popFromQueueImpl(std::vector<SUnit*> &Q, SF &Picker) {
1775
130k
  std::vector<SUnit *>::iterator Best = Q.begin();
1776
253k
  for (auto I = std::next(Q.begin()), E = Q.end(); I != E; ++I)
1777
122k
    if (Picker(*Best, *I))
1778
55.9k
      Best = I;
1779
130k
  SUnit *V = *Best;
1780
130k
  if (Best != std::prev(Q.end()))
1781
29.6k
    std::swap(*Best, Q.back());
1782
130k
  Q.pop_back();
1783
130k
  return V;
1784
130k
}
ScheduleDAGRRList.cpp:llvm::SUnit* (anonymous namespace)::popFromQueueImpl<(anonymous namespace)::src_ls_rr_sort>(std::__1::vector<llvm::SUnit*, std::__1::allocator<llvm::SUnit*> >&, (anonymous namespace)::src_ls_rr_sort&)
Line
Count
Source
1774
29.1M
static SUnit *popFromQueueImpl(std::vector<SUnit*> &Q, SF &Picker) {
1775
29.1M
  std::vector<SUnit *>::iterator Best = Q.begin();
1776
76.0M
  for (auto I = std::next(Q.begin()), E = Q.end(); I != E; ++I)
1777
46.8M
    if (Picker(*Best, *I))
1778
22.8M
      Best = I;
1779
29.1M
  SUnit *V = *Best;
1780
29.1M
  if (Best != std::prev(Q.end()))
1781
5.91M
    std::swap(*Best, Q.back());
1782
29.1M
  Q.pop_back();
1783
29.1M
  return V;
1784
29.1M
}
ScheduleDAGRRList.cpp:llvm::SUnit* (anonymous namespace)::popFromQueueImpl<(anonymous namespace)::hybrid_ls_rr_sort>(std::__1::vector<llvm::SUnit*, std::__1::allocator<llvm::SUnit*> >&, (anonymous namespace)::hybrid_ls_rr_sort&)
Line
Count
Source
1774
286k
static SUnit *popFromQueueImpl(std::vector<SUnit*> &Q, SF &Picker) {
1775
286k
  std::vector<SUnit *>::iterator Best = Q.begin();
1776
634k
  for (auto I = std::next(Q.begin()), E = Q.end(); I != E; ++I)
1777
347k
    if (Picker(*Best, *I))
1778
155k
      Best = I;
1779
286k
  SUnit *V = *Best;
1780
286k
  if (Best != std::prev(Q.end()))
1781
74.9k
    std::swap(*Best, Q.back());
1782
286k
  Q.pop_back();
1783
286k
  return V;
1784
286k
}
ScheduleDAGRRList.cpp:llvm::SUnit* (anonymous namespace)::popFromQueueImpl<(anonymous namespace)::ilp_ls_rr_sort>(std::__1::vector<llvm::SUnit*, std::__1::allocator<llvm::SUnit*> >&, (anonymous namespace)::ilp_ls_rr_sort&)
Line
Count
Source
1774
121k
static SUnit *popFromQueueImpl(std::vector<SUnit*> &Q, SF &Picker) {
1775
121k
  std::vector<SUnit *>::iterator Best = Q.begin();
1776
274k
  for (auto I = std::next(Q.begin()), E = Q.end(); I != E; ++I)
1777
152k
    if (Picker(*Best, *I))
1778
65.4k
      Best = I;
1779
121k
  SUnit *V = *Best;
1780
121k
  if (Best != std::prev(Q.end()))
1781
26.4k
    std::swap(*Best, Q.back());
1782
121k
  Q.pop_back();
1783
121k
  return V;
1784
121k
}
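popFromQueueImpl rescans the whole vector on every pop rather than keeping a
heap. A plausible reading (an observation, not a claim made by this file):
the bu/src/hybrid/ilp comparators consult mutable scheduler state such as
register pressure and the current cycle, so heap invariants would go stale
between pops, whereas a linear scan re-evaluates every candidate each time.
The equivalent generic form; std::max_element performs exactly the
Picker(*Best, *I) update loop above:

#include <algorithm>
#include <utility>
#include <vector>

// Precondition: Q is non-empty. Less(a, b) == true means b ranks higher.
template <typename T, typename Compare>
T popBest(std::vector<T> &Q, Compare Less) {
  auto Best = std::max_element(Q.begin(), Q.end(), Less);
  T V = *Best;
  *Best = std::move(Q.back()); // unordered erase of the chosen slot
  Q.pop_back();
  return V;
}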
1785
1786
template<class SF>
1787
29.6M
SUnit *popFromQueue(std::vector<SUnit*> &Q, SF &Picker, ScheduleDAG *DAG) {
1788
#ifndef NDEBUG
1789
  if (DAG->StressSched) {
1790
    reverse_sort<SF> RPicker(Picker);
1791
    return popFromQueueImpl(Q, RPicker);
1792
  }
1793
#endif
1794
  (void)DAG;
1795
29.6M
  return popFromQueueImpl(Q, Picker);
1796
29.6M
}
ScheduleDAGRRList.cpp:llvm::SUnit* (anonymous namespace)::popFromQueue<(anonymous namespace)::ilp_ls_rr_sort>(std::__1::vector<llvm::SUnit*, std::__1::allocator<llvm::SUnit*> >&, (anonymous namespace)::ilp_ls_rr_sort&, llvm::ScheduleDAG*)
Line
Count
Source
1787
121k
SUnit *popFromQueue(std::vector<SUnit*> &Q, SF &Picker, ScheduleDAG *DAG) {
1788
#ifndef NDEBUG
1789
  if (DAG->StressSched) {
1790
    reverse_sort<SF> RPicker(Picker);
1791
    return popFromQueueImpl(Q, RPicker);
1792
  }
1793
#endif
1794
  (void)DAG;
1795
121k
  return popFromQueueImpl(Q, Picker);
1796
121k
}
ScheduleDAGRRList.cpp:llvm::SUnit* (anonymous namespace)::popFromQueue<(anonymous namespace)::hybrid_ls_rr_sort>(std::__1::vector<llvm::SUnit*, std::__1::allocator<llvm::SUnit*> >&, (anonymous namespace)::hybrid_ls_rr_sort&, llvm::ScheduleDAG*)
Line
Count
Source
1787
286k
SUnit *popFromQueue(std::vector<SUnit*> &Q, SF &Picker, ScheduleDAG *DAG) {
1788
#ifndef NDEBUG
1789
  if (DAG->StressSched) {
1790
    reverse_sort<SF> RPicker(Picker);
1791
    return popFromQueueImpl(Q, RPicker);
1792
  }
1793
#endif
1794
  (void)DAG;
1795
286k
  return popFromQueueImpl(Q, Picker);
1796
286k
}
ScheduleDAGRRList.cpp:llvm::SUnit* (anonymous namespace)::popFromQueue<(anonymous namespace)::src_ls_rr_sort>(std::__1::vector<llvm::SUnit*, std::__1::allocator<llvm::SUnit*> >&, (anonymous namespace)::src_ls_rr_sort&, llvm::ScheduleDAG*)
Line
Count
Source
1787
29.1M
SUnit *popFromQueue(std::vector<SUnit*> &Q, SF &Picker, ScheduleDAG *DAG) {
1788
#ifndef NDEBUG
1789
  if (DAG->StressSched) {
1790
    reverse_sort<SF> RPicker(Picker);
1791
    return popFromQueueImpl(Q, RPicker);
1792
  }
1793
#endif
1794
  (void)DAG;
1795
29.1M
  return popFromQueueImpl(Q, Picker);
1796
29.1M
}
ScheduleDAGRRList.cpp:llvm::SUnit* (anonymous namespace)::popFromQueue<(anonymous namespace)::bu_ls_rr_sort>(std::__1::vector<llvm::SUnit*, std::__1::allocator<llvm::SUnit*> >&, (anonymous namespace)::bu_ls_rr_sort&, llvm::ScheduleDAG*)
Line
Count
Source
1787
130k
SUnit *popFromQueue(std::vector<SUnit*> &Q, SF &Picker, ScheduleDAG *DAG) {
1788
#ifndef NDEBUG
1789
  if (DAG->StressSched) {
1790
    reverse_sort<SF> RPicker(Picker);
1791
    return popFromQueueImpl(Q, RPicker);
1792
  }
1793
#endif
1794
  (void)DAG;
1795
130k
  return popFromQueueImpl(Q, Picker);
1796
130k
}
1797
1798
template<class SF>
1799
class RegReductionPriorityQueue : public RegReductionPQBase {
1800
  SF Picker;
1801
1802
public:
1803
  RegReductionPriorityQueue(MachineFunction &mf,
1804
                            bool tracksrp,
1805
                            bool srcorder,
1806
                            const TargetInstrInfo *tii,
1807
                            const TargetRegisterInfo *tri,
1808
                            const TargetLowering *tli)
1809
    : RegReductionPQBase(mf, SF::HasReadyFilter, tracksrp, srcorder,
1810
                         tii, tri, tli),
1811
3.42M
      Picker(this) {}
ScheduleDAGRRList.cpp:(anonymous namespace)::RegReductionPriorityQueue<(anonymous namespace)::ilp_ls_rr_sort>::RegReductionPriorityQueue(llvm::MachineFunction&, bool, bool, llvm::TargetInstrInfo const*, llvm::TargetRegisterInfo const*, llvm::TargetLowering const*)
Line
Count
Source
1811
15.3k
      Picker(this) {}
ScheduleDAGRRList.cpp:(anonymous namespace)::RegReductionPriorityQueue<(anonymous namespace)::bu_ls_rr_sort>::RegReductionPriorityQueue(llvm::MachineFunction&, bool, bool, llvm::TargetInstrInfo const*, llvm::TargetRegisterInfo const*, llvm::TargetLowering const*)
Line
Count
Source
1811
17.5k
      Picker(this) {}
ScheduleDAGRRList.cpp:(anonymous namespace)::RegReductionPriorityQueue<(anonymous namespace)::src_ls_rr_sort>::RegReductionPriorityQueue(llvm::MachineFunction&, bool, bool, llvm::TargetInstrInfo const*, llvm::TargetRegisterInfo const*, llvm::TargetLowering const*)
Line
Count
Source
1811
3.35M
      Picker(this) {}
ScheduleDAGRRList.cpp:(anonymous namespace)::RegReductionPriorityQueue<(anonymous namespace)::hybrid_ls_rr_sort>::RegReductionPriorityQueue(llvm::MachineFunction&, bool, bool, llvm::TargetInstrInfo const*, llvm::TargetRegisterInfo const*, llvm::TargetLowering const*)
Line
Count
Source
1811
37.1k
      Picker(this) {}
1812
1813
0
  bool isBottomUp() const override { return SF::IsBottomUp; }
Unexecuted instantiation: ScheduleDAGRRList.cpp:(anonymous namespace)::RegReductionPriorityQueue<(anonymous namespace)::src_ls_rr_sort>::isBottomUp() const
Unexecuted instantiation: ScheduleDAGRRList.cpp:(anonymous namespace)::RegReductionPriorityQueue<(anonymous namespace)::ilp_ls_rr_sort>::isBottomUp() const
Unexecuted instantiation: ScheduleDAGRRList.cpp:(anonymous namespace)::RegReductionPriorityQueue<(anonymous namespace)::hybrid_ls_rr_sort>::isBottomUp() const
Unexecuted instantiation: ScheduleDAGRRList.cpp:(anonymous namespace)::RegReductionPriorityQueue<(anonymous namespace)::bu_ls_rr_sort>::isBottomUp() const
1814
1815
0
  bool isReady(SUnit *U) const override {
1816
0
    return Picker.HasReadyFilter && Picker.isReady(U, getCurCycle());
1817
0
  }
Unexecuted instantiation: ScheduleDAGRRList.cpp:(anonymous namespace)::RegReductionPriorityQueue<(anonymous namespace)::bu_ls_rr_sort>::isReady(llvm::SUnit*) const
Unexecuted instantiation: ScheduleDAGRRList.cpp:(anonymous namespace)::RegReductionPriorityQueue<(anonymous namespace)::ilp_ls_rr_sort>::isReady(llvm::SUnit*) const
Unexecuted instantiation: ScheduleDAGRRList.cpp:(anonymous namespace)::RegReductionPriorityQueue<(anonymous namespace)::src_ls_rr_sort>::isReady(llvm::SUnit*) const
Unexecuted instantiation: ScheduleDAGRRList.cpp:(anonymous namespace)::RegReductionPriorityQueue<(anonymous namespace)::hybrid_ls_rr_sort>::isReady(llvm::SUnit*) const
1818
1819
29.6M
  SUnit *pop() override {
1820
29.6M
    if (Queue.empty()) return nullptr;
1821
29.6M
1822
29.6M
    SUnit *V = popFromQueue(Queue, Picker, scheduleDAG);
1823
29.6M
    V->NodeQueueId = 0;
1824
29.6M
    return V;
1825
29.6M
  }
ScheduleDAGRRList.cpp:(anonymous namespace)::RegReductionPriorityQueue<(anonymous namespace)::hybrid_ls_rr_sort>::pop()
Line
Count
Source
1819
287k
  SUnit *pop() override {
1820
287k
    if (Queue.empty()) return nullptr;
1821
286k
1822
286k
    SUnit *V = popFromQueue(Queue, Picker, scheduleDAG);
1823
286k
    V->NodeQueueId = 0;
1824
286k
    return V;
1825
286k
  }
ScheduleDAGRRList.cpp:(anonymous namespace)::RegReductionPriorityQueue<(anonymous namespace)::bu_ls_rr_sort>::pop()
Line
Count
Source
1819
130k
  SUnit *pop() override {
1820
130k
    if (Queue.empty()) return nullptr;
1821
130k
1822
130k
    SUnit *V = popFromQueue(Queue, Picker, scheduleDAG);
1823
130k
    V->NodeQueueId = 0;
1824
130k
    return V;
1825
130k
  }
ScheduleDAGRRList.cpp:(anonymous namespace)::RegReductionPriorityQueue<(anonymous namespace)::src_ls_rr_sort>::pop()
Line
Count
Source
1819
29.1M
  SUnit *pop() override {
1820
29.1M
    if (Queue.empty()) return nullptr;
1821
29.1M
1822
29.1M
    SUnit *V = popFromQueue(Queue, Picker, scheduleDAG);
1823
29.1M
    V->NodeQueueId = 0;
1824
29.1M
    return V;
1825
29.1M
  }
ScheduleDAGRRList.cpp:(anonymous namespace)::RegReductionPriorityQueue<(anonymous namespace)::ilp_ls_rr_sort>::pop()
Line
Count
Source
1819
121k
  SUnit *pop() override {
1820
121k
    if (Queue.empty()) return nullptr;
1821
121k
1822
121k
    SUnit *V = popFromQueue(Queue, Picker, scheduleDAG);
1823
121k
    V->NodeQueueId = 0;
1824
121k
    return V;
1825
121k
  }
1826
1827
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1828
  LLVM_DUMP_METHOD void dump(ScheduleDAG *DAG) const override {
1829
    // Emulate pop() without clobbering NodeQueueIds.
1830
    std::vector<SUnit*> DumpQueue = Queue;
1831
    SF DumpPicker = Picker;
1832
    while (!DumpQueue.empty()) {
1833
      SUnit *SU = popFromQueue(DumpQueue, DumpPicker, scheduleDAG);
1834
      dbgs() << "Height " << SU->getHeight() << ": ";
1835
      SU->dump(DAG);
1836
    }
1837
  }
1838
#endif
1839
};
1840
1841
typedef RegReductionPriorityQueue<bu_ls_rr_sort>
1842
BURegReductionPriorityQueue;
1843
1844
typedef RegReductionPriorityQueue<src_ls_rr_sort>
1845
SrcRegReductionPriorityQueue;
1846
1847
typedef RegReductionPriorityQueue<hybrid_ls_rr_sort>
1848
HybridBURRPriorityQueue;
1849
1850
typedef RegReductionPriorityQueue<ilp_ls_rr_sort>
1851
ILPBURRPriorityQueue;
1852
} // end anonymous namespace
1853
1854
//===----------------------------------------------------------------------===//
1855
//           Static Node Priority for Register Pressure Reduction
1856
//===----------------------------------------------------------------------===//
1857
1858
// Check for special nodes that bypass scheduling heuristics.
1859
// Currently this pushes TokenFactor nodes down, but may be used for other
1860
// pseudo-ops as well.
1861
//
1862
// Return -1 to schedule right above left, 1 for left above right.
1863
// Return 0 if no bias exists.
1864
47.4M
static int checkSpecialNodes(const SUnit *left, const SUnit *right) {
1865
47.4M
  bool LSchedLow = left->isScheduleLow;
1866
47.4M
  bool RSchedLow = right->isScheduleLow;
1867
47.4M
  if (LSchedLow != RSchedLow)
1868
597k
    return LSchedLow < RSchedLow ? 1 : -1;
1869
46.8M
  return 0;
1870
46.8M
}
1871
1872
/// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number.
1873
/// Smaller number is the higher priority.
1874
static unsigned
1875
29.6M
CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {
1876
29.6M
  if (SUNumbers[SU->NodeNum] != 0)
1877
16.4M
    return SUNumbers[SU->NodeNum];
1878
13.1M
1879
13.1M
  // Use WorkList to avoid stack overflow on excessively large IRs.
1880
13.1M
  struct WorkState {
1881
29.6M
    WorkState(const SUnit *SU) : SU(SU) {}
1882
13.1M
    const SUnit *SU;
1883
13.1M
    unsigned PredsProcessed = 0;
1884
13.1M
  };
1885
13.1M
1886
13.1M
  SmallVector<WorkState, 16> WorkList;
1887
13.1M
  WorkList.push_back(SU);
1888
59.2M
  while (!WorkList.empty()) {
1889
46.1M
    auto &Temp = WorkList.back();
1890
46.1M
    auto *TempSU = Temp.SU;
1891
46.1M
    bool AllPredsKnown = true;
1892
46.1M
    // Try to find a non-evaluated pred and push it into the processing stack.
1893
63.8M
    for (unsigned P = Temp.PredsProcessed; P < TempSU->Preds.size(); ++P) {
1894
34.1M
      auto &Pred = TempSU->Preds[P];
1895
34.1M
      if (Pred.isCtrl()) continue; // ignore chain preds
1896
21.4M
      SUnit *PredSU = Pred.getSUnit();
1897
21.4M
      if (SUNumbers[PredSU->NodeNum] == 0) {
1898
#ifndef NDEBUG
1899
        // In debug mode, check that we don't have such element in the stack.
1900
        for (auto It : WorkList)
1901
          assert(It.SU != PredSU && "Trying to push an element twice?");
1902
#endif
1903
        // Next time start processing this one starting from the next pred.
1904
16.4M
        Temp.PredsProcessed = P + 1;
1905
16.4M
        WorkList.push_back(PredSU);
1906
16.4M
        AllPredsKnown = false;
1907
16.4M
        break;
1908
16.4M
      }
1909
34.1M
    }
1910
46.1M
1911
46.1M
    if (!AllPredsKnown)
1912
16.4M
      continue;
1913
29.6M
1914
29.6M
    // Once all preds are known, we can calculate the answer for this one.
1915
29.6M
    unsigned SethiUllmanNumber = 0;
1916
29.6M
    unsigned Extra = 0;
1917
34.1M
    for (const SDep &Pred : TempSU->Preds) {
1918
34.1M
      if (Pred.isCtrl()) continue; // ignore chain preds
1919
21.4M
      SUnit *PredSU = Pred.getSUnit();
1920
21.4M
      unsigned PredSethiUllman = SUNumbers[PredSU->NodeNum];
1921
21.4M
      assert(PredSethiUllman > 0 && "We should have evaluated this pred!");
1922
21.4M
      if (PredSethiUllman > SethiUllmanNumber) {
1923
16.2M
        SethiUllmanNumber = PredSethiUllman;
1924
16.2M
        Extra = 0;
1925
21.4M
    } else if (PredSethiUllman == SethiUllmanNumber)
1926
4.09M
        ++Extra;
1927
34.1M
    }
1928
29.6M
1929
29.6M
    SethiUllmanNumber += Extra;
1930
29.6M
    if (SethiUllmanNumber == 0)
1931
14.3M
      SethiUllmanNumber = 1;
1932
46.1M
    SUNumbers[TempSU->NodeNum] = SethiUllmanNumber;
1933
46.1M
    WorkList.pop_back();
1934
46.1M
  }
1935
29.6M
1936
29.6M
  assert(SUNumbers[SU->NodeNum] > 0 && "SethiUllman should never be zero!");
1937
29.6M
  return SUNumbers[SU->NodeNum];
1938
29.6M
}
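For the two-operand case the computation above reduces to the textbook
Sethi-Ullman recurrence: a leaf needs one register, and an interior node
needs the maximum of its children, plus one when they tie; so a+b scores 2
and (a+b)+(c+d) scores 3. A compact recursive restatement for a plain binary
expression tree (the function above performs the same calculation, but
iteratively with an explicit worklist so very deep graphs cannot overflow
the stack, and generalized to n-ary nodes via the Extra counter):

#include <algorithm>

struct Expr {
  const Expr *L = nullptr, *R = nullptr; // both null for a leaf
};

unsigned sethiUllman(const Expr *E) {
  if (!E->L) // leaf: holding its value takes one register
    return 1;
  unsigned A = sethiUllman(E->L), B = sethiUllman(E->R);
  return A == B ? A + 1 : std::max(A, B);
}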
1939
1940
/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all
1941
/// scheduling units.
1942
3.42M
void RegReductionPQBase::CalculateSethiUllmanNumbers() {
1943
3.42M
  SethiUllmanNumbers.assign(SUnits->size(), 0);
1944
3.42M
1945
3.42M
  for (const SUnit &SU : *SUnits)
1946
29.6M
    CalcNodeSethiUllmanNumber(&SU, SethiUllmanNumbers);
1947
3.42M
}
1948
1949
650
void RegReductionPQBase::addNode(const SUnit *SU) {
1950
650
  unsigned SUSize = SethiUllmanNumbers.size();
1951
650
  if (SUnits->size() > SUSize)
1952
322
    SethiUllmanNumbers.resize(SUSize*2, 0);
1953
650
  CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
1954
650
}
1955
1956
572
void RegReductionPQBase::updateNode(const SUnit *SU) {
1957
572
  SethiUllmanNumbers[SU->NodeNum] = 0;
1958
572
  CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
1959
572
}
1960
1961
// Lower priority means schedule further down. For bottom-up scheduling, lower
1962
// priority SUs are scheduled before higher priority SUs.
1963
30.6M
unsigned RegReductionPQBase::getNodePriority(const SUnit *SU) const {
1964
30.6M
  assert(SU->NodeNum < SethiUllmanNumbers.size());
1965
30.6M
  unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
1966
30.6M
  if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
1967
30.6M
    // CopyToReg should be close to its uses to facilitate coalescing and
1968
30.6M
    // avoid spilling.
1969
1.23M
    return 0;
1970
29.4M
  if (Opc == TargetOpcode::EXTRACT_SUBREG ||
1971
29.4M
      Opc == TargetOpcode::SUBREG_TO_REG ||
1972
29.4M
      Opc == TargetOpcode::INSERT_SUBREG)
1973
29.4M
    // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
1974
29.4M
    // close to their uses to facilitate coalescing.
1975
0
    return 0;
1976
29.4M
  if (SU->NumSuccs == 0 && SU->NumPreds != 0)
1977
29.4M
    // If SU does not have a register use, i.e. it doesn't produce a value
1978
29.4M
    // that would be consumed (e.g. store), then it terminates a chain of
1979
29.4M
    // computation.  Give it a large SethiUllman number so it will be
1980
29.4M
    // scheduled right before its predecessors so that it doesn't lengthen
1981
29.4M
    // their live ranges.
1982
6.21M
    return 0xffff;
1983
23.2M
  if (SU->NumPreds == 0 && SU->NumSuccs != 0)
1984
23.2M
    // If SU does not have a register def, schedule it close to its uses
1985
23.2M
    // because it does not lengthen any live ranges.
1986
8.40M
    return 0;
1987
14.8M
#if 1
1988
14.8M
  return SethiUllmanNumbers[SU->NodeNum];
1989
#else
1990
  unsigned Priority = SethiUllmanNumbers[SU->NodeNum];
1991
  if (SU->isCallOp) {
1992
    // FIXME: This assumes all of the defs are used as call operands.
1993
    int NP = (int)Priority - SU->getNode()->getNumValues();
1994
    return (NP > 0) ? NP : 0;
1995
  }
1996
  return Priority;
1997
#endif
1998
}
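The branches above form a small decision ladder: priority 0 glues a node to
its uses, 0xffff schedules a chain terminator immediately before its
predecessors, and everything else falls through to the Sethi-Ullman number.
A hypothetical restatement of just that ladder (toyPriority is not part of
this file):

unsigned toyPriority(bool CopyLike, bool NoSuccsButPreds,
                     bool NoPredsButSuccs, unsigned SethiUllman) {
  if (CopyLike)        return 0;      // keep next to uses for coalescing
  if (NoSuccsButPreds) return 0xffff; // chain terminator: issue first
  if (NoPredsButSuccs) return 0;      // lengthens no live range: keep near uses
  return SethiUllman;
}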
1999
2000
//===----------------------------------------------------------------------===//
2001
//                     Register Pressure Tracking
2002
//===----------------------------------------------------------------------===//
2003
2004
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2005
LLVM_DUMP_METHOD void RegReductionPQBase::dumpRegPressure() const {
2006
  for (const TargetRegisterClass *RC : TRI->regclasses()) {
2007
    unsigned Id = RC->getID();
2008
    unsigned RP = RegPressure[Id];
2009
    if (!RP) continue;
2010
    DEBUG(dbgs() << TRI->getRegClassName(RC) << ": " << RP << " / "
2011
          << RegLimit[Id] << '\n');
2012
  }
2013
}
2014
#endif
2015
2016
666k
bool RegReductionPQBase::HighRegPressure(const SUnit *SU) const {
2017
666k
  if (!TLI)
2018
0
    return false;
2019
666k
2020
666k
  for (const SDep &Pred : SU->Preds) {
2021
740k
    if (Pred.isCtrl())
2022
88.8k
      continue;
2023
651k
    SUnit *PredSU = Pred.getSUnit();
2024
651k
    // NumRegDefsLeft is zero when enough uses of this node have been scheduled
2025
651k
    // to cover the number of registers defined (they are all live).
2026
651k
    if (PredSU->NumRegDefsLeft == 0) {
2027
235k
      continue;
2028
235k
    }
2029
416k
    for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
2030
843k
         RegDefPos.IsValid(); RegDefPos.Advance()) {
2031
454k
      unsigned RCId, Cost;
2032
454k
      GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF);
2033
454k
2034
454k
      if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
2035
27.3k
        return true;
2036
454k
    }
2037
740k
  }
2038
639k
  return false;
2039
666k
}
2040
2041
0
bool RegReductionPQBase::MayReduceRegPressure(SUnit *SU) const {
2042
0
  const SDNode *N = SU->getNode();
2043
0
2044
0
  if (!N->isMachineOpcode() || !SU->NumSuccs)
2045
0
    return false;
2046
0
2047
0
  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
2048
0
  for (unsigned i = 0; i != NumDefs; ++i) {
2049
0
    MVT VT = N->getSimpleValueType(i);
2050
0
    if (!N->hasAnyUseOfValue(i))
2051
0
      continue;
2052
0
    unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2053
0
    if (RegPressure[RCId] >= RegLimit[RCId])
2054
0
      return true;
2055
0
  }
2056
0
  return false;
2057
0
}
2058
2059
// Compute the register pressure contribution by this instruction by counting up
2060
// for uses that are not live and down for defs. Only count register classes
2061
// that are already under high pressure. As a side effect, compute the number of
2062
// uses of registers that are already live.
2063
//
2064
// FIXME: This encompasses the logic in HighRegPressure and MayReduceRegPressure
2065
// so could probably be factored.
2066
293k
int RegReductionPQBase::RegPressureDiff(SUnit *SU, unsigned &LiveUses) const {
2067
293k
  LiveUses = 0;
2068
293k
  int PDiff = 0;
2069
310k
  for (const SDep &Pred : SU->Preds) {
2070
310k
    if (Pred.isCtrl())
2071
26.1k
      continue;
2072
284k
    SUnit *PredSU = Pred.getSUnit();
2073
284k
    // NumRegDefsLeft is zero when enough uses of this node have been scheduled
2074
284k
    // to cover the number of registers defined (they are all live).
2075
284k
    if (PredSU->NumRegDefsLeft == 0) {
2076
67.8k
      if (PredSU->getNode()->isMachineOpcode())
2077
56.0k
        ++LiveUses;
2078
67.8k
      continue;
2079
67.8k
    }
2080
216k
    for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
2081
433k
         RegDefPos.IsValid(); RegDefPos.Advance()) {
2082
217k
      MVT VT = RegDefPos.GetValue();
2083
217k
      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2084
217k
      if (RegPressure[RCId] >= RegLimit[RCId])
2085
82.3k
        ++PDiff;
2086
217k
    }
2087
310k
  }
2088
293k
  const SDNode *N = SU->getNode();
2089
293k
2090
293k
  if (!N || !N->isMachineOpcode() || !SU->NumSuccs)
2091
165k
    return PDiff;
2092
128k
2093
128k
  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
2094
257k
  for (unsigned i = 0; i != NumDefs; ++i) {
2095
128k
    MVT VT = N->getSimpleValueType(i);
2096
128k
    if (!N->hasAnyUseOfValue(i))
2097
6
      continue;
2098
128k
    unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2099
128k
    if (RegPressure[RCId] >= RegLimit[RCId])
2100
25.2k
      --PDiff;
2101
128k
  }
2102
293k
  return PDiff;
2103
293k
}
2104
2105
29.6M
void RegReductionPQBase::scheduledNode(SUnit *SU) {
2106
29.6M
  if (!TracksRegPressure)
2107
29.2M
    return;
2108
408k
2109
408k
  if (!SU->getNode())
2110
4
    return;
2111
408k
2112
408k
  for (const SDep &Pred : SU->Preds) {
2113
461k
    if (Pred.isCtrl())
2114
159k
      continue;
2115
302k
    SUnit *PredSU = Pred.getSUnit();
2116
302k
    // NumRegDefsLeft is zero when enough uses of this node have been scheduled
2117
302k
    // to cover the number of registers defined (they are all live).
2118
302k
    if (PredSU->NumRegDefsLeft == 0) {
2119
60.3k
      continue;
2120
60.3k
    }
2121
241k
    // FIXME: The ScheduleDAG currently loses information about which of a
2122
241k
    // node's values is consumed by each dependence. Consequently, if the node
2123
241k
    // defines multiple register classes, we don't know which to pressurize
2124
241k
    // here. Instead the following loop consumes the register defs in an
2125
241k
    // arbitrary order. At least it handles the common case of clustered loads
2126
241k
    // to the same class. For precise liveness, each SDep needs to indicate the
2127
241k
    // result number. But that tightly couples the ScheduleDAG with the
2128
241k
    // SelectionDAG making updates tricky. A simpler hack would be to attach a
2129
241k
    // value type or register class to SDep.
2130
241k
    //
2131
241k
    // The most important aspect of register tracking is balancing the increase
2132
241k
    // here with the reduction further below. Note that this SU may use multiple
2133
241k
    // defs in PredSU. They can't be determined here, but we've already
2134
241k
    // compensated by reducing NumRegDefsLeft in PredSU during
2135
241k
    // ScheduleDAGSDNodes::AddSchedEdges.
2136
241k
    --PredSU->NumRegDefsLeft;
2137
241k
    unsigned SkipRegDefs = PredSU->NumRegDefsLeft;
2138
241k
    for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
2139
244k
         RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) {
2140
244k
      if (SkipRegDefs)
2141
2.34k
        continue;
2142
241k
2143
241k
      unsigned RCId, Cost;
2144
241k
      GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF);
2145
241k
      RegPressure[RCId] += Cost;
2146
241k
      break;
2147
241k
    }
2148
461k
  }
2149
408k
2150
408k
  // We should have this assert, but there may be dead SDNodes that never
2151
408k
  // materialize as SUnits, so they don't appear to generate liveness.
2152
408k
  //assert(SU->NumRegDefsLeft == 0 && "not all regdefs have scheduled uses");
2153
408k
  int SkipRegDefs = (int)SU->NumRegDefsLeft;
2154
408k
  for (ScheduleDAGSDNodes::RegDefIter RegDefPos(SU, scheduleDAG);
2155
654k
       RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) {
2156
246k
    if (SkipRegDefs > 0)
2157
9
      continue;
2158
246k
    unsigned RCId, Cost;
2159
246k
    GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF);
2160
246k
    if (RegPressure[RCId] < Cost) {
2161
4.54k
      // Register pressure tracking is imprecise. This can happen. But we try
2162
4.54k
      // hard not to let it happen because it likely results in poor scheduling.
2163
4.54k
      DEBUG(dbgs() << "  SU(" << SU->NodeNum << ") has too many regdefs\n");
2164
4.54k
      RegPressure[RCId] = 0;
2165
4.54k
    }
2166
241k
    else {
2167
241k
      RegPressure[RCId] -= Cost;
2168
241k
    }
2169
246k
  }
2170
408k
  DEBUG(dumpRegPressure());
2171
29.6M
}
2172
2173
58.2k
void RegReductionPQBase::unscheduledNode(SUnit *SU) {
2174
58.2k
  if (!TracksRegPressure)
2175
57.9k
    return;
2176
228
2177
228
  const SDNode *N = SU->getNode();
2178
228
  if (!N) return;
2179
228
2180
228
  if (!N->isMachineOpcode()) {
2181
46
    if (N->getOpcode() != ISD::CopyToReg)
2182
46
      return;
2183
182
  } else {
2184
182
    unsigned Opc = N->getMachineOpcode();
2185
182
    if (Opc == TargetOpcode::EXTRACT_SUBREG ||
2186
181
        Opc == TargetOpcode::INSERT_SUBREG ||
2187
181
        Opc == TargetOpcode::SUBREG_TO_REG ||
2188
181
        Opc == TargetOpcode::REG_SEQUENCE ||
2189
181
        Opc == TargetOpcode::IMPLICIT_DEF)
2190
1
      return;
2191
181
  }
2192
181
2193
181
  for (const SDep &Pred : SU->Preds) {
2194
284
    if (Pred.isCtrl())
2195
31
      continue;
2196
253
    SUnit *PredSU = Pred.getSUnit();
2197
253
    // NumSuccsLeft counts all deps. Don't compare it with NumSuccs which only
2198
253
    // counts data deps.
2199
253
    if (PredSU->NumSuccsLeft != PredSU->Succs.size())
2200
87
      continue;
2201
166
    const SDNode *PN = PredSU->getNode();
2202
166
    if (!PN->isMachineOpcode()) {
2203
42
      if (PN->getOpcode() == ISD::CopyFromReg) {
2204
42
        MVT VT = PN->getSimpleValueType(0);
2205
42
        unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2206
42
        RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
2207
42
      }
2208
42
      continue;
2209
42
    }
2210
124
    unsigned POpc = PN->getMachineOpcode();
2211
124
    if (POpc == TargetOpcode::IMPLICIT_DEF)
2212
0
      continue;
2213
124
    if (POpc == TargetOpcode::EXTRACT_SUBREG ||
2214
124
        POpc == TargetOpcode::INSERT_SUBREG ||
2215
124
        POpc == TargetOpcode::SUBREG_TO_REG) {
2216
2
      MVT VT = PN->getSimpleValueType(0);
2217
2
      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2218
2
      RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
2219
2
      continue;
2220
2
    }
2221
122
    unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
2222
228
    for (unsigned i = 0; i != NumDefs; ++i) {
2223
106
      MVT VT = PN->getSimpleValueType(i);
2224
106
      if (!PN->hasAnyUseOfValue(i))
2225
3
        continue;
2226
103
      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2227
103
      if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT))
2228
103
        // Register pressure tracking is imprecise. This can happen.
2229
20
        RegPressure[RCId] = 0;
2230
103
      else
2231
83
        RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
2232
106
    }
2233
284
  }
2234
181
2235
181
  // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses()
2236
181
  // may transfer data dependencies to CopyToReg.
2237
181
  if (SU->NumSuccs && N->isMachineOpcode()) {
2238
161
    unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
2239
216
    for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
2240
55
      MVT VT = N->getSimpleValueType(i);
2241
55
      if (VT == MVT::Glue || VT == MVT::Other)
2242
6
        continue;
2243
49
      if (!N->hasAnyUseOfValue(i))
2244
48
        continue;
2245
1
      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2246
1
      RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
2247
1
    }
2248
161
  }
2249
181
2250
181
  DEBUG(dumpRegPressure());
2251
58.2k
}
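Both scheduledNode and unscheduledNode clamp the running pressure at zero
rather than asserting, because the tracking is imprecise by design (see the
comments above). A minimal helper capturing that convention (hypothetical,
not part of this file):

inline void reducePressure(unsigned &Pressure, unsigned Cost) {
  Pressure = (Pressure < Cost) ? 0u : Pressure - Cost;
}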
2252
2253
//===----------------------------------------------------------------------===//
2254
//           Dynamic Node Priority for Register Pressure Reduction
2255
//===----------------------------------------------------------------------===//
2256
2257
/// closestSucc - Returns the scheduled cycle of the successor which is
2258
/// closest to the current cycle.
2259
25.7M
static unsigned closestSucc(const SUnit *SU) {
2260
25.7M
  unsigned MaxHeight = 0;
2261
29.8M
  for (const SDep &Succ : SU->Succs) {
2262
29.8M
    if (Succ.isCtrl()) continue; // ignore chain succs
2263
21.8M
    unsigned Height = Succ.getSUnit()->getHeight();
2264
21.8M
    // If there are bunch of CopyToRegs stacked up, they should be considered
2265
21.8M
    // to be at the same position.
2266
21.8M
    if (Succ.getSUnit()->getNode() &&
2267
21.8M
        Succ.getSUnit()->getNode()->getOpcode() == ISD::CopyToReg)
2268
966k
      Height = closestSucc(Succ.getSUnit())+1;
2269
21.8M
    if (Height > MaxHeight)
2270
19.2M
      MaxHeight = Height;
2271
29.8M
  }
2272
25.7M
  return MaxHeight;
2273
25.7M
}
2274
2275
/// calcMaxScratches - Returns a cost estimate of the worst-case requirement
2276
/// for scratch registers, i.e. number of data dependencies.
2277
11.1M
static unsigned calcMaxScratches(const SUnit *SU) {
2278
11.1M
  unsigned Scratches = 0;
2279
16.7M
  for (const SDep &Pred : SU->Preds) {
2280
16.7M
    if (Pred.isCtrl()) continue; // ignore chain preds
2281
12.6M
    Scratches++;
2282
12.6M
  }
2283
11.1M
  return Scratches;
2284
11.1M
}
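calcMaxScratches is simply a count of the non-control predecessor edges. The
same count written with the standard library, over a hypothetical edge type:

#include <algorithm>
#include <vector>

struct Edge { bool IsCtrl; };

unsigned countDataDeps(const std::vector<Edge> &Preds) {
  return static_cast<unsigned>(std::count_if(
      Preds.begin(), Preds.end(),
      [](const Edge &E) { return !E.IsCtrl; }));
}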
2285
2286
/// hasOnlyLiveInOpers - Return true if SU has only value predecessors that are
2287
/// CopyFromReg from a virtual register.
2288
3.30M
static bool hasOnlyLiveInOpers(const SUnit *SU) {
2289
3.30M
  bool RetVal = false;
2290
3.51M
  for (const SDep &Pred : SU->Preds) {
2291
3.51M
    if (Pred.isCtrl()) continue;
2292
2.27M
    const SUnit *PredSU = Pred.getSUnit();
2293
2.27M
    if (PredSU->getNode() &&
2294
2.27M
        PredSU->getNode()->getOpcode() == ISD::CopyFromReg) {
2295
616k
      unsigned Reg =
2296
616k
        cast<RegisterSDNode>(PredSU->getNode()->getOperand(1))->getReg();
2297
616k
      if (TargetRegisterInfo::isVirtualRegister(Reg)) {
2298
509k
        RetVal = true;
2299
509k
        continue;
2300
509k
      }
2301
1.76M
    }
2302
1.76M
    return false;
2303
1.76M
  }
2304
1.53M
  return RetVal;
2305
1.53M
}
2306
2307
/// hasOnlyLiveOutUses - Return true if SU has only value successors that are
2308
/// CopyToReg to a virtual register. This SU def is probably a liveout and
2309
/// it has no other use. It should be scheduled closer to the terminator.
2310
385k
static bool hasOnlyLiveOutUses(const SUnit *SU) {
2311
385k
  bool RetVal = false;
2312
449k
  for (const SDep &Succ : SU->Succs) {
2313
449k
    if (Succ.isCtrl()) continue;
2314
386k
    const SUnit *SuccSU = Succ.getSUnit();
2315
386k
    if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg) {
2316
73.1k
      unsigned Reg =
2317
73.1k
        cast<RegisterSDNode>(SuccSU->getNode()->getOperand(1))->getReg();
2318
73.1k
      if (TargetRegisterInfo::isVirtualRegister(Reg)) {
2319
73.1k
        RetVal = true;
2320
73.1k
        continue;
2321
73.1k
      }
2322
313k
    }
2323
313k
    return false;
2324
313k
  }
2325
71.9k
  return RetVal;
2326
71.9k
}
2327
2328
// Set isVRegCycle for a node with only live in opers and live out uses. Also
2329
// set isVRegCycle for its CopyFromReg operands.
2330
//
2331
// This is only relevant for single-block loops, in which case the VRegCycle
2332
// node is likely an induction variable in which the operand and target virtual
2333
// registers should be coalesced (e.g. pre/post increment values). Setting the
2334
// isVRegCycle flag helps the scheduler prioritize other uses of the same
2335
// CopyFromReg so that this node becomes the virtual register "kill". This
2336
// avoids interference between the values live in and out of the block and
2337
// eliminates a copy inside the loop.
2338
3.30M
static void initVRegCycle(SUnit *SU) {
2339
3.30M
  if (DisableSchedVRegCycle)
2340
0
    return;
2341
3.30M
2342
3.30M
  if (!hasOnlyLiveInOpers(SU) || !hasOnlyLiveOutUses(SU))
2343
3.24M
    return;
2344
61.1k
2345
61.1k
  DEBUG(dbgs() << "VRegCycle: SU(" << SU->NodeNum << ")\n");
2346
61.1k
2347
61.1k
  SU->isVRegCycle = true;
2348
61.1k
2349
63.5k
  for (const SDep &Pred : SU->Preds) {
2350
63.5k
    if (Pred.isCtrl()) continue;
2351
63.2k
    Pred.getSUnit()->isVRegCycle = true;
2352
63.2k
  }
2353
3.30M
}
2354
2355
// After scheduling the definition of a VRegCycle, clear the isVRegCycle flag of
2356
// CopyFromReg operands. We should no longer penalize other uses of this VReg.
2357
29.6M
static void resetVRegCycle(SUnit *SU) {
2358
29.6M
  if (!SU->isVRegCycle)
2359
29.6M
    return;
2360
61.1k
2361
61.1k
  for (const SDep &Pred : SU->Preds) {
2362
63.5k
    if (Pred.isCtrl()) continue; // ignore chain preds
2363
63.2k
    SUnit *PredSU = Pred.getSUnit();
2364
63.2k
    if (PredSU->isVRegCycle) {
2365
63.0k
      assert(PredSU->getNode()->getOpcode() == ISD::CopyFromReg &&
2366
63.0k
             "VRegCycle def must be CopyFromReg");
2367
63.0k
      Pred.getSUnit()->isVRegCycle = false;
2368
63.0k
    }
2369
63.5k
  }
2370
29.6M
}
2371
2372
// Return true if this SUnit uses a CopyFromReg node marked as a VRegCycle. This
2373
// means a node that defines the VRegCycle has not been scheduled yet.
2374
11.4M
static bool hasVRegCycleUse(const SUnit *SU) {
2375
11.4M
  // If this SU also defines the VReg, don't hoist it as a "use".
2376
11.4M
  if (SU->isVRegCycle)
2377
1.31k
    return false;
2378
11.4M
2379
11.4M
  for (const SDep &Pred : SU->Preds) {
2380
16.6M
    if (Pred.isCtrl()) continue; // ignore chain preds
2381
12.6M
    if (Pred.getSUnit()->isVRegCycle &&
2382
12.6M
        Pred.getSUnit()->getNode()->getOpcode() == ISD::CopyFromReg) {
2383
1.30k
      DEBUG(dbgs() << "  VReg cycle use: SU (" << SU->NodeNum << ")\n");
2384
1.30k
      return true;
2385
1.30k
    }
2386
11.4M
  }
2387
11.4M
  return false;
2388
11.4M
}
2389
2390
// Check for either a dependence (latency) or resource (hazard) stall.
2391
//
2392
// Note: The ScheduleHazardRecognizer interface requires a non-const SU.
2393
10.9M
static bool BUHasStall(SUnit *SU, int Height, RegReductionPQBase *SPQ) {
2394
10.9M
  if ((int)SPQ->getCurCycle() < Height) return true;
2395
10.8M
  if (SPQ->getHazardRec()->getHazardType(SU, 0)
2396
10.8M
      != ScheduleHazardRecognizer::NoHazard)
2397
80.2k
    return true;
2398
10.7M
  return false;
2399
10.7M
}
2400 |       |
2401 |       | // Return -1 if left has higher priority, 1 if right has higher priority.
2402 |       | // Return 0 if latency-based priority is equivalent.
2403 |       | static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref,
2404 | 5.72M |                             RegReductionPQBase *SPQ) {
2405 | 5.72M |   // Scheduling an instruction that uses a VReg whose postincrement has not yet
2406 | 5.72M |   // been scheduled will induce a copy. Model this as an extra cycle of latency.
2407 | 5.72M |   int LPenalty = hasVRegCycleUse(left) ? 1 : 0;
2408 | 5.72M |   int RPenalty = hasVRegCycleUse(right) ? 1 : 0;
2409 | 5.72M |   int LHeight = (int)left->getHeight() + LPenalty;
2410 | 5.72M |   int RHeight = (int)right->getHeight() + RPenalty;
2411 | 5.72M |
2412 |  312k |   bool LStall = (!checkPref || left->SchedulingPref == Sched::ILP) &&
2413 | 5.49M |     BUHasStall(left, LHeight, SPQ);
2414 |  312k |   bool RStall = (!checkPref || right->SchedulingPref == Sched::ILP) &&
2415 | 5.49M |     BUHasStall(right, RHeight, SPQ);
2416 | 5.72M |
2417 | 5.72M |   // If scheduling one of the nodes will cause a pipeline stall, delay it.
2418 | 5.72M |   // If scheduling both of them will cause stalls, sort them according to
2419 | 5.72M |   // their height.
2420 | 5.72M |   if (LStall) {
2421 | 90.2k |     if (!RStall)
2422 | 19.3k |       return 1;
2423 | 70.9k |     if (LHeight != RHeight)
2424 | 10.4k |       return LHeight > RHeight ? 1 : -1;
2425 | 5.63M |   } else if (RStall)
2426 | 26.0k |     return -1;
2427 | 5.66M |
2428 | 5.66M |   // If either node is scheduling for latency, sort them by height/depth
2429 | 5.66M |   // and latency.
2430 | 5.66M |   if (!checkPref || (left->SchedulingPref == Sched::ILP ||
2431 | 5.66M |                      right->SchedulingPref == Sched::ILP)) {
2432 | 5.46M |     // If neither instruction stalls (!LStall && !RStall) and HazardRecognizer
2433 | 5.46M |     // is enabled, grouping instructions by cycle, then its height is already
2434 | 5.46M |     // covered so only its depth matters. We also reach this point if both stall
2435 | 5.46M |     // but have the same height.
2436 | 5.46M |     if (!SPQ->getHazardRec()->isEnabled()) {
2437 | 5.28M |       if (LHeight != RHeight)
2438 |  499k |         return LHeight > RHeight ? 1 : -1;
2439 | 4.96M |     }
2440 | 4.96M |     int LDepth = left->getDepth() - LPenalty;
2441 | 4.96M |     int RDepth = right->getDepth() - RPenalty;
2442 | 4.96M |     if (LDepth != RDepth) {
2443 |  670k |       DEBUG(dbgs() << "  Comparing latency of SU (" << left->NodeNum
2444 |  670k |             << ") depth " << LDepth << " vs SU (" << right->NodeNum
2445 |  670k |             << ") depth " << RDepth << "\n");
2446 |  670k |       return LDepth < RDepth ? 1 : -1;
2447 |  670k |     }
2448 | 4.29M |     if (left->Latency != right->Latency)
2449 | 9.61k |       return left->Latency > right->Latency ? 1 : -1;
2450 | 4.48M |   }
2451 | 4.48M |   return 0;
2452 | 4.48M | }
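The -1/0/1 convention mirrors how the comparators later in this file consume the result. A standalone sketch (simplified, invented names; not LLVM code) of adapting such a three-way comparison into the bool predicate a priority queue expects, with ties falling through to an always-decisive final key:

    #include <queue>
    #include <vector>

    // Sketch of the pattern, under assumed simplifications: Node stands in
    // for SUnit, and only the height key is modeled.
    struct Node { int Height; unsigned QueueId; };

    static int compareLatency(const Node &L, const Node &R) {
      if (L.Height != R.Height)
        return L.Height > R.Height ? 1 : -1; // taller = more urgent bottom-up
      return 0;                              // tie: latency does not decide
    }

    struct LatencyLess {
      bool operator()(const Node &L, const Node &R) const {
        if (int Res = compareLatency(L, R))
          return Res > 0;               // like "return result > 0;" below
        return L.QueueId > R.QueueId;   // unique id: never a two-way tie
      }
    };

    using ReadyQueue = std::priority_queue<Node, std::vector<Node>, LatencyLess>;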
2453 |       |
2454 | 15.4M | static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) {
2455 | 15.4M |   // Schedule physical register definitions close to their use. This is
2456 | 15.4M |   // motivated by microarchitectures that can fuse cmp+jump macro-ops. But as
2457 | 15.4M |   // long as shortening physreg live ranges is generally good, we can defer
2458 | 15.4M |   // creating a subtarget hook.
2459 | 15.4M |   if (!DisableSchedPhysRegJoin) {
2460 | 15.4M |     bool LHasPhysReg = left->hasPhysRegDefs;
2461 | 15.4M |     bool RHasPhysReg = right->hasPhysRegDefs;
2462 | 15.4M |     if (LHasPhysReg != RHasPhysReg) {
2463 |       |       #ifndef NDEBUG
2464 |       |       static const char *const PhysRegMsg[] = { " has no physreg",
2465 |       |                                                 " defines a physreg" };
2466 |       |       #endif
2467 | 98.4k |       DEBUG(dbgs() << "  SU (" << left->NodeNum << ") "
2468 | 98.4k |             << PhysRegMsg[LHasPhysReg] << " SU(" << right->NodeNum << ") "
2469 | 98.4k |             << PhysRegMsg[RHasPhysReg] << "\n");
2470 | 98.4k |       return LHasPhysReg < RHasPhysReg;
2471 | 98.4k |     }
2472 | 15.3M |   }
2473 | 15.3M |
2474 | 15.3M |   // Prioritize by Sethi-Ullman number and push CopyToReg nodes down.
2475 | 15.3M |   unsigned LPriority = SPQ->getNodePriority(left);
2476 | 15.3M |   unsigned RPriority = SPQ->getNodePriority(right);
2477 | 15.3M |
2478 | 15.3M |   // Be really careful about hoisting call operands above previous calls.
2479 | 15.3M |   // Only allow it if it would reduce register pressure.
2480 | 15.3M |   if (left->isCall && right->isCallOp) {
2481 | 1.77k |     unsigned RNumVals = right->getNode()->getNumValues();
2482 | 1.77k |     RPriority = (RPriority > RNumVals) ? (RPriority - RNumVals) : 0;
2483 | 1.77k |   }
2484 | 15.3M |   if (right->isCall && left->isCallOp) {
2485 | 1.68k |     unsigned LNumVals = left->getNode()->getNumValues();
2486 | 1.68k |     LPriority = (LPriority > LNumVals) ? (LPriority - LNumVals) : 0;
2487 | 1.68k |   }
2488 | 15.3M |
2489 | 15.3M |   if (LPriority != RPriority)
2490 | 2.94M |     return LPriority > RPriority;
2491 | 12.3M |
2492 | 12.3M |   // If one or both of the nodes are calls and their Sethi-Ullman numbers
2493 | 12.3M |   // are the same, keep source order.
2494 | 12.3M |   if (left->isCall || right->isCall) {
2495 | 13.2k |     unsigned LOrder = SPQ->getNodeOrdering(left);
2496 | 13.2k |     unsigned ROrder = SPQ->getNodeOrdering(right);
2497 | 13.2k |
2498 | 13.2k |     // Prefer an ordering where the lower the non-zero order number, the higher
2499 | 13.2k |     // the preference.
2500 | 13.2k |     if ((LOrder || ROrder) && LOrder != ROrder)
2501 | 2.66k |       return LOrder != 0 && (LOrder < ROrder || ROrder == 0);
2502 | 12.3M |   }
2503 | 12.3M |
2504 | 12.3M |   // Try to schedule def + use closer when Sethi-Ullman numbers are the same.
2505 | 12.3M |   // e.g.
2506 | 12.3M |   // t1 = op t2, c1
2507 | 12.3M |   // t3 = op t4, c2
2508 | 12.3M |   //
2509 | 12.3M |   // and the following instructions are both ready.
2510 | 12.3M |   // t2 = op c3
2511 | 12.3M |   // t4 = op c4
2512 | 12.3M |   //
2513 | 12.3M |   // Then schedule t2 = op first.
2514 | 12.3M |   // i.e.
2515 | 12.3M |   // t4 = op c4
2516 | 12.3M |   // t2 = op c3
2517 | 12.3M |   // t1 = op t2, c1
2518 | 12.3M |   // t3 = op t4, c2
2519 | 12.3M |   //
2520 | 12.3M |   // This creates more short live intervals.
2521 | 12.3M |   unsigned LDist = closestSucc(left);
2522 | 12.3M |   unsigned RDist = closestSucc(right);
2523 | 12.3M |   if (LDist != RDist)
2524 | 6.81M |     return LDist < RDist;
2525 | 5.57M |
2526 | 5.57M |   // How many registers become live when the node is scheduled.
2527 | 5.57M |   unsigned LScratch = calcMaxScratches(left);
2528 | 5.57M |   unsigned RScratch = calcMaxScratches(right);
2529 | 5.57M |   if (LScratch != RScratch)
2530 |  159k |     return LScratch > RScratch;
2531 | 5.41M |
2532 | 5.41M |   // Comparing latency against a call makes little sense unless the node
2533 | 5.41M |   // is register pressure-neutral.
2534 | 5.41M |   if ((left->isCall && RPriority > 0) || (right->isCall && LPriority > 0))
2535 | 7.03k |     return (left->NodeQueueId > right->NodeQueueId);
2536 | 5.40M |
2537 | 5.40M |   // Do not compare latencies when one or both of the nodes are calls.
2538 | 5.40M |   if (!DisableSchedCycles &&
2539 | 5.40M |       !(left->isCall || right->isCall)) {
2540 | 5.40M |     int result = BUCompareLatency(left, right, false /*checkPref*/, SPQ);
2541 | 5.40M |     if (result != 0)
2542 | 1.14M |       return result > 0;
2543 | 5.40M |   }
2544 |    27 |   else {
2545 |    27 |     if (left->getHeight() != right->getHeight())
2546 |     0 |       return left->getHeight() > right->getHeight();
2547 |    27 |
2548 |    27 |     if (left->getDepth() != right->getDepth())
2549 |     8 |       return left->getDepth() < right->getDepth();
2550 | 4.26M |   }
2551 | 4.26M |
2552 | 5.40M |   assert(left->NodeQueueId && right->NodeQueueId &&
2553 | 4.26M |          "NodeQueueId cannot be zero");
2554 | 4.26M |   return (left->NodeQueueId > right->NodeQueueId);
2555 | 4.26M | }
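Abstracting away the SUnit details, the shape of BURRSort is a cascade of tie-breakers ending in a key that can never tie. A standalone sketch of that shape (field names invented for illustration; directions match the comparisons above, where returning true means left comes out of the queue after right):

    // Each key is consulted only when every earlier key ties; the unique
    // queue id guarantees the comparator never reports a two-way tie, which
    // keeps it a strict weak ordering as the priority queue requires.
    struct Keys { unsigned Priority, Dist, Scratches, QueueId; };

    static bool burrLikeLess(const Keys &L, const Keys &R) {
      if (L.Priority != R.Priority)   return L.Priority > R.Priority;
      if (L.Dist != R.Dist)           return L.Dist < R.Dist;
      if (L.Scratches != R.Scratches) return L.Scratches > R.Scratches;
      return L.QueueId > R.QueueId;   // FIFO-style final tie-break
    }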
2556 |       |
2557 |       | // Bottom up
2558 |  122k | bool bu_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2559 |  122k |   if (int res = checkSpecialNodes(left, right))
2560 |   993 |     return res > 0;
2561 |  121k |
2562 |  121k |   return BURRSort(left, right, SPQ);
2563 |  121k | }
2564 |       |
2565 |       | // Source order, otherwise bottom up.
2566 | 46.8M | bool src_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2567 | 46.8M |   if (int res = checkSpecialNodes(left, right))
2568 |  587k |     return res > 0;
2569 | 46.2M |
2570 | 46.2M |   unsigned LOrder = SPQ->getNodeOrdering(left);
2571 | 46.2M |   unsigned ROrder = SPQ->getNodeOrdering(right);
2572 | 46.2M |
2573 | 46.2M |   // Prefer an ordering where the lower the non-zero order number, the higher
2574 | 46.2M |   // the preference.
2575 | 46.2M |   if ((LOrder || ROrder) && LOrder != ROrder)
2576 | 31.3M |     return LOrder != 0 && (LOrder < ROrder || ROrder == 0);
2577 | 14.9M |
2578 | 14.9M |   return BURRSort(left, right, SPQ);
2579 | 14.9M | }
2580 |       |
2581 |       | // If the time between now and when the instruction will be ready can cover
2582 |       | // the spill code, then avoid adding it to the ready queue. This gives long
2583 |       | // stalls highest priority and allows hoisting across calls. It should also
2584 |       | // speed up processing the available queue.
2585 |     0 | bool hybrid_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const {
2586 |     0 |   static const unsigned ReadyDelay = 3;
2587 |     0 |
2588 |     0 |   if (SPQ->MayReduceRegPressure(SU)) return true;
2589 |     0 |
2590 |     0 |   if (SU->getHeight() > (CurCycle + ReadyDelay)) return false;
2591 |     0 |
2592 |     0 |   if (SPQ->getHazardRec()->getHazardType(SU, -ReadyDelay)
2593 |     0 |       != ScheduleHazardRecognizer::NoHazard)
2594 |     0 |     return false;
2595 |     0 |
2596 |     0 |   return true;
2597 |     0 | }
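A self-contained model of the admission test above (semantics paraphrased, hazard check elided, values invented for illustration): with ReadyDelay fixed at 3, a node is held out of the ready queue while its height exceeds CurCycle + 3.

    // Sketch, assuming Height counts the cycles until the node could issue
    // in a bottom-up schedule.
    static bool hybridAdmit(unsigned Height, unsigned CurCycle) {
      const unsigned ReadyDelay = 3;
      return Height <= CurCycle + ReadyDelay;
    }
    // hybridAdmit(14, 10) -> false: the stall is long enough to hide spill
    //                        code, so keep the node pending.
    // hybridAdmit(12, 10) -> true:  make it available this cycle.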
2598 |       |
2599 |       | // Return true if right should be scheduled with higher priority than left.
2600 |  347k | bool hybrid_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2601 |  347k |   if (int res = checkSpecialNodes(left, right))
2602 | 7.08k |     return res > 0;
2603 |  340k |
2604 |  340k |   if (left->isCall || right->isCall)
2605 |  340k |     // No way to compute latency of calls.
2606 | 7.34k |     return BURRSort(left, right, SPQ);
2607 |  333k |
2608 |  333k |   bool LHigh = SPQ->HighRegPressure(left);
2609 |  333k |   bool RHigh = SPQ->HighRegPressure(right);
2610 |  333k |   // Avoid causing spills. If register pressure is high, schedule for
2611 |  333k |   // register pressure reduction.
2612 |  333k |   if (LHigh && !RHigh) {
2613 | 1.72k |     DEBUG(dbgs() << "  pressure SU(" << left->NodeNum << ") > SU("
2614 | 1.72k |           << right->NodeNum << ")\n");
2615 | 1.72k |     return true;
2616 | 1.72k |   }
2617 |  331k |   else if (!LHigh && RHigh) {
2618 | 11.9k |     DEBUG(dbgs() << "  pressure SU(" << right->NodeNum << ") > SU("
2619 |  331k |           << left->NodeNum << ")\n");
2620 |  331k |     return false;
2621 |  331k |   }
2622 |  319k |   if (!LHigh && !RHigh) {
2623 |  312k |     int result = BUCompareLatency(left, right, true /*checkPref*/, SPQ);
2624 |  312k |     if (result != 0)
2625 | 90.2k |       return result > 0;
2626 |  229k |   }
2627 |  229k |   return BURRSort(left, right, SPQ);
2628 |  229k | }
2629 |       |
2630 |       | // Schedule as many instructions in each cycle as possible. So don't make an
2631 |       | // instruction available unless it is ready in the current cycle.
2632 |     0 | bool ilp_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const {
2633 |     0 |   if (SU->getHeight() > CurCycle) return false;
2634 |     0 |
2635 |     0 |   if (SPQ->getHazardRec()->getHazardType(SU, 0)
2636 |     0 |       != ScheduleHazardRecognizer::NoHazard)
2637 |     0 |     return false;
2638 |     0 |
2639 |     0 |   return true;
2640 |     0 | }
2641 |       |
2642 | 34.1k | static bool canEnableCoalescing(SUnit *SU) {
2643 | 34.1k |   unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
2644 | 34.1k |   if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
2645 | 34.1k |     // CopyToReg should be close to its uses to facilitate coalescing and
2646 | 34.1k |     // avoid spilling.
2647 | 8.23k |     return true;
2648 | 25.9k |
2649 | 25.9k |   if (Opc == TargetOpcode::EXTRACT_SUBREG ||
2650 | 25.9k |       Opc == TargetOpcode::SUBREG_TO_REG ||
2651 | 25.9k |       Opc == TargetOpcode::INSERT_SUBREG)
2652 | 25.9k |     // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
2653 | 25.9k |     // close to their uses to facilitate coalescing.
2654 |     0 |     return true;
2655 | 25.9k |
2656 | 25.9k |   if (SU->NumPreds == 0 && SU->NumSuccs != 0)
2657 | 25.9k |     // If SU does not have a register def, schedule it close to its uses
2658 | 25.9k |     // because it does not lengthen any live ranges.
2659 |     0 |     return true;
2660 | 25.9k |
2661 | 25.9k |   return false;
2662 | 25.9k | }
2663 |       |
2664 |       | // list-ilp is currently an experimental scheduler that allows various
2665 |       | // heuristics to be enabled prior to the normal register reduction logic.
2666 |  152k | bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2667 |  152k |   if (int res = checkSpecialNodes(left, right))
2668 | 1.88k |     return res > 0;
2669 |  150k |
2670 |  150k |   if (left->isCall || right->isCall)
2671 |  150k |     // No way to compute latency of calls.
2672 | 3.78k |     return BURRSort(left, right, SPQ);
2673 |  146k |
2674 |  146k |   unsigned LLiveUses = 0, RLiveUses = 0;
2675 |  146k |   int LPDiff = 0, RPDiff = 0;
2676 |  146k |   if (!DisableSchedRegPressure || !DisableSchedLiveUses) {
2677 |  146k |     LPDiff = SPQ->RegPressureDiff(left, LLiveUses);
2678 |  146k |     RPDiff = SPQ->RegPressureDiff(right, RLiveUses);
2679 |  146k |   }
2680 |  146k |   if (!DisableSchedRegPressure && LPDiff != RPDiff) {
2681 | 17.0k |     DEBUG(dbgs() << "RegPressureDiff SU(" << left->NodeNum << "): " << LPDiff
2682 | 17.0k |           << " != SU(" << right->NodeNum << "): " << RPDiff << "\n");
2683 | 17.0k |     return LPDiff > RPDiff;
2684 | 17.0k |   }
2685 |  129k |
2686 |  129k |   if (!DisableSchedRegPressure && (LPDiff > 0 || RPDiff > 0)) {
2687 | 17.0k |     bool LReduce = canEnableCoalescing(left);
2688 | 17.0k |     bool RReduce = canEnableCoalescing(right);
2689 | 17.0k |     if (LReduce && !RReduce) return false;
2690 | 16.6k |     if (RReduce && !LReduce) return true;
2691 |  129k |   }
2692 |  129k |
2693 |  129k |   if (!DisableSchedLiveUses && (LLiveUses != RLiveUses)) {
2694 |     0 |     DEBUG(dbgs() << "Live uses SU(" << left->NodeNum << "): " << LLiveUses
2695 |     0 |           << " != SU(" << right->NodeNum << "): " << RLiveUses << "\n");
2696 |     0 |     return LLiveUses < RLiveUses;
2697 |     0 |   }
2698 |  129k |
2699 |  129k |   if (!DisableSchedStalls) {
2700 |     0 |     bool LStall = BUHasStall(left, left->getHeight(), SPQ);
2701 |     0 |     bool RStall = BUHasStall(right, right->getHeight(), SPQ);
2702 |     0 |     if (LStall != RStall)
2703 |     0 |       return left->getHeight() > right->getHeight();
2704 |  129k |   }
2705 |  129k |
2706 |  129k |   if (!DisableSchedCriticalPath) {
2707 |  129k |     int spread = (int)left->getDepth() - (int)right->getDepth();
2708 |  129k |     if (std::abs(spread) > MaxReorderWindow) {
2709 | 2.46k |       DEBUG(dbgs() << "Depth of SU(" << left->NodeNum << "): "
2710 | 2.46k |             << left->getDepth() << " != SU(" << right->NodeNum << "): "
2711 | 2.46k |             << right->getDepth() << "\n");
2712 | 2.46k |       return left->getDepth() < right->getDepth();
2713 | 2.46k |     }
2714 |  126k |   }
2715 |  126k |
2716 |  126k |   if (!DisableSchedHeight && left->getHeight() != right->getHeight()) {
2717 | 47.8k |     int spread = (int)left->getHeight() - (int)right->getHeight();
2718 | 47.8k |     if (std::abs(spread) > MaxReorderWindow)
2719 | 8.75k |       return left->getHeight() > right->getHeight();
2720 |  118k |   }
2721 |  118k |
2722 |  118k |   return BURRSort(left, right, SPQ);
2723 |  118k | }
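The depth and height checks above only decide when the spread exceeds MaxReorderWindow; smaller differences fall through to BURRSort. A standalone illustration (the window is a cl::opt whose default is not shown in this excerpt, so the value below is an assumption):

    #include <cstdlib>

    // Sketch: depth only breaks the tie when the gap is larger than the
    // reorder window.
    static bool depthDecides(int LDepth, int RDepth, int Window = 6) {
      return std::abs(LDepth - RDepth) > Window;
    }
    // depthDecides(10, 2) -> true  : 8 > 6, the depth comparison decides
    // depthDecides(5, 3)  -> false : within the window, defer to BURRSort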
2724 |       |
2725 | 3.42M | void RegReductionPQBase::initNodes(std::vector<SUnit> &sunits) {
2726 | 3.42M |   SUnits = &sunits;
2727 | 3.42M |   // Add pseudo dependency edges for two-address nodes.
2728 | 3.42M |   if (!Disable2AddrHack)
2729 |     0 |     AddPseudoTwoAddrDeps();
2730 | 3.42M |   // Reroute edges to nodes with multiple uses.
2731 | 3.42M |   if (!TracksRegPressure && !SrcOrder)
2732 | 17.5k |     PrescheduleNodesWithMultipleUses();
2733 | 3.42M |   // Calculate node priorities.
2734 | 3.42M |   CalculateSethiUllmanNumbers();
2735 | 3.42M |
2736 | 3.42M |   // For single block loops, mark nodes that look like canonical IV increments.
2737 | 3.42M |   if (scheduleDAG->BB->isSuccessor(scheduleDAG->BB))
2738 |  188k |     for (SUnit &SU : sunits)
2739 | 3.30M |       initVRegCycle(&SU);
2740 | 3.42M | }
2741 |       |
2742 |       | //===----------------------------------------------------------------------===//
2743 |       | //                    Preschedule for Register Pressure
2744 |       | //===----------------------------------------------------------------------===//
2745 |       |
2746 |     0 | bool RegReductionPQBase::canClobber(const SUnit *SU, const SUnit *Op) {
2747 |     0 |   if (SU->isTwoAddress) {
2748 |     0 |     unsigned Opc = SU->getNode()->getMachineOpcode();
2749 |     0 |     const MCInstrDesc &MCID = TII->get(Opc);
2750 |     0 |     unsigned NumRes = MCID.getNumDefs();
2751 |     0 |     unsigned NumOps = MCID.getNumOperands() - NumRes;
2752 |     0 |     for (unsigned i = 0; i != NumOps; ++i) {
2753 |     0 |       if (MCID.getOperandConstraint(i+NumRes, MCOI::TIED_TO) != -1) {
2754 |     0 |         SDNode *DU = SU->getNode()->getOperand(i).getNode();
2755 |     0 |         if (DU->getNodeId() != -1 &&
2756 |     0 |             Op->OrigNode == &(*SUnits)[DU->getNodeId()])
2757 |     0 |           return true;
2758 |     0 |       }
2759 |     0 |     }
2760 |     0 |   }
2761 |     0 |   return false;
2762 |     0 | }
2763 |       |
2764 |       | /// canClobberReachingPhysRegUse - True if SU would clobber one of its
2765 |       | /// successor's explicit physregs whose definition can reach DepSU.
2766 |       | /// i.e. DepSU should not be scheduled above SU.
2767 |       | static bool canClobberReachingPhysRegUse(const SUnit *DepSU, const SUnit *SU,
2768 |       |                                          ScheduleDAGRRList *scheduleDAG,
2769 |       |                                          const TargetInstrInfo *TII,
2770 |     0 |                                          const TargetRegisterInfo *TRI) {
2771 |     0 |   const MCPhysReg *ImpDefs
2772 |     0 |     = TII->get(SU->getNode()->getMachineOpcode()).getImplicitDefs();
2773 |     0 |   const uint32_t *RegMask = getNodeRegMask(SU->getNode());
2774 |     0 |   if (!ImpDefs && !RegMask)
2775 |     0 |     return false;
2776 |     0 |
2777 |     0 |   for (const SDep &Succ : SU->Succs) {
2778 |     0 |     SUnit *SuccSU = Succ.getSUnit();
2779 |     0 |     for (const SDep &SuccPred : SuccSU->Preds) {
2780 |     0 |       if (!SuccPred.isAssignedRegDep())
2781 |     0 |         continue;
2782 |     0 |
2783 |     0 |       if (RegMask &&
2784 |     0 |           MachineOperand::clobbersPhysReg(RegMask, SuccPred.getReg()) &&
2785 |     0 |           scheduleDAG->IsReachable(DepSU, SuccPred.getSUnit()))
2786 |     0 |         return true;
2787 |     0 |
2788 |     0 |       if (ImpDefs)
2789 |     0 |         for (const MCPhysReg *ImpDef = ImpDefs; *ImpDef; ++ImpDef)
2790 |     0 |           // Return true if SU clobbers this physical register use and the
2791 |     0 |           // definition of the register reaches from DepSU. IsReachable queries
2792 |     0 |           // a topological forward sort of the DAG (following the successors).
2793 |     0 |           if (TRI->regsOverlap(*ImpDef, SuccPred.getReg()) &&
2794 |     0 |               scheduleDAG->IsReachable(DepSU, SuccPred.getSUnit()))
2795 |     0 |             return true;
2796 |     0 |     }
2797 |     0 |   }
2798 |     0 |   return false;
2799 |     0 | }
2800 |       |
2801 |       | /// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's
2802 |       | /// physical register defs.
2803 |       | static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
2804 |       |                                   const TargetInstrInfo *TII,
2805 |     0 |                                   const TargetRegisterInfo *TRI) {
2806 |     0 |   SDNode *N = SuccSU->getNode();
2807 |     0 |   unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
2808 |     0 |   const MCPhysReg *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs();
2809 |     0 |   assert(ImpDefs && "Caller should check hasPhysRegDefs");
2810 |     0 |   for (const SDNode *SUNode = SU->getNode(); SUNode;
2811 |     0 |        SUNode = SUNode->getGluedNode()) {
2812 |     0 |     if (!SUNode->isMachineOpcode())
2813 |     0 |       continue;
2814 |     0 |     const MCPhysReg *SUImpDefs =
2815 |     0 |       TII->get(SUNode->getMachineOpcode()).getImplicitDefs();
2816 |     0 |     const uint32_t *SURegMask = getNodeRegMask(SUNode);
2817 |     0 |     if (!SUImpDefs && !SURegMask)
2818 |     0 |       continue;
2819 |     0 |     for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
2820 |     0 |       MVT VT = N->getSimpleValueType(i);
2821 |     0 |       if (VT == MVT::Glue || VT == MVT::Other)
2822 |     0 |         continue;
2823 |     0 |       if (!N->hasAnyUseOfValue(i))
2824 |     0 |         continue;
2825 |     0 |       unsigned Reg = ImpDefs[i - NumDefs];
2826 |     0 |       if (SURegMask && MachineOperand::clobbersPhysReg(SURegMask, Reg))
2827 |     0 |         return true;
2828 |     0 |       if (!SUImpDefs)
2829 |     0 |         continue;
2830 |     0 |       for (; *SUImpDefs; ++SUImpDefs) {
2831 |     0 |         unsigned SUReg = *SUImpDefs;
2832 |     0 |         if (TRI->regsOverlap(Reg, SUReg))
2833 |     0 |           return true;
2834 |     0 |       }
2835 |     0 |     }
2836 |     0 |   }
2837 |     0 |   return false;
2838 |     0 | }
2839 |       |
2840 |       | /// PrescheduleNodesWithMultipleUses - Nodes with multiple uses
2841 |       | /// are not handled well by the general register pressure reduction
2842 |       | /// heuristics. When presented with code like this:
2843 |       | ///
2844 |       | ///      N
2845 |       | ///    / |
2846 |       | ///   /  |
2847 |       | ///  U  store
2848 |       | ///  |
2849 |       | /// ...
2850 |       | ///
2851 |       | /// the heuristics tend to push the store up, but since the
2852 |       | /// operand of the store has another use (U), this would increase
2853 |       | /// the length of that other use (the U->N edge).
2854 |       | ///
2855 |       | /// This function transforms code like the above to route U's
2856 |       | /// dependence through the store when possible, like this:
2857 |       | ///
2858 |       | ///      N
2859 |       | ///      ||
2860 |       | ///      ||
2861 |       | ///     store
2862 |       | ///       |
2863 |       | ///       U
2864 |       | ///       |
2865 |       | ///      ...
2866 |       | ///
2867 |       | /// This results in the store being scheduled immediately
2868 |       | /// after N, which shortens the U->N live range, reducing
2869 |       | /// register pressure.
2870 |       | ///
2871 | 17.5k | void RegReductionPQBase::PrescheduleNodesWithMultipleUses() {
2872 | 17.5k |   // Visit all the nodes in topological order, working top-down.
2873 |  130k |   for (SUnit &SU : *SUnits) {
2874 |  130k |     // For now, only look at nodes with no data successors, such as stores.
2875 |  130k |     // These are especially important, due to the heuristics in
2876 |  130k |     // getNodePriority for nodes with no data successors.
2877 |  130k |     if (SU.NumSuccs != 0)
2878 | 85.3k |       continue;
2879 | 44.9k |     // For now, only look at nodes with exactly one data predecessor.
2880 | 44.9k |     if (SU.NumPreds != 1)
2881 | 23.2k |       continue;
2882 | 21.6k |     // Avoid prescheduling copies to virtual registers, which don't behave
2883 | 21.6k |     // like other nodes from the perspective of scheduling heuristics.
2884 | 21.6k |     if (SDNode *N = SU.getNode())
2885 | 21.6k |       if (N->getOpcode() == ISD::CopyToReg &&
2886 | 8.82k |           TargetRegisterInfo::isVirtualRegister
2887 | 8.82k |             (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
2888 | 8.73k |         continue;
2889 | 12.9k |
2890 | 12.9k |     // Locate the single data predecessor.
2891 | 12.9k |     SUnit *PredSU = nullptr;
2892 | 12.9k |     for (const SDep &Pred : SU.Preds)
2893 | 15.7k |       if (!Pred.isCtrl()) {
2894 | 12.9k |         PredSU = Pred.getSUnit();
2895 | 12.9k |         break;
2896 | 12.9k |       }
2897 | 12.9k |     assert(PredSU);
2898 | 12.9k |
2899 | 12.9k |     // Don't rewrite edges that carry physregs, because that requires additional
2900 | 12.9k |     // support infrastructure.
2901 | 12.9k |     if (PredSU->hasPhysRegDefs)
2902 |   214 |       continue;
2903 | 12.7k |     // Short-circuit the case where SU is PredSU's only data successor.
2904 | 12.7k |     if (PredSU->NumSuccs == 1)
2905 | 10.9k |       continue;
2906 | 1.80k |     // Avoid prescheduling to copies from virtual registers, which don't behave
2907 | 1.80k |     // like other nodes from the perspective of scheduling heuristics.
2908 | 1.80k |     if (SDNode *N = SU.getNode())
2909 | 1.80k |       if (N->getOpcode() == ISD::CopyFromReg &&
2910 |     8 |           TargetRegisterInfo::isVirtualRegister
2911 |     8 |             (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
2912 |     0 |         continue;
2913 | 1.80k |
2914 | 1.80k |     // Perform checks on the successors of PredSU.
2915 | 1.80k |     for (const SDep &PredSucc : PredSU->Succs) {
2916 | 3.21k |       SUnit *PredSuccSU = PredSucc.getSUnit();
2917 | 3.21k |       if (PredSuccSU == &SU) continue;
2918 | 1.92k |       // If PredSU has another successor with no data successors, for
2919 | 1.92k |       // now don't attempt to choose either over the other.
2920 | 1.92k |       if (PredSuccSU->NumSuccs == 0)
2921 | 1.53k |         goto outer_loop_continue;
2922 |   391 |       // Don't break physical register dependencies.
2923 |   391 |       if (SU.hasPhysRegClobbers && PredSuccSU->hasPhysRegDefs)
2924 |     0 |         if (canClobberPhysRegDefs(PredSuccSU, &SU, TII, TRI))
2925 |     0 |           goto outer_loop_continue;
2926 |   391 |       // Don't introduce graph cycles.
2927 |   391 |       if (scheduleDAG->IsReachable(&SU, PredSuccSU))
2928 |   168 |         goto outer_loop_continue;
2929 |   100 |     }
2930 |   100 |
2931 |   100 |     // Ok, the transformation is safe and the heuristics suggest it is
2932 |   100 |     // profitable. Update the graph.
2933 |   100 |     DEBUG(dbgs() << " Prescheduling SU #" << SU.NodeNum
2934 |   100 |                  << " next to PredSU #" << PredSU->NodeNum
2935 |   100 |                  << " to guide scheduling in the presence of multiple uses\n");
2936 |   367 |     for (unsigned i = 0; i != PredSU->Succs.size(); ++i) {
2937 |   267 |       SDep Edge = PredSU->Succs[i];
2938 |   267 |       assert(!Edge.isAssignedRegDep());
2939 |   267 |       SUnit *SuccSU = Edge.getSUnit();
2940 |   267 |       if (SuccSU != &SU) {
2941 |   161 |         Edge.setSUnit(PredSU);
2942 |   161 |         scheduleDAG->RemovePred(SuccSU, Edge);
2943 |   161 |         scheduleDAG->AddPred(&SU, Edge);
2944 |   161 |         Edge.setSUnit(&SU);
2945 |   161 |         scheduleDAG->AddPred(SuccSU, Edge);
2946 |   161 |         --i;
2947 |   161 |       }
2948 |   267 |     }
2949 | 1.80k |   outer_loop_continue:;
2950 | 1.80k |   }
2951 | 17.5k | }
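A self-contained model of the edge update loop above (a deliberate simplification: integer ids stand in for SUnits, a plain pred list stands in for SDep edges, and only the data-dependence structure is modeled). Every other user U of N stops depending on N directly and instead depends on the store, yielding the N -> store -> U chain shown in the doc comment:

    #include <vector>

    struct MiniSU { std::vector<int> Preds; };

    // Simplified rewrite: every node other than Store that listed N among
    // its preds is re-pointed at Store. The real code performs this with
    // paired RemovePred/AddPred calls and keeps N as a pred of Store.
    static void routeThroughStore(std::vector<MiniSU> &DAG, int N, int Store) {
      for (int U = 0; U != (int)DAG.size(); ++U) {
        if (U == Store)
          continue;
        for (int &P : DAG[U].Preds)
          if (P == N)
            P = Store;
      }
    }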
2952 |       |
2953 |       | /// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses
2954 |       | /// it as a def&use operand, add a pseudo control edge from it to the other
2955 |       | /// node (if it won't create a cycle) so the two-address one will be scheduled
2956 |       | /// first (lower in the schedule). If both nodes are two-address, favor the
2957 |       | /// one that has a CopyToReg use (more likely to be a loop induction update).
2958 |       | /// If both are two-address, but one is commutable while the other is not
2959 |       | /// commutable, favor the one that's not commutable.
2960 |     0 | void RegReductionPQBase::AddPseudoTwoAddrDeps() {
2961 |     0 |   for (SUnit &SU : *SUnits) {
2962 |     0 |     if (!SU.isTwoAddress)
2963 |     0 |       continue;
2964 |     0 |
2965 |     0 |     SDNode *Node = SU.getNode();
2966 |     0 |     if (!Node || !Node->isMachineOpcode() || SU.getNode()->getGluedNode())
2967 |     0 |       continue;
2968 |     0 |
2969 |     0 |     bool isLiveOut = hasOnlyLiveOutUses(&SU);
2970 |     0 |     unsigned Opc = Node->getMachineOpcode();
2971 |     0 |     const MCInstrDesc &MCID = TII->get(Opc);
2972 |     0 |     unsigned NumRes = MCID.getNumDefs();
2973 |     0 |     unsigned NumOps = MCID.getNumOperands() - NumRes;
2974 |     0 |     for (unsigned j = 0; j != NumOps; ++j) {
2975 |     0 |       if (MCID.getOperandConstraint(j+NumRes, MCOI::TIED_TO) == -1)
2976 |     0 |         continue;
2977 |     0 |       SDNode *DU = SU.getNode()->getOperand(j).getNode();
2978 |     0 |       if (DU->getNodeId() == -1)
2979 |     0 |         continue;
2980 |     0 |       const SUnit *DUSU = &(*SUnits)[DU->getNodeId()];
2981 |     0 |       if (!DUSU)
2982 |     0 |         continue;
2983 |     0 |       for (const SDep &Succ : DUSU->Succs) {
2984 |     0 |         if (Succ.isCtrl())
2985 |     0 |           continue;
2986 |     0 |         SUnit *SuccSU = Succ.getSUnit();
2987 |     0 |         if (SuccSU == &SU)
2988 |     0 |           continue;
2989 |     0 |         // Be conservative. Ignore if nodes aren't at roughly the same
2990 |     0 |         // depth and height.
2991 |     0 |         if (SuccSU->getHeight() < SU.getHeight() &&
2992 |     0 |             (SU.getHeight() - SuccSU->getHeight()) > 1)
2993 |     0 |           continue;
2994 |     0 |         // Skip past COPY_TO_REGCLASS nodes, so that the pseudo edge
2995 |     0 |         // constrains whatever is using the copy, instead of the copy
2996 |     0 |         // itself. In the case that the copy is coalesced, this
2997 |     0 |         // preserves the intent of the pseudo two-address heuristics.
2998 |     0 |         while (SuccSU->Succs.size() == 1 &&
2999 |     0 |                SuccSU->getNode()->isMachineOpcode() &&
3000 |     0 |                SuccSU->getNode()->getMachineOpcode() ==
3001 |     0 |                  TargetOpcode::COPY_TO_REGCLASS)
3002 |     0 |           SuccSU = SuccSU->Succs.front().getSUnit();
3003 |     0 |         // Don't constrain non-instruction nodes.
3004 |     0 |         if (!SuccSU->getNode() || !SuccSU->getNode()->isMachineOpcode())
3005 |     0 |           continue;
3006 |     0 |         // Don't constrain nodes with physical register defs if the
3007 |     0 |         // predecessor can clobber them.
3008 |     0 |         if (SuccSU->hasPhysRegDefs && SU.hasPhysRegClobbers) {
3009 |     0 |           if (canClobberPhysRegDefs(SuccSU, &SU, TII, TRI))
3010 |     0 |             continue;
3011 |     0 |         }
3012 |     0 |         // Don't constrain EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG;
3013 |     0 |         // these may be coalesced away. We want them close to their uses.
3014 |     0 |         unsigned SuccOpc = SuccSU->getNode()->getMachineOpcode();
3015 |     0 |         if (SuccOpc == TargetOpcode::EXTRACT_SUBREG ||
3016 |     0 |             SuccOpc == TargetOpcode::INSERT_SUBREG ||
3017 |     0 |             SuccOpc == TargetOpcode::SUBREG_TO_REG)
3018 |     0 |           continue;
3019 |     0 |         if (!canClobberReachingPhysRegUse(SuccSU, &SU, scheduleDAG, TII, TRI) &&
3020 |     0 |             (!canClobber(SuccSU, DUSU) ||
3021 |     0 |              (isLiveOut && !hasOnlyLiveOutUses(SuccSU)) ||
3022 |     0 |              (!SU.isCommutable && SuccSU->isCommutable)) &&
3023 |     0 |             !scheduleDAG->IsReachable(SuccSU, &SU)) {
3024 |     0 |           DEBUG(dbgs() << "    Adding a pseudo-two-addr edge from SU #"
3025 |     0 |                        << SU.NodeNum << " to SU #" << SuccSU->NodeNum << "\n");
3026 |     0 |           scheduleDAG->AddPred(&SU, SDep(SuccSU, SDep::Artificial));
3027 |     0 |         }
3028 |     0 |       }
3029 |     0 |     }
3030 |     0 |   }
3031 |     0 | }
3032 |       |
3033 |       | //===----------------------------------------------------------------------===//
3034 |       | //                         Public Constructor Functions
3035 |       | //===----------------------------------------------------------------------===//
3036 |       |
3037 |       | llvm::ScheduleDAGSDNodes *
3038 |       | llvm::createBURRListDAGScheduler(SelectionDAGISel *IS,
3039 | 17.5k |                                  CodeGenOpt::Level OptLevel) {
3040 | 17.5k |   const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
3041 | 17.5k |   const TargetInstrInfo *TII = STI.getInstrInfo();
3042 | 17.5k |   const TargetRegisterInfo *TRI = STI.getRegisterInfo();
3043 | 17.5k |
3044 | 17.5k |   BURegReductionPriorityQueue *PQ =
3045 | 17.5k |     new BURegReductionPriorityQueue(*IS->MF, false, false, TII, TRI, nullptr);
3046 | 17.5k |   ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
3047 | 17.5k |   PQ->setScheduleDAG(SD);
3048 | 17.5k |   return SD;
3049 | 17.5k | }
3050 |       |
3051 |       | llvm::ScheduleDAGSDNodes *
3052 |       | llvm::createSourceListDAGScheduler(SelectionDAGISel *IS,
3053 | 3.35M |                                    CodeGenOpt::Level OptLevel) {
3054 | 3.35M |   const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
3055 | 3.35M |   const TargetInstrInfo *TII = STI.getInstrInfo();
3056 | 3.35M |   const TargetRegisterInfo *TRI = STI.getRegisterInfo();
3057 | 3.35M |
3058 | 3.35M |   SrcRegReductionPriorityQueue *PQ =
3059 | 3.35M |     new SrcRegReductionPriorityQueue(*IS->MF, false, true, TII, TRI, nullptr);
3060 | 3.35M |   ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
3061 | 3.35M |   PQ->setScheduleDAG(SD);
3062 | 3.35M |   return SD;
3063 | 3.35M | }
3064 |       |
3065 |       | llvm::ScheduleDAGSDNodes *
3066 |       | llvm::createHybridListDAGScheduler(SelectionDAGISel *IS,
3067 | 37.1k |                                    CodeGenOpt::Level OptLevel) {
3068 | 37.1k |   const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
3069 | 37.1k |   const TargetInstrInfo *TII = STI.getInstrInfo();
3070 | 37.1k |   const TargetRegisterInfo *TRI = STI.getRegisterInfo();
3071 | 37.1k |   const TargetLowering *TLI = IS->TLI;
3072 | 37.1k |
3073 | 37.1k |   HybridBURRPriorityQueue *PQ =
3074 | 37.1k |     new HybridBURRPriorityQueue(*IS->MF, true, false, TII, TRI, TLI);
3075 | 37.1k |
3076 | 37.1k |   ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel);
3077 | 37.1k |   PQ->setScheduleDAG(SD);
3078 | 37.1k |   return SD;
3079 | 37.1k | }
3080 |       |
3081 |       | llvm::ScheduleDAGSDNodes *
3082 |       | llvm::createILPListDAGScheduler(SelectionDAGISel *IS,
3083 | 15.3k |                                 CodeGenOpt::Level OptLevel) {
3084 | 15.3k |   const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
3085 | 15.3k |   const TargetInstrInfo *TII = STI.getInstrInfo();
3086 | 15.3k |   const TargetRegisterInfo *TRI = STI.getRegisterInfo();
3087 | 15.3k |   const TargetLowering *TLI = IS->TLI;
3088 | 15.3k |
3089 | 15.3k |   ILPBURRPriorityQueue *PQ =
3090 | 15.3k |     new ILPBURRPriorityQueue(*IS->MF, true, false, TII, TRI, TLI);
3091 | 15.3k |   ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel);
3092 | 15.3k |   PQ->setScheduleDAG(SD);
3093 | 15.3k |   return SD;
3094 | 15.3k | }
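These factories are normally instantiated through the scheduler registry rather than called directly; on the command line they are reachable through llc's pre-RA scheduler flag, using the registry names rather than the function names. For example:

    llc -pre-RA-sched=source   test.ll   # createSourceListDAGScheduler
    llc -pre-RA-sched=list-ilp test.ll   # createILPListDAGScheduler

Judging by the counts, the source-order scheduler (3.35M constructions) dominates this particular run, with the hybrid, BURR, and ILP variants trailing far behind.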