Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
Line
Count
Source (jump to first uncovered line)
1
//===--- ScheduleDAGSDNodes.cpp - Implement the ScheduleDAGSDNodes class --===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This implements the ScheduleDAG class, which is a base class used by
10
// scheduling implementation classes.
11
//
12
//===----------------------------------------------------------------------===//
13
14
#include "ScheduleDAGSDNodes.h"
15
#include "InstrEmitter.h"
16
#include "SDNodeDbgValue.h"
17
#include "llvm/ADT/DenseMap.h"
18
#include "llvm/ADT/SmallPtrSet.h"
19
#include "llvm/ADT/SmallSet.h"
20
#include "llvm/ADT/SmallVector.h"
21
#include "llvm/ADT/Statistic.h"
22
#include "llvm/CodeGen/MachineInstrBuilder.h"
23
#include "llvm/CodeGen/MachineRegisterInfo.h"
24
#include "llvm/CodeGen/SelectionDAG.h"
25
#include "llvm/CodeGen/TargetInstrInfo.h"
26
#include "llvm/CodeGen/TargetLowering.h"
27
#include "llvm/CodeGen/TargetRegisterInfo.h"
28
#include "llvm/CodeGen/TargetSubtargetInfo.h"
29
#include "llvm/Config/llvm-config.h"
30
#include "llvm/MC/MCInstrItineraries.h"
31
#include "llvm/Support/CommandLine.h"
32
#include "llvm/Support/Debug.h"
33
#include "llvm/Support/raw_ostream.h"
34
using namespace llvm;
35
36
#define DEBUG_TYPE "pre-RA-sched"
37
38
STATISTIC(LoadsClustered, "Number of loads clustered together");
39
40
// This allows the latency-based scheduler to notice high latency instructions
41
// without a target itinerary. The choice of number here has more to do with
42
// balancing scheduler heuristics than with the actual machine latency.
43
static cl::opt<int> HighLatencyCycles(
44
  "sched-high-latency-cycles", cl::Hidden, cl::init(10),
45
  cl::desc("Roughly estimate the number of cycles that 'long latency'"
46
           "instructions take for targets with no itinerary"));
47
48
ScheduleDAGSDNodes::ScheduleDAGSDNodes(MachineFunction &mf)
49
    : ScheduleDAG(mf), BB(nullptr), DAG(nullptr),
50
1.24M
      InstrItins(mf.getSubtarget().getInstrItineraryData()) {}
51
52
/// Run - perform scheduling.
53
///
54
1.24M
void ScheduleDAGSDNodes::Run(SelectionDAG *dag, MachineBasicBlock *bb) {
55
1.24M
  BB = bb;
56
1.24M
  DAG = dag;
57
1.24M
58
1.24M
  // Clear the scheduler's SUnit DAG.
59
1.24M
  ScheduleDAG::clearDAG();
60
1.24M
  Sequence.clear();
61
1.24M
62
1.24M
  // Invoke the target's selection of scheduler.
63
1.24M
  Schedule();
64
1.24M
}
65
66
/// NewSUnit - Creates a new SUnit and return a ptr to it.
67
///
68
11.0M
SUnit *ScheduleDAGSDNodes::newSUnit(SDNode *N) {
69
#ifndef NDEBUG
70
  const SUnit *Addr = nullptr;
71
  if (!SUnits.empty())
72
    Addr = &SUnits[0];
73
#endif
74
  SUnits.emplace_back(N, (unsigned)SUnits.size());
75
11.0M
  assert((Addr == nullptr || Addr == &SUnits[0]) &&
76
11.0M
         "SUnits std::vector reallocated on the fly!");
77
11.0M
  SUnits.back().OrigNode = &SUnits.back();
78
11.0M
  SUnit *SU = &SUnits.back();
79
11.0M
  const TargetLowering &TLI = DAG->getTargetLoweringInfo();
80
11.0M
  if (!N ||
81
11.0M
      
(11.0M
N->isMachineOpcode()11.0M
&&
82
11.0M
       
N->getMachineOpcode() == TargetOpcode::IMPLICIT_DEF7.43M
))
83
60.8k
    SU->SchedulingPref = Sched::None;
84
10.9M
  else
85
10.9M
    SU->SchedulingPref = TLI.getSchedulingPreference(N);
86
11.0M
  return SU;
87
11.0M
}
88
89
625
SUnit *ScheduleDAGSDNodes::Clone(SUnit *Old) {
90
625
  SUnit *SU = newSUnit(Old->getNode());
91
625
  SU->OrigNode = Old->OrigNode;
92
625
  SU->Latency = Old->Latency;
93
625
  SU->isVRegCycle = Old->isVRegCycle;
94
625
  SU->isCall = Old->isCall;
95
625
  SU->isCallOp = Old->isCallOp;
96
625
  SU->isTwoAddress = Old->isTwoAddress;
97
625
  SU->isCommutable = Old->isCommutable;
98
625
  SU->hasPhysRegDefs = Old->hasPhysRegDefs;
99
625
  SU->hasPhysRegClobbers = Old->hasPhysRegClobbers;
100
625
  SU->isScheduleHigh = Old->isScheduleHigh;
101
625
  SU->isScheduleLow = Old->isScheduleLow;
102
625
  SU->SchedulingPref = Old->SchedulingPref;
103
625
  Old->isCloned = true;
104
625
  return SU;
105
625
}
106
107
/// CheckForPhysRegDependency - Check if the dependency between def and use of
108
/// a specified operand is a physical register dependency. If so, returns the
109
/// register and the cost of copying the register.
110
static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op,
111
                                      const TargetRegisterInfo *TRI,
112
                                      const TargetInstrInfo *TII,
113
12.8M
                                      unsigned &PhysReg, int &Cost) {
114
12.8M
  if (Op != 2 || 
User->getOpcode() != ISD::CopyToReg3.20M
)
115
10.4M
    return;
116
2.38M
117
2.38M
  unsigned Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
118
2.38M
  if (TargetRegisterInfo::isVirtualRegister(Reg))
119
850k
    return;
120
1.53M
121
1.53M
  unsigned ResNo = User->getOperand(2).getResNo();
122
1.53M
  if (Def->getOpcode() == ISD::CopyFromReg &&
123
1.53M
      
cast<RegisterSDNode>(Def->getOperand(1))->getReg() == Reg462k
) {
124
21.9k
    PhysReg = Reg;
125
1.51M
  } else if (Def->isMachineOpcode()) {
126
1.07M
    const MCInstrDesc &II = TII->get(Def->getMachineOpcode());
127
1.07M
    if (ResNo >= II.getNumDefs() &&
128
1.07M
        
II.ImplicitDefs[ResNo - II.getNumDefs()] == Reg356k
)
129
356k
      PhysReg = Reg;
130
1.07M
  }
131
1.53M
132
1.53M
  if (PhysReg != 0) {
133
378k
    const TargetRegisterClass *RC =
134
378k
        TRI->getMinimalPhysRegClass(Reg, Def->getSimpleValueType(ResNo));
135
378k
    Cost = RC->getCopyCost();
136
378k
  }
137
1.53M
}
138
139
// Helper for AddGlue to clone node operands.
140
static void CloneNodeWithValues(SDNode *N, SelectionDAG *DAG, ArrayRef<EVT> VTs,
141
71.4k
                                SDValue ExtraOper = SDValue()) {
142
71.4k
  SmallVector<SDValue, 8> Ops(N->op_begin(), N->op_end());
143
71.4k
  if (ExtraOper.getNode())
144
31.8k
    Ops.push_back(ExtraOper);
145
71.4k
146
71.4k
  SDVTList VTList = DAG->getVTList(VTs);
147
71.4k
  MachineSDNode *MN = dyn_cast<MachineSDNode>(N);
148
71.4k
149
71.4k
  // Store memory references.
150
71.4k
  SmallVector<MachineMemOperand *, 2> MMOs;
151
71.4k
  if (MN)
152
71.4k
    MMOs.assign(MN->memoperands_begin(), MN->memoperands_end());
153
71.4k
154
71.4k
  DAG->MorphNodeTo(N, N->getOpcode(), VTList, Ops);
155
71.4k
156
71.4k
  // Reset the memory references
157
71.4k
  if (MN)
158
71.4k
    DAG->setNodeMemRefs(MN, MMOs);
159
71.4k
}
160
161
104k
static bool AddGlue(SDNode *N, SDValue Glue, bool AddGlue, SelectionDAG *DAG) {
162
104k
  SDNode *GlueDestNode = Glue.getNode();
163
104k
164
104k
  // Don't add glue from a node to itself.
165
104k
  if (GlueDestNode == N) 
return false9
;
166
104k
167
104k
  // Don't add a glue operand to something that already uses glue.
168
104k
  if (GlueDestNode &&
169
104k
      
N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Glue31.8k
) {
170
0
    return false;
171
0
  }
172
104k
  // Don't add glue to something that already has a glue value.
173
104k
  if (N->getValueType(N->getNumValues() - 1) == MVT::Glue) 
return false33.2k
;
174
71.4k
175
71.4k
  SmallVector<EVT, 4> VTs(N->value_begin(), N->value_end());
176
71.4k
  if (AddGlue)
177
31.8k
    VTs.push_back(MVT::Glue);
178
71.4k
179
71.4k
  CloneNodeWithValues(N, DAG, VTs, Glue);
180
71.4k
181
71.4k
  return true;
182
71.4k
}
183
184
// Cleanup after unsuccessful AddGlue. Use the standard method of morphing the
185
// node even though simply shrinking the value list is sufficient.
186
9
static void RemoveUnusedGlue(SDNode *N, SelectionDAG *DAG) {
187
9
  assert((N->getValueType(N->getNumValues() - 1) == MVT::Glue &&
188
9
          !N->hasAnyUseOfValue(N->getNumValues() - 1)) &&
189
9
         "expected an unused glue value");
190
9
191
9
  CloneNodeWithValues(N, DAG,
192
9
                      makeArrayRef(N->value_begin(), N->getNumValues() - 1));
193
9
}
194
195
/// ClusterNeighboringLoads - Force nearby loads together by "gluing" them.
196
/// This function finds loads of the same base and different offsets. If the
197
/// offsets are not far apart (target specific), it add MVT::Glue inputs and
198
/// outputs to ensure they are scheduled together and in order. This
199
/// optimization may benefit some targets by improving cache locality.
200
925k
void ScheduleDAGSDNodes::ClusterNeighboringLoads(SDNode *Node) {
201
925k
  SDNode *Chain = nullptr;
202
925k
  unsigned NumOps = Node->getNumOperands();
203
925k
  if (Node->getOperand(NumOps-1).getValueType() == MVT::Other)
204
887k
    Chain = Node->getOperand(NumOps-1).getNode();
205
925k
  if (!Chain)
206
37.5k
    return;
207
887k
208
887k
  // Skip any load instruction that has a tied input. There may be an additional
209
887k
  // dependency requiring a different order than by increasing offsets, and the
210
887k
  // added glue may introduce a cycle.
211
1.23M
  
auto hasTiedInput = [this](const SDNode *N) 887k
{
212
1.23M
    const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
213
7.56M
    for (unsigned I = 0; I != MCID.getNumOperands(); 
++I6.33M
) {
214
6.36M
      if (MCID.getOperandConstraint(I, MCOI::TIED_TO) != -1)
215
39.6k
        return true;
216
6.36M
    }
217
1.23M
218
1.23M
    
return false1.20M
;
219
1.23M
  };
220
887k
221
887k
  // Look for other loads of the same chain. Find loads that are loading from
222
887k
  // the same base pointer and different offsets.
223
887k
  SmallPtrSet<SDNode*, 16> Visited;
224
887k
  SmallVector<int64_t, 4> Offsets;
225
887k
  DenseMap<long long, SDNode*> O2SMap;  // Map from offset to SDNode.
226
887k
  bool Cluster = false;
227
887k
  SDNode *Base = Node;
228
887k
229
887k
  if (hasTiedInput(Base))
230
39.6k
    return;
231
848k
232
848k
  // This algorithm requires a reasonably low use count before finding a match
233
848k
  // to avoid uselessly blowing up compile time in large blocks.
234
848k
  unsigned UseCount = 0;
235
848k
  for (SDNode::use_iterator I = Chain->use_begin(), E = Chain->use_end();
236
6.80M
       I != E && 
UseCount < 1005.96M
;
++I, ++UseCount5.96M
) {
237
5.96M
    SDNode *User = *I;
238
5.96M
    if (User == Node || 
!Visited.insert(User).second5.11M
)
239
887k
      continue;
240
5.07M
    int64_t Offset1, Offset2;
241
5.07M
    if (!TII->areLoadsFromSameBasePtr(Base, User, Offset1, Offset2) ||
242
5.07M
        
Offset1 == Offset2352k
||
243
5.07M
        
hasTiedInput(User)351k
) {
244
4.72M
      // FIXME: Should be ok if they addresses are identical. But earlier
245
4.72M
      // optimizations really should have eliminated one of the loads.
246
4.72M
      continue;
247
4.72M
    }
248
351k
    if (O2SMap.insert(std::make_pair(Offset1, Base)).second)
249
44.6k
      Offsets.push_back(Offset1);
250
351k
    O2SMap.insert(std::make_pair(Offset2, User));
251
351k
    Offsets.push_back(Offset2);
252
351k
    if (Offset2 < Offset1)
253
43.1k
      Base = User;
254
351k
    Cluster = true;
255
351k
    // Reset UseCount to allow more matches.
256
351k
    UseCount = 0;
257
351k
  }
258
848k
259
848k
  if (!Cluster)
260
803k
    return;
261
44.6k
262
44.6k
  // Sort them in increasing order.
263
44.6k
  llvm::sort(Offsets);
264
44.6k
265
44.6k
  // Check if the loads are close enough.
266
44.6k
  SmallVector<SDNode*, 4> Loads;
267
44.6k
  unsigned NumLoads = 0;
268
44.6k
  int64_t BaseOff = Offsets[0];
269
44.6k
  SDNode *BaseLoad = O2SMap[BaseOff];
270
44.6k
  Loads.push_back(BaseLoad);
271
109k
  for (unsigned i = 1, e = Offsets.size(); i != e; 
++i65.1k
) {
272
80.2k
    int64_t Offset = Offsets[i];
273
80.2k
    SDNode *Load = O2SMap[Offset];
274
80.2k
    if (!TII->shouldScheduleLoadsNear(BaseLoad, Load, BaseOff, Offset,NumLoads))
275
15.1k
      break; // Stop right here. Ignore loads that are further away.
276
65.1k
    Loads.push_back(Load);
277
65.1k
    ++NumLoads;
278
65.1k
  }
279
44.6k
280
44.6k
  if (NumLoads == 0)
281
5.05k
    return;
282
39.5k
283
39.5k
  // Cluster loads by adding MVT::Glue outputs and inputs. This also
284
39.5k
  // ensure they are scheduled in order of increasing addresses.
285
39.5k
  SDNode *Lead = Loads[0];
286
39.5k
  SDValue InGlue = SDValue(nullptr, 0);
287
39.5k
  if (AddGlue(Lead, InGlue, true, DAG))
288
26.1k
    InGlue = SDValue(Lead, Lead->getNumValues() - 1);
289
104k
  for (unsigned I = 1, E = Loads.size(); I != E; 
++I65.1k
) {
290
65.1k
    bool OutGlue = I < E - 1;
291
65.1k
    SDNode *Load = Loads[I];
292
65.1k
293
65.1k
    // If AddGlue fails, we could leave an unsused glue value. This should not
294
65.1k
    // cause any
295
65.1k
    if (AddGlue(Load, InGlue, OutGlue, DAG)) {
296
45.3k
      if (OutGlue)
297
5.76k
        InGlue = SDValue(Load, Load->getNumValues() - 1);
298
45.3k
299
45.3k
      ++LoadsClustered;
300
45.3k
    }
301
19.7k
    else if (!OutGlue && 
InGlue.getNode()9
)
302
9
      RemoveUnusedGlue(InGlue.getNode(), DAG);
303
65.1k
  }
304
39.5k
}
305
306
/// ClusterNodes - Cluster certain nodes which should be scheduled together.
307
///
308
1.24M
void ScheduleDAGSDNodes::ClusterNodes() {
309
26.8M
  for (SDNode &NI : DAG->allnodes()) {
310
26.8M
    SDNode *Node = &NI;
311
26.8M
    if (!Node || !Node->isMachineOpcode())
312
18.7M
      continue;
313
8.11M
314
8.11M
    unsigned Opc = Node->getMachineOpcode();
315
8.11M
    const MCInstrDesc &MCID = TII->get(Opc);
316
8.11M
    if (MCID.mayLoad())
317
925k
      // Cluster loads from "near" addresses into combined SUnits.
318
925k
      ClusterNeighboringLoads(Node);
319
8.11M
  }
320
1.24M
}
321
322
1.24M
void ScheduleDAGSDNodes::BuildSchedUnits() {
323
1.24M
  // During scheduling, the NodeId field of SDNode is used to map SDNodes
324
1.24M
  // to their associated SUnits by holding SUnits table indices. A value
325
1.24M
  // of -1 means the SDNode does not yet have an associated SUnit.
326
1.24M
  unsigned NumNodes = 0;
327
26.8M
  for (SDNode &NI : DAG->allnodes()) {
328
26.8M
    NI.setNodeId(-1);
329
26.8M
    ++NumNodes;
330
26.8M
  }
331
1.24M
332
1.24M
  // Reserve entries in the vector for each of the SUnits we are creating.  This
333
1.24M
  // ensure that reallocation of the vector won't happen, so SUnit*'s won't get
334
1.24M
  // invalidated.
335
1.24M
  // FIXME: Multiply by 2 because we may clone nodes during scheduling.
336
1.24M
  // This is a temporary workaround.
337
1.24M
  SUnits.reserve(NumNodes * 2);
338
1.24M
339
1.24M
  // Add all nodes in depth first order.
340
1.24M
  SmallVector<SDNode*, 64> Worklist;
341
1.24M
  SmallPtrSet<SDNode*, 32> Visited;
342
1.24M
  Worklist.push_back(DAG->getRoot().getNode());
343
1.24M
  Visited.insert(DAG->getRoot().getNode());
344
1.24M
345
1.24M
  SmallVector<SUnit*, 8> CallSUnits;
346
27.8M
  while (!Worklist.empty()) {
347
26.5M
    SDNode *NI = Worklist.pop_back_val();
348
26.5M
349
26.5M
    // Add all operands to the worklist unless they've already been added.
350
26.5M
    for (const SDValue &Op : NI->op_values())
351
42.1M
      if (Visited.insert(Op.getNode()).second)
352
25.3M
        Worklist.push_back(Op.getNode());
353
26.5M
354
26.5M
    if (isPassiveNode(NI))  // Leaf node, e.g. a TargetImmediate.
355
13.2M
      continue;
356
13.2M
357
13.2M
    // If this node has already been processed, stop now.
358
13.2M
    if (NI->getNodeId() != -1) 
continue2.26M
;
359
11.0M
360
11.0M
    SUnit *NodeSUnit = newSUnit(NI);
361
11.0M
362
11.0M
    // See if anything is glued to this node, if so, add them to glued
363
11.0M
    // nodes.  Nodes can have at most one glue input and one glue output.  Glue
364
11.0M
    // is required to be the last operand and result of a node.
365
11.0M
366
11.0M
    // Scan up to find glued preds.
367
11.0M
    SDNode *N = NI;
368
13.2M
    while (N->getNumOperands() &&
369
13.2M
           
N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Glue13.1M
) {
370
2.21M
      N = N->getOperand(N->getNumOperands()-1).getNode();
371
2.21M
      assert(N->getNodeId() == -1 && "Node already inserted!");
372
2.21M
      N->setNodeId(NodeSUnit->NodeNum);
373
2.21M
      if (N->isMachineOpcode() && 
TII->get(N->getMachineOpcode()).isCall()657k
)
374
394k
        NodeSUnit->isCall = true;
375
2.21M
    }
376
11.0M
377
11.0M
    // Scan down to find any glued succs.
378
11.0M
    N = NI;
379
11.0M
    while (N->getValueType(N->getNumValues()-1) == MVT::Glue) {
380
922k
      SDValue GlueVal(N, N->getNumValues()-1);
381
922k
382
922k
      // There are either zero or one users of the Glue result.
383
922k
      bool HasGlueUse = false;
384
922k
      for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end();
385
2.09M
           UI != E; 
++UI1.17M
)
386
1.22M
        if (GlueVal.isOperandOf(*UI)) {
387
54.6k
          HasGlueUse = true;
388
54.6k
          assert(N->getNodeId() == -1 && "Node already inserted!");
389
54.6k
          N->setNodeId(NodeSUnit->NodeNum);
390
54.6k
          N = *UI;
391
54.6k
          if (N->isMachineOpcode() && 
TII->get(N->getMachineOpcode()).isCall()13.1k
)
392
0
            NodeSUnit->isCall = true;
393
54.6k
          break;
394
54.6k
        }
395
922k
      if (!HasGlueUse) 
break867k
;
396
922k
    }
397
11.0M
398
11.0M
    if (NodeSUnit->isCall)
399
394k
      CallSUnits.push_back(NodeSUnit);
400
11.0M
401
11.0M
    // Schedule zero-latency TokenFactor below any nodes that may increase the
402
11.0M
    // schedule height. Otherwise, ancestors of the TokenFactor may appear to
403
11.0M
    // have false stalls.
404
11.0M
    if (NI->getOpcode() == ISD::TokenFactor)
405
509k
      NodeSUnit->isScheduleLow = true;
406
11.0M
407
11.0M
    // If there are glue operands involved, N is now the bottom-most node
408
11.0M
    // of the sequence of nodes that are glued together.
409
11.0M
    // Update the SUnit.
410
11.0M
    NodeSUnit->setNode(N);
411
11.0M
    assert(N->getNodeId() == -1 && "Node already inserted!");
412
11.0M
    N->setNodeId(NodeSUnit->NodeNum);
413
11.0M
414
11.0M
    // Compute NumRegDefsLeft. This must be done before AddSchedEdges.
415
11.0M
    InitNumRegDefsLeft(NodeSUnit);
416
11.0M
417
11.0M
    // Assign the Latency field of NodeSUnit using target-provided information.
418
11.0M
    computeLatency(NodeSUnit);
419
11.0M
  }
420
1.24M
421
1.24M
  // Find all call operands.
422
1.63M
  while (!CallSUnits.empty()) {
423
394k
    SUnit *SU = CallSUnits.pop_back_val();
424
2.15M
    for (const SDNode *SUNode = SU->getNode(); SUNode;
425
1.76M
         SUNode = SUNode->getGluedNode()) {
426
1.76M
      if (SUNode->getOpcode() != ISD::CopyToReg)
427
1.00M
        continue;
428
756k
      SDNode *SrcN = SUNode->getOperand(2).getNode();
429
756k
      if (isPassiveNode(SrcN)) 
continue1.94k
; // Not scheduled.
430
754k
      SUnit *SrcSU = &SUnits[SrcN->getNodeId()];
431
754k
      SrcSU->isCallOp = true;
432
754k
    }
433
394k
  }
434
1.24M
}
435
436
1.24M
void ScheduleDAGSDNodes::AddSchedEdges() {
437
1.24M
  const TargetSubtargetInfo &ST = MF.getSubtarget();
438
1.24M
439
1.24M
  // Check to see if the scheduler cares about latencies.
440
1.24M
  bool UnitLatencies = forceUnitLatencies();
441
1.24M
442
1.24M
  // Pass 2: add the preds, succs, etc.
443
12.2M
  for (unsigned su = 0, e = SUnits.size(); su != e; 
++su11.0M
) {
444
11.0M
    SUnit *SU = &SUnits[su];
445
11.0M
    SDNode *MainNode = SU->getNode();
446
11.0M
447
11.0M
    if (MainNode->isMachineOpcode()) {
448
7.39M
      unsigned Opc = MainNode->getMachineOpcode();
449
7.39M
      const MCInstrDesc &MCID = TII->get(Opc);
450
28.8M
      for (unsigned i = 0; i != MCID.getNumOperands(); 
++i21.4M
) {
451
21.9M
        if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) {
452
526k
          SU->isTwoAddress = true;
453
526k
          break;
454
526k
        }
455
21.9M
      }
456
7.39M
      if (MCID.isCommutable())
457
292k
        SU->isCommutable = true;
458
7.39M
    }
459
11.0M
460
11.0M
    // Find all predecessors and successors of the group.
461
24.3M
    for (SDNode *N = SU->getNode(); N; 
N = N->getGluedNode()13.2M
) {
462
13.2M
      if (N->isMachineOpcode() &&
463
13.2M
          
TII->get(N->getMachineOpcode()).getImplicitDefs()8.10M
) {
464
1.87M
        SU->hasPhysRegClobbers = true;
465
1.87M
        unsigned NumUsed = InstrEmitter::CountResults(N);
466
3.32M
        while (NumUsed != 0 && 
!N->hasAnyUseOfValue(NumUsed - 1)2.17M
)
467
1.44M
          --NumUsed;    // Skip over unused values at the end.
468
1.87M
        if (NumUsed > TII->get(N->getMachineOpcode()).getNumDefs())
469
352k
          SU->hasPhysRegDefs = true;
470
1.87M
      }
471
13.2M
472
55.3M
      for (unsigned i = 0, e = N->getNumOperands(); i != e; 
++i42.1M
) {
473
42.1M
        SDNode *OpN = N->getOperand(i).getNode();
474
42.1M
        if (isPassiveNode(OpN)) 
continue24.8M
; // Not scheduled.
475
17.2M
        SUnit *OpSU = &SUnits[OpN->getNodeId()];
476
17.2M
        assert(OpSU && "Node has no SUnit!");
477
17.2M
        if (OpSU == SU) 
continue4.36M
; // In the same group.
478
12.8M
479
12.8M
        EVT OpVT = N->getOperand(i).getValueType();
480
12.8M
        assert(OpVT != MVT::Glue && "Glued nodes should be in same sunit!");
481
12.8M
        bool isChain = OpVT == MVT::Other;
482
12.8M
483
12.8M
        unsigned PhysReg = 0;
484
12.8M
        int Cost = 1;
485
12.8M
        // Determine if this is a physical register dependency.
486
12.8M
        CheckForPhysRegDependency(OpN, N, i, TRI, TII, PhysReg, Cost);
487
12.8M
        assert((PhysReg == 0 || !isChain) &&
488
12.8M
               "Chain dependence via physreg data?");
489
12.8M
        // FIXME: See ScheduleDAGSDNodes::EmitCopyFromReg. For now, scheduler
490
12.8M
        // emits a copy from the physical register to a virtual register unless
491
12.8M
        // it requires a cross class copy (cost < 0). That means we are only
492
12.8M
        // treating "expensive to copy" register dependency as physical register
493
12.8M
        // dependency. This may change in the future though.
494
12.8M
        if (Cost >= 0 && 
!StressSched0
)
495
12.4M
          PhysReg = 0;
496
12.8M
497
12.8M
        // If this is a ctrl dep, latency is 1.
498
12.8M
        unsigned OpLatency = isChain ? 
14.16M
:
OpSU->Latency8.68M
;
499
12.8M
        // Special-case TokenFactor chains as zero-latency.
500
12.8M
        if(isChain && 
OpN->getOpcode() == ISD::TokenFactor4.16M
)
501
475k
          OpLatency = 0;
502
12.8M
503
12.8M
        SDep Dep = isChain ? 
SDep(OpSU, SDep::Barrier)4.16M
504
12.8M
          : 
SDep(OpSU, SDep::Data, PhysReg)8.68M
;
505
12.8M
        Dep.setLatency(OpLatency);
506
12.8M
        if (!isChain && 
!UnitLatencies8.68M
) {
507
523k
          computeOperandLatency(OpN, N, i, Dep);
508
523k
          ST.adjustSchedDependency(OpSU, SU, Dep);
509
523k
        }
510
12.8M
511
12.8M
        if (!SU->addPred(Dep) && 
!Dep.isCtrl()194k
&&
OpSU->NumRegDefsLeft > 1167k
) {
512
13.3k
          // Multiple register uses are combined in the same SUnit. For example,
513
13.3k
          // we could have a set of glued nodes with all their defs consumed by
514
13.3k
          // another set of glued nodes. Register pressure tracking sees this as
515
13.3k
          // a single use, so to keep pressure balanced we reduce the defs.
516
13.3k
          //
517
13.3k
          // We can't tell (without more book-keeping) if this results from
518
13.3k
          // glued nodes or duplicate operands. As long as we don't reduce
519
13.3k
          // NumRegDefsLeft to zero, we handle the common cases well.
520
13.3k
          --OpSU->NumRegDefsLeft;
521
13.3k
        }
522
12.8M
      }
523
13.2M
    }
524
11.0M
  }
525
1.24M
}
526
527
/// BuildSchedGraph - Build the SUnit graph from the selection dag that we
528
/// are input.  This SUnit graph is similar to the SelectionDAG, but
529
/// excludes nodes that aren't interesting to scheduling, and represents
530
/// glued together nodes with a single SUnit.
531
1.24M
void ScheduleDAGSDNodes::BuildSchedGraph(AliasAnalysis *AA) {
532
1.24M
  // Cluster certain nodes which should be scheduled together.
533
1.24M
  ClusterNodes();
534
1.24M
  // Populate the SUnits array.
535
1.24M
  BuildSchedUnits();
536
1.24M
  // Compute all the scheduling dependencies between nodes.
537
1.24M
  AddSchedEdges();
538
1.24M
}
539
540
// Initialize NumNodeDefs for the current Node's opcode.
541
15.7M
void ScheduleDAGSDNodes::RegDefIter::InitNodeNumDefs() {
542
15.7M
  // Check for phys reg copy.
543
15.7M
  if (!Node)
544
0
    return;
545
15.7M
546
15.7M
  if (!Node->isMachineOpcode()) {
547
6.08M
    if (Node->getOpcode() == ISD::CopyFromReg)
548
2.83M
      NodeNumDefs = 1;
549
3.25M
    else
550
3.25M
      NodeNumDefs = 0;
551
6.08M
    return;
552
6.08M
  }
553
9.63M
  unsigned POpc = Node->getMachineOpcode();
554
9.63M
  if (POpc == TargetOpcode::IMPLICIT_DEF) {
555
62.1k
    // No register need be allocated for this.
556
62.1k
    NodeNumDefs = 0;
557
62.1k
    return;
558
62.1k
  }
559
9.56M
  if (POpc == TargetOpcode::PATCHPOINT &&
560
9.56M
      
Node->getValueType(0) == MVT::Other146
) {
561
99
    // PATCHPOINT is defined to have one result, but it might really have none
562
99
    // if we're not using CallingConv::AnyReg. Don't mistake the chain for a
563
99
    // real definition.
564
99
    NodeNumDefs = 0;
565
99
    return;
566
99
  }
567
9.56M
  unsigned NRegDefs = SchedDAG->TII->get(Node->getMachineOpcode()).getNumDefs();
568
9.56M
  // Some instructions define regs that are not represented in the selection DAG
569
9.56M
  // (e.g. unused flags). See tMOVi8. Make sure we don't access past NumValues.
570
9.56M
  NodeNumDefs = std::min(Node->getNumValues(), NRegDefs);
571
9.56M
  DefIdx = 0;
572
9.56M
}
573
574
// Construct a RegDefIter for this SUnit and find the first valid value.
575
ScheduleDAGSDNodes::RegDefIter::RegDefIter(const SUnit *SU,
576
                                           const ScheduleDAGSDNodes *SD)
577
13.0M
  : SchedDAG(SD), Node(SU->getNode()), DefIdx(0), NodeNumDefs(0) {
578
13.0M
  InitNodeNumDefs();
579
13.0M
  Advance();
580
13.0M
}
581
582
// Advance to the next valid value defined by the SUnit.
583
20.6M
void ScheduleDAGSDNodes::RegDefIter::Advance() {
584
23.3M
  for (;
Node23.3M
;) { // Visit all glued nodes.
585
23.6M
    for (;DefIdx < NodeNumDefs; 
++DefIdx296k
) {
586
8.34M
      if (!Node->hasAnyUseOfValue(DefIdx))
587
296k
        continue;
588
8.05M
      ValueType = Node->getSimpleValueType(DefIdx);
589
8.05M
      ++DefIdx;
590
8.05M
      return; // Found a normal regdef.
591
8.05M
    }
592
23.3M
    Node = Node->getGluedNode();
593
15.2M
    if (!Node) {
594
12.6M
      return; // No values left to visit.
595
12.6M
    }
596
2.63M
    InitNodeNumDefs();
597
2.63M
  }
598
20.6M
}
599
600
11.0M
void ScheduleDAGSDNodes::InitNumRegDefsLeft(SUnit *SU) {
601
11.0M
  assert(SU->NumRegDefsLeft == 0 && "expect a new node");
602
17.2M
  for (RegDefIter I(SU, this); I.IsValid(); 
I.Advance()6.26M
) {
603
6.26M
    assert(SU->NumRegDefsLeft < USHRT_MAX && "overflow is ok but unexpected");
604
6.26M
    ++SU->NumRegDefsLeft;
605
6.26M
  }
606
11.0M
}
607
608
11.0M
void ScheduleDAGSDNodes::computeLatency(SUnit *SU) {
609
11.0M
  SDNode *N = SU->getNode();
610
11.0M
611
11.0M
  // TokenFactor operands are considered zero latency, and some schedulers
612
11.0M
  // (e.g. Top-Down list) may rely on the fact that operand latency is nonzero
613
11.0M
  // whenever node latency is nonzero.
614
11.0M
  if (
N11.0M
&& N->getOpcode() == ISD::TokenFactor) {
615
509k
    SU->Latency = 0;
616
509k
    return;
617
509k
  }
618
10.5M
619
10.5M
  // Check to see if the scheduler cares about latencies.
620
10.5M
  if (forceUnitLatencies()) {
621
9.84M
    SU->Latency = 1;
622
9.84M
    return;
623
9.84M
  }
624
673k
625
673k
  if (!InstrItins || 
InstrItins->isEmpty()630k
) {
626
154k
    if (N && N->isMachineOpcode() &&
627
154k
        
TII->isHighLatencyDef(N->getMachineOpcode())119k
)
628
0
      SU->Latency = HighLatencyCycles;
629
154k
    else
630
154k
      SU->Latency = 1;
631
154k
    return;
632
154k
  }
633
518k
634
518k
  // Compute the latency for the node.  We use the sum of the latencies for
635
518k
  // all nodes glued together into this SUnit.
636
518k
  SU->Latency = 0;
637
1.21M
  for (SDNode *N = SU->getNode(); N; 
N = N->getGluedNode()692k
)
638
692k
    if (N->isMachineOpcode())
639
440k
      SU->Latency += TII->getInstrLatency(InstrItins, N);
640
518k
}
641
642
void ScheduleDAGSDNodes::computeOperandLatency(SDNode *Def, SDNode *Use,
643
523k
                                               unsigned OpIdx, SDep& dep) const{
644
523k
  // Check to see if the scheduler cares about latencies.
645
523k
  if (forceUnitLatencies())
646
0
    return;
647
523k
648
523k
  if (dep.getKind() != SDep::Data)
649
0
    return;
650
523k
651
523k
  unsigned DefIdx = Use->getOperand(OpIdx).getResNo();
652
523k
  if (Use->isMachineOpcode())
653
370k
    // Adjust the use operand index by num of defs.
654
370k
    OpIdx += TII->get(Use->getMachineOpcode()).getNumDefs();
655
523k
  int Latency = TII->getOperandLatency(InstrItins, Def, DefIdx, Use, OpIdx);
656
523k
  if (Latency > 1 && 
Use->getOpcode() == ISD::CopyToReg66.9k
&&
657
523k
      
!BB->succ_empty()10.2k
) {
658
2.77k
    unsigned Reg = cast<RegisterSDNode>(Use->getOperand(1))->getReg();
659
2.77k
    if (TargetRegisterInfo::isVirtualRegister(Reg))
660
2.35k
      // This copy is a liveout value. It is likely coalesced, so reduce the
661
2.35k
      // latency so not to penalize the def.
662
2.35k
      // FIXME: need target specific adjustment here?
663
2.35k
      Latency = (Latency > 1) ? Latency - 1 : 
10
;
664
2.77k
  }
665
523k
  if (Latency >= 0)
666
352k
    dep.setLatency(Latency);
667
523k
}
668
669
0
void ScheduleDAGSDNodes::dumpNode(const SUnit &SU) const {
670
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
671
  dumpNodeName(SU);
672
  dbgs() << ": ";
673
674
  if (!SU.getNode()) {
675
    dbgs() << "PHYS REG COPY\n";
676
    return;
677
  }
678
679
  SU.getNode()->dump(DAG);
680
  dbgs() << "\n";
681
  SmallVector<SDNode *, 4> GluedNodes;
682
  for (SDNode *N = SU.getNode()->getGluedNode(); N; N = N->getGluedNode())
683
    GluedNodes.push_back(N);
684
  while (!GluedNodes.empty()) {
685
    dbgs() << "    ";
686
    GluedNodes.back()->dump(DAG);
687
    dbgs() << "\n";
688
    GluedNodes.pop_back();
689
  }
690
#endif
691
}
692
693
0
void ScheduleDAGSDNodes::dump() const {
694
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
695
  if (EntrySU.getNode() != nullptr)
696
    dumpNodeAll(EntrySU);
697
  for (const SUnit &SU : SUnits)
698
    dumpNodeAll(SU);
699
  if (ExitSU.getNode() != nullptr)
700
    dumpNodeAll(ExitSU);
701
#endif
702
}
703
704
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
705
void ScheduleDAGSDNodes::dumpSchedule() const {
706
  for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
707
    if (SUnit *SU = Sequence[i])
708
      dumpNode(*SU);
709
    else
710
      dbgs() << "**** NOOP ****\n";
711
  }
712
}
713
#endif
714
715
#ifndef NDEBUG
716
/// VerifyScheduledSequence - Verify that all SUnits were scheduled and that
717
/// their state is consistent with the nodes listed in Sequence.
718
///
719
void ScheduleDAGSDNodes::VerifyScheduledSequence(bool isBottomUp) {
720
  unsigned ScheduledNodes = ScheduleDAG::VerifyScheduledDAG(isBottomUp);
721
  unsigned Noops = 0;
722
  for (unsigned i = 0, e = Sequence.size(); i != e; ++i)
723
    if (!Sequence[i])
724
      ++Noops;
725
  assert(Sequence.size() - Noops == ScheduledNodes &&
726
         "The number of nodes scheduled doesn't match the expected number!");
727
}
728
#endif // NDEBUG
729
730
/// ProcessSDDbgValues - Process SDDbgValues associated with this node.
731
static void
732
ProcessSDDbgValues(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter,
733
                   SmallVectorImpl<std::pair<unsigned, MachineInstr*> > &Orders,
734
4.36k
                   DenseMap<SDValue, unsigned> &VRBaseMap, unsigned Order) {
735
4.36k
  if (!N->getHasDebugValue())
736
4.04k
    return;
737
315
738
315
  // Opportunistically insert immediate dbg_value uses, i.e. those with the same
739
315
  // source order number as N.
740
315
  MachineBasicBlock *BB = Emitter.getBlock();
741
315
  MachineBasicBlock::iterator InsertPos = Emitter.getInsertPos();
742
343
  for (auto DV : DAG->GetDbgValues(N)) {
743
343
    if (DV->isEmitted())
744
0
      continue;
745
343
    unsigned DVOrder = DV->getOrder();
746
343
    if (!Order || 
DVOrder == Order183
) {
747
309
      MachineInstr *DbgMI = Emitter.EmitDbgValue(DV, VRBaseMap);
748
309
      if (DbgMI) {
749
309
        Orders.push_back({DVOrder, DbgMI});
750
309
        BB->insert(InsertPos, DbgMI);
751
309
      }
752
309
    }
753
343
  }
754
315
}
755
756
// ProcessSourceNode - Process nodes with source order numbers. These are added
757
// to a vector which EmitSchedule uses to determine how to insert dbg_value
758
// instructions in the right order.
759
static void
760
ProcessSourceNode(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter,
761
                  DenseMap<SDValue, unsigned> &VRBaseMap,
762
                  SmallVectorImpl<std::pair<unsigned, MachineInstr *>> &Orders,
763
4.36k
                  SmallSet<unsigned, 8> &Seen, MachineInstr *NewInsn) {
764
4.36k
  unsigned Order = N->getIROrder();
765
4.36k
  if (!Order || 
Seen.count(Order)4.03k
) {
766
2.48k
    // Process any valid SDDbgValues even if node does not have any order
767
2.48k
    // assigned.
768
2.48k
    ProcessSDDbgValues(N, DAG, Emitter, Orders, VRBaseMap, 0);
769
2.48k
    return;
770
2.48k
  }
771
1.87k
772
1.87k
  // If a new instruction was generated for this Order number, record it.
773
1.87k
  // Otherwise, leave this order number unseen: we will either find later
774
1.87k
  // instructions for it, or leave it unseen if there were no instructions at
775
1.87k
  // all.
776
1.87k
  if (NewInsn) {
777
1.41k
    Seen.insert(Order);
778
1.41k
    Orders.push_back({Order, NewInsn});
779
1.41k
  }
780
1.87k
781
1.87k
  // Even if no instruction was generated, a Value may have become defined via
782
1.87k
  // earlier nodes. Try to process them now.
783
1.87k
  ProcessSDDbgValues(N, DAG, Emitter, Orders, VRBaseMap, Order);
784
1.87k
}
785
786
void ScheduleDAGSDNodes::
787
EmitPhysRegCopy(SUnit *SU, DenseMap<SUnit*, unsigned> &VRBaseMap,
788
132
                MachineBasicBlock::iterator InsertPos) {
789
132
  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
790
132
       I != E; 
++I0
) {
791
132
    if (I->isCtrl()) 
continue0
; // ignore chain preds
792
132
    if (I->getSUnit()->CopyDstRC) {
793
66
      // Copy to physical register.
794
66
      DenseMap<SUnit*, unsigned>::iterator VRI = VRBaseMap.find(I->getSUnit());
795
66
      assert(VRI != VRBaseMap.end() && "Node emitted out of order - late");
796
66
      // Find the destination physical register.
797
66
      unsigned Reg = 0;
798
66
      for (SUnit::const_succ_iterator II = SU->Succs.begin(),
799
81
             EE = SU->Succs.end(); II != EE; 
++II15
) {
800
81
        if (II->isCtrl()) 
continue1
; // ignore chain preds
801
80
        if (II->getReg()) {
802
66
          Reg = II->getReg();
803
66
          break;
804
66
        }
805
80
      }
806
66
      BuildMI(*BB, InsertPos, DebugLoc(), TII->get(TargetOpcode::COPY), Reg)
807
66
        .addReg(VRI->second);
808
66
    } else {
809
66
      // Copy from physical register.
810
66
      assert(I->getReg() && "Unknown physical register!");
811
66
      unsigned VRBase = MRI.createVirtualRegister(SU->CopyDstRC);
812
66
      bool isNew = VRBaseMap.insert(std::make_pair(SU, VRBase)).second;
813
66
      (void)isNew; // Silence compiler warning.
814
66
      assert(isNew && "Node emitted out of order - early");
815
66
      BuildMI(*BB, InsertPos, DebugLoc(), TII->get(TargetOpcode::COPY), VRBase)
816
66
        .addReg(I->getReg());
817
66
    }
818
132
    break;
819
132
  }
820
132
}
821
822
/// EmitSchedule - Emit the machine code in scheduled order. Return the new
823
/// InsertPos and MachineBasicBlock that contains this insertion
824
/// point. ScheduleDAGSDNodes holds a BB pointer for convenience, but this does
825
/// not necessarily refer to returned BB. The emitter may split blocks.
826
MachineBasicBlock *ScheduleDAGSDNodes::
827
1.24M
EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
828
1.24M
  InstrEmitter Emitter(BB, InsertPos);
829
1.24M
  DenseMap<SDValue, unsigned> VRBaseMap;
830
1.24M
  DenseMap<SUnit*, unsigned> CopyVRBaseMap;
831
1.24M
  SmallVector<std::pair<unsigned, MachineInstr*>, 32> Orders;
832
1.24M
  SmallSet<unsigned, 8> Seen;
833
1.24M
  bool HasDbg = DAG->hasDebugValues();
834
1.24M
835
1.24M
  // Emit a node, and determine where its first instruction is for debuginfo.
836
1.24M
  // Zero, one, or multiple instructions can be created when emitting a node.
837
1.24M
  auto EmitNode =
838
1.24M
      [&](SDNode *Node, bool IsClone, bool IsCloned,
839
13.2M
          DenseMap<SDValue, unsigned> &VRBaseMap) -> MachineInstr * {
840
13.2M
    // Fetch instruction prior to this, or end() if nonexistant.
841
26.5M
    auto GetPrevInsn = [&](MachineBasicBlock::iterator I) {
842
26.5M
      if (I == BB->begin())
843
2.90M
        return BB->end();
844
23.6M
      else
845
23.6M
        return std::prev(Emitter.getInsertPos());
846
26.5M
    };
847
13.2M
848
13.2M
    MachineBasicBlock::iterator Before = GetPrevInsn(Emitter.getInsertPos());
849
13.2M
    Emitter.EmitNode(Node, IsClone, IsCloned, VRBaseMap);
850
13.2M
    MachineBasicBlock::iterator After = GetPrevInsn(Emitter.getInsertPos());
851
13.2M
852
13.2M
    // If the iterator did not change, no instructions were inserted.
853
13.2M
    if (Before == After)
854
3.02M
      return nullptr;
855
10.2M
856
10.2M
    MachineInstr *MI;
857
10.2M
    if (Before == BB->end()) {
858
1.06M
      // There were no prior instructions; the new ones must start at the
859
1.06M
      // beginning of the block.
860
1.06M
      MI = &Emitter.getBlock()->instr_front();
861
9.19M
    } else {
862
9.19M
      // Return first instruction after the pre-existing instructions.
863
9.19M
      MI = &*std::next(Before);
864
9.19M
    }
865
10.2M
866
10.2M
    if (MI->isCall() && 
DAG->getTarget().Options.EnableDebugEntryValues460k
)
867
1
      MF.addCallArgsForwardingRegs(MI, DAG->getSDCallSiteInfo(Node));
868
10.2M
869
10.2M
    return MI;
870
10.2M
  };
871
1.24M
872
1.24M
  // If this is the first BB, emit byval parameter dbg_value's.
873
1.24M
  if (HasDbg && 
BB->getParent()->begin() == MachineFunction::iterator(BB)340
) {
874
239
    SDDbgInfo::DbgIterator PDI = DAG->ByvalParmDbgBegin();
875
239
    SDDbgInfo::DbgIterator PDE = DAG->ByvalParmDbgEnd();
876
242
    for (; PDI != PDE; 
++PDI3
) {
877
3
      MachineInstr *DbgMI= Emitter.EmitDbgValue(*PDI, VRBaseMap);
878
3
      if (DbgMI) {
879
3
        BB->insert(InsertPos, DbgMI);
880
3
        // We re-emit the dbg_value closer to its use, too, after instructions
881
3
        // are emitted to the BB.
882
3
        (*PDI)->clearIsEmitted();
883
3
      }
884
3
    }
885
239
  }
886
1.24M
887
12.2M
  for (unsigned i = 0, e = Sequence.size(); i != e; 
i++11.0M
) {
888
11.0M
    SUnit *SU = Sequence[i];
889
11.0M
    if (!SU) {
890
0
      // Null SUnit* is a noop.
891
0
      TII->insertNoop(*Emitter.getBlock(), InsertPos);
892
0
      continue;
893
0
    }
894
11.0M
895
11.0M
    // For pre-regalloc scheduling, create instructions corresponding to the
896
11.0M
    // SDNode and any glued SDNodes and append them to the block.
897
11.0M
    if (!SU->getNode()) {
898
132
      // Emit a copy.
899
132
      EmitPhysRegCopy(SU, CopyVRBaseMap, InsertPos);
900
132
      continue;
901
132
    }
902
11.0M
903
11.0M
    SmallVector<SDNode *, 4> GluedNodes;
904
13.2M
    for (SDNode *N = SU->getNode()->getGluedNode(); N; 
N = N->getGluedNode()2.26M
)
905
2.26M
      GluedNodes.push_back(N);
906
13.2M
    while (!GluedNodes.empty()) {
907
2.26M
      SDNode *N = GluedNodes.back();
908
2.26M
      auto NewInsn = EmitNode(N, SU->OrigNode != SU, SU->isCloned, VRBaseMap);
909
2.26M
      // Remember the source order of the inserted instruction.
910
2.26M
      if (HasDbg)
911
1.02k
        ProcessSourceNode(N, DAG, Emitter, VRBaseMap, Orders, Seen, NewInsn);
912
2.26M
      GluedNodes.pop_back();
913
2.26M
    }
914
11.0M
    auto NewInsn =
915
11.0M
        EmitNode(SU->getNode(), SU->OrigNode != SU, SU->isCloned, VRBaseMap);
916
11.0M
    // Remember the source order of the inserted instruction.
917
11.0M
    if (HasDbg)
918
3.34k
      ProcessSourceNode(SU->getNode(), DAG, Emitter, VRBaseMap, Orders, Seen,
919
3.34k
                        NewInsn);
920
11.0M
  }
921
1.24M
922
1.24M
  // Insert all the dbg_values which have not already been inserted in source
923
1.24M
  // order sequence.
924
1.24M
  if (HasDbg) {
925
340
    MachineBasicBlock::iterator BBBegin = BB->getFirstNonPHI();
926
340
927
340
    // Sort the source order instructions and use the order to insert debug
928
340
    // values. Use stable_sort so that DBG_VALUEs are inserted in the same order
929
340
    // regardless of the host's implementation fo std::sort.
930
340
    llvm::stable_sort(Orders, less_first());
931
340
    std::stable_sort(DAG->DbgBegin(), DAG->DbgEnd(),
932
16.5k
                     [](const SDDbgValue *LHS, const SDDbgValue *RHS) {
933
16.5k
                       return LHS->getOrder() < RHS->getOrder();
934
16.5k
                     });
935
340
936
340
    SDDbgInfo::DbgIterator DI = DAG->DbgBegin();
937
340
    SDDbgInfo::DbgIterator DE = DAG->DbgEnd();
938
340
    // Now emit the rest according to source order.
939
340
    unsigned LastOrder = 0;
940
1.65k
    for (unsigned i = 0, e = Orders.size(); i != e && 
DI != DE1.51k
;
++i1.31k
) {
941
1.31k
      unsigned Order = Orders[i].first;
942
1.31k
      MachineInstr *MI = Orders[i].second;
943
1.31k
      // Insert all SDDbgValue's whose order(s) are before "Order".
944
1.31k
      assert(MI);
945
6.08k
      for (; DI != DE; 
++DI4.77k
) {
946
5.79k
        if ((*DI)->getOrder() < LastOrder || (*DI)->getOrder() >= Order)
947
1.02k
          break;
948
4.77k
        if ((*DI)->isEmitted())
949
431
          continue;
950
4.33k
951
4.33k
        MachineInstr *DbgMI = Emitter.EmitDbgValue(*DI, VRBaseMap);
952
4.33k
        if (DbgMI) {
953
4.33k
          if (!LastOrder)
954
4.15k
            // Insert to start of the BB (after PHIs).
955
4.15k
            BB->insert(BBBegin, DbgMI);
956
181
          else {
957
181
            // Insert at the instruction, which may be in a different
958
181
            // block, if the block was split by a custom inserter.
959
181
            MachineBasicBlock::iterator Pos = MI;
960
181
            MI->getParent()->insert(Pos, DbgMI);
961
181
          }
962
4.33k
        }
963
4.33k
      }
964
1.31k
      LastOrder = Order;
965
1.31k
    }
966
340
    // Add trailing DbgValue's before the terminator. FIXME: May want to add
967
340
    // some of them before one or more conditional branches?
968
340
    SmallVector<MachineInstr*, 8> DbgMIs;
969
401
    for (; DI != DE; 
++DI61
) {
970
61
      if ((*DI)->isEmitted())
971
30
        continue;
972
31
      assert((*DI)->getOrder() >= LastOrder &&
973
31
             "emitting DBG_VALUE out of order");
974
31
      if (MachineInstr *DbgMI = Emitter.EmitDbgValue(*DI, VRBaseMap))
975
31
        DbgMIs.push_back(DbgMI);
976
31
    }
977
340
978
340
    MachineBasicBlock *InsertBB = Emitter.getBlock();
979
340
    MachineBasicBlock::iterator Pos = InsertBB->getFirstTerminator();
980
340
    InsertBB->insert(Pos, DbgMIs.begin(), DbgMIs.end());
981
340
982
340
    SDDbgInfo::DbgLabelIterator DLI = DAG->DbgLabelBegin();
983
340
    SDDbgInfo::DbgLabelIterator DLE = DAG->DbgLabelEnd();
984
340
    // Now emit the rest according to source order.
985
340
    LastOrder = 0;
986
340
    for (const auto &InstrOrder : Orders) {
987
338
      unsigned Order = InstrOrder.first;
988
338
      MachineInstr *MI = InstrOrder.second;
989
338
      if (!MI)
990
0
        continue;
991
338
992
338
      // Insert all SDDbgLabel's whose order(s) are before "Order".
993
342
      
for (; 338
DLI != DLE &&
994
342
             
(*DLI)->getOrder() >= LastOrder6
&&
(*DLI)->getOrder() < Order6
;
995
338
             
++DLI4
) {
996
4
        MachineInstr *DbgMI = Emitter.EmitDbgLabel(*DLI);
997
4
        if (DbgMI) {
998
4
          if (!LastOrder)
999
1
            // Insert to start of the BB (after PHIs).
1000
1
            BB->insert(BBBegin, DbgMI);
1001
3
          else {
1002
3
            // Insert at the instruction, which may be in a different
1003
3
            // block, if the block was split by a custom inserter.
1004
3
            MachineBasicBlock::iterator Pos = MI;
1005
3
            MI->getParent()->insert(Pos, DbgMI);
1006
3
          }
1007
4
        }
1008
4
      }
1009
338
      if (DLI == DLE)
1010
336
        break;
1011
2
1012
2
      LastOrder = Order;
1013
2
    }
1014
340
  }
1015
1.24M
1016
1.24M
  InsertPos = Emitter.getInsertPos();
1017
1.24M
  return Emitter.getBlock();
1018
1.24M
}
1019
1020
/// Return the basic block label.
1021
0
std::string ScheduleDAGSDNodes::getDAGName() const {
1022
0
  return "sunit-dag." + BB->getFullName();
1023
0
}