Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/CodeGen/MachinePipeliner.cpp
Line
Count
Source (jump to first uncovered line)
1
//===- MachinePipeliner.cpp - Machine Software Pipeliner Pass -------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// An implementation of the Swing Modulo Scheduling (SMS) software pipeliner.
10
//
11
// This SMS implementation is a target-independent back-end pass. When enabled,
12
// the pass runs just prior to the register allocation pass, while the machine
13
// IR is in SSA form. If software pipelining is successful, then the original
14
// loop is replaced by the optimized loop. The optimized loop contains one or
15
// more prolog blocks, the pipelined kernel, and one or more epilog blocks. If
16
// the instructions cannot be scheduled in a given MII, we increase the MII by
17
// one and try again.
18
//
19
// The SMS implementation is an extension of the ScheduleDAGInstrs class. We
20
// represent loop carried dependences in the DAG as order edges to the Phi
21
// nodes. We also perform several passes over the DAG to eliminate unnecessary
22
// edges that inhibit the ability to pipeline. The implementation uses the
23
// DFAPacketizer class to compute the minimum initiation interval and the check
24
// where an instruction may be inserted in the pipelined schedule.
25
//
26
// In order for the SMS pass to work, several target specific hooks need to be
27
// implemented to get information about the loop structure and to rewrite
28
// instructions.
29
//
30
//===----------------------------------------------------------------------===//
31
32
#include "llvm/ADT/ArrayRef.h"
33
#include "llvm/ADT/BitVector.h"
34
#include "llvm/ADT/DenseMap.h"
35
#include "llvm/ADT/MapVector.h"
36
#include "llvm/ADT/PriorityQueue.h"
37
#include "llvm/ADT/SetVector.h"
38
#include "llvm/ADT/SmallPtrSet.h"
39
#include "llvm/ADT/SmallSet.h"
40
#include "llvm/ADT/SmallVector.h"
41
#include "llvm/ADT/Statistic.h"
42
#include "llvm/ADT/iterator_range.h"
43
#include "llvm/Analysis/AliasAnalysis.h"
44
#include "llvm/Analysis/MemoryLocation.h"
45
#include "llvm/Analysis/ValueTracking.h"
46
#include "llvm/CodeGen/DFAPacketizer.h"
47
#include "llvm/CodeGen/LiveIntervals.h"
48
#include "llvm/CodeGen/MachineBasicBlock.h"
49
#include "llvm/CodeGen/MachineDominators.h"
50
#include "llvm/CodeGen/MachineFunction.h"
51
#include "llvm/CodeGen/MachineFunctionPass.h"
52
#include "llvm/CodeGen/MachineInstr.h"
53
#include "llvm/CodeGen/MachineInstrBuilder.h"
54
#include "llvm/CodeGen/MachineLoopInfo.h"
55
#include "llvm/CodeGen/MachineMemOperand.h"
56
#include "llvm/CodeGen/MachineOperand.h"
57
#include "llvm/CodeGen/MachinePipeliner.h"
58
#include "llvm/CodeGen/MachineRegisterInfo.h"
59
#include "llvm/CodeGen/RegisterPressure.h"
60
#include "llvm/CodeGen/ScheduleDAG.h"
61
#include "llvm/CodeGen/ScheduleDAGMutation.h"
62
#include "llvm/CodeGen/TargetOpcodes.h"
63
#include "llvm/CodeGen/TargetRegisterInfo.h"
64
#include "llvm/CodeGen/TargetSubtargetInfo.h"
65
#include "llvm/Config/llvm-config.h"
66
#include "llvm/IR/Attributes.h"
67
#include "llvm/IR/DebugLoc.h"
68
#include "llvm/IR/Function.h"
69
#include "llvm/MC/LaneBitmask.h"
70
#include "llvm/MC/MCInstrDesc.h"
71
#include "llvm/MC/MCInstrItineraries.h"
72
#include "llvm/MC/MCRegisterInfo.h"
73
#include "llvm/Pass.h"
74
#include "llvm/Support/CommandLine.h"
75
#include "llvm/Support/Compiler.h"
76
#include "llvm/Support/Debug.h"
77
#include "llvm/Support/MathExtras.h"
78
#include "llvm/Support/raw_ostream.h"
79
#include <algorithm>
80
#include <cassert>
81
#include <climits>
82
#include <cstdint>
83
#include <deque>
84
#include <functional>
85
#include <iterator>
86
#include <map>
87
#include <memory>
88
#include <tuple>
89
#include <utility>
90
#include <vector>
91
92
using namespace llvm;
93
94
#define DEBUG_TYPE "pipeliner"
95
96
STATISTIC(NumTrytoPipeline, "Number of loops that we attempt to pipeline");
97
STATISTIC(NumPipelined, "Number of loops software pipelined");
98
STATISTIC(NumNodeOrderIssues, "Number of node order issues found");
99
STATISTIC(NumFailBranch, "Pipeliner abort due to unknown branch");
100
STATISTIC(NumFailLoop, "Pipeliner abort due to unsupported loop");
101
STATISTIC(NumFailPreheader, "Pipeliner abort due to missing preheader");
102
STATISTIC(NumFailLargeMaxMII, "Pipeliner abort due to MaxMII too large");
103
STATISTIC(NumFailZeroMII, "Pipeliner abort due to zero MII");
104
STATISTIC(NumFailNoSchedule, "Pipeliner abort due to no schedule found");
105
STATISTIC(NumFailZeroStage, "Pipeliner abort due to zero stage");
106
STATISTIC(NumFailLargeMaxStage, "Pipeliner abort due to too many stages");
107
108
/// A command line option to turn software pipelining on or off.
109
static cl::opt<bool> EnableSWP("enable-pipeliner", cl::Hidden, cl::init(true),
110
                               cl::ZeroOrMore,
111
                               cl::desc("Enable Software Pipelining"));
112
113
/// A command line option to enable SWP at -Os.
114
static cl::opt<bool> EnableSWPOptSize("enable-pipeliner-opt-size",
115
                                      cl::desc("Enable SWP at Os."), cl::Hidden,
116
                                      cl::init(false));
117
118
/// A command line argument to limit minimum initial interval for pipelining.
119
static cl::opt<int> SwpMaxMii("pipeliner-max-mii",
120
                              cl::desc("Size limit for the MII."),
121
                              cl::Hidden, cl::init(27));
122
123
/// A command line argument to limit the number of stages in the pipeline.
124
static cl::opt<int>
125
    SwpMaxStages("pipeliner-max-stages",
126
                 cl::desc("Maximum stages allowed in the generated scheduled."),
127
                 cl::Hidden, cl::init(3));
128
129
/// A command line option to disable the pruning of chain dependences due to
130
/// an unrelated Phi.
131
static cl::opt<bool>
132
    SwpPruneDeps("pipeliner-prune-deps",
133
                 cl::desc("Prune dependences between unrelated Phi nodes."),
134
                 cl::Hidden, cl::init(true));
135
136
/// A command line option to disable the pruning of loop carried order
137
/// dependences.
138
static cl::opt<bool>
139
    SwpPruneLoopCarried("pipeliner-prune-loop-carried",
140
                        cl::desc("Prune loop carried order dependences."),
141
                        cl::Hidden, cl::init(true));
142
143
#ifndef NDEBUG
144
static cl::opt<int> SwpLoopLimit("pipeliner-max", cl::Hidden, cl::init(-1));
145
#endif
146
147
static cl::opt<bool> SwpIgnoreRecMII("pipeliner-ignore-recmii",
148
                                     cl::ReallyHidden, cl::init(false),
149
                                     cl::ZeroOrMore, cl::desc("Ignore RecMII"));
150
151
static cl::opt<bool> SwpShowResMask("pipeliner-show-mask", cl::Hidden,
152
                                    cl::init(false));
153
static cl::opt<bool> SwpDebugResource("pipeliner-dbg-res", cl::Hidden,
154
                                      cl::init(false));
155
156
namespace llvm {
157
158
// A command line option to enable the CopyToPhi DAG mutation.
159
cl::opt<bool>
160
    SwpEnableCopyToPhi("pipeliner-enable-copytophi", cl::ReallyHidden,
161
                       cl::init(true), cl::ZeroOrMore,
162
                       cl::desc("Enable CopyToPhi DAG Mutation"));
163
164
} // end namespace llvm
165
166
unsigned SwingSchedulerDAG::Circuits::MaxPaths = 5;
167
char MachinePipeliner::ID = 0;
168
#ifndef NDEBUG
169
int MachinePipeliner::NumTries = 0;
170
#endif
171
char &llvm::MachinePipelinerID = MachinePipeliner::ID;
172
173
42.3k
INITIALIZE_PASS_BEGIN(MachinePipeliner, DEBUG_TYPE,
174
42.3k
                      "Modulo Software Pipelining", false, false)
175
42.3k
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
176
42.3k
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
177
42.3k
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
178
42.3k
INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
179
42.3k
INITIALIZE_PASS_END(MachinePipeliner, DEBUG_TYPE,
180
                    "Modulo Software Pipelining", false, false)
181
182
/// The "main" function for implementing Swing Modulo Scheduling.
183
13.8k
bool MachinePipeliner::runOnMachineFunction(MachineFunction &mf) {
184
13.8k
  if (skipFunction(mf.getFunction()))
185
11
    return false;
186
13.8k
187
13.8k
  if (!EnableSWP)
188
9
    return false;
189
13.8k
190
13.8k
  if (mf.getFunction().getAttributes().hasAttribute(
191
13.8k
          AttributeList::FunctionIndex, Attribute::OptimizeForSize) &&
192
13.8k
      
!EnableSWPOptSize.getPosition()39
)
193
37
    return false;
194
13.7k
195
13.7k
  if (!mf.getSubtarget().enableMachinePipeliner())
196
10.4k
    return false;
197
3.31k
198
3.31k
  // Cannot pipeline loops without instruction itineraries if we are using
199
3.31k
  // DFA for the pipeliner.
200
3.31k
  if (mf.getSubtarget().useDFAforSMS() &&
201
3.31k
      
(3.31k
!mf.getSubtarget().getInstrItineraryData()3.31k
||
202
3.31k
       mf.getSubtarget().getInstrItineraryData()->isEmpty()))
203
0
    return false;
204
3.31k
205
3.31k
  MF = &mf;
206
3.31k
  MLI = &getAnalysis<MachineLoopInfo>();
207
3.31k
  MDT = &getAnalysis<MachineDominatorTree>();
208
3.31k
  TII = MF->getSubtarget().getInstrInfo();
209
3.31k
  RegClassInfo.runOnMachineFunction(*MF);
210
3.31k
211
3.31k
  for (auto &L : *MLI)
212
359
    scheduleLoop(*L);
213
3.31k
214
3.31k
  return false;
215
3.31k
}
216
217
/// Attempt to perform the SMS algorithm on the specified loop. This function is
218
/// the main entry point for the algorithm.  The function identifies candidate
219
/// loops, calculates the minimum initiation interval, and attempts to schedule
220
/// the loop.
221
397
bool MachinePipeliner::scheduleLoop(MachineLoop &L) {
222
397
  bool Changed = false;
223
397
  for (auto &InnerLoop : L)
224
38
    Changed |= scheduleLoop(*InnerLoop);
225
397
226
#ifndef NDEBUG
227
  // Stop trying after reaching the limit (if any).
228
  int Limit = SwpLoopLimit;
229
  if (Limit >= 0) {
230
    if (NumTries >= SwpLoopLimit)
231
      return Changed;
232
    NumTries++;
233
  }
234
#endif
235
236
397
  setPragmaPipelineOptions(L);
237
397
  if (!canPipelineLoop(L)) {
238
205
    LLVM_DEBUG(dbgs() << "\n!!! Can not pipeline loop.\n");
239
205
    return Changed;
240
205
  }
241
192
242
192
  ++NumTrytoPipeline;
243
192
244
192
  Changed = swingModuloScheduler(L);
245
192
246
192
  return Changed;
247
192
}
248
249
397
void MachinePipeliner::setPragmaPipelineOptions(MachineLoop &L) {
250
397
  MachineBasicBlock *LBLK = L.getTopBlock();
251
397
252
397
  if (LBLK == nullptr)
253
0
    return;
254
397
255
397
  const BasicBlock *BBLK = LBLK->getBasicBlock();
256
397
  if (BBLK == nullptr)
257
0
    return;
258
397
259
397
  const Instruction *TI = BBLK->getTerminator();
260
397
  if (TI == nullptr)
261
0
    return;
262
397
263
397
  MDNode *LoopID = TI->getMetadata(LLVMContext::MD_loop);
264
397
  if (LoopID == nullptr)
265
394
    return;
266
3
267
3
  assert(LoopID->getNumOperands() > 0 && "requires atleast one operand");
268
3
  assert(LoopID->getOperand(0) == LoopID && "invalid loop");
269
3
270
6
  for (unsigned i = 1, e = LoopID->getNumOperands(); i < e; 
++i3
) {
271
3
    MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i));
272
3
273
3
    if (MD == nullptr)
274
0
      continue;
275
3
276
3
    MDString *S = dyn_cast<MDString>(MD->getOperand(0));
277
3
278
3
    if (S == nullptr)
279
0
      continue;
280
3
281
3
    if (S->getString() == "llvm.loop.pipeline.initiationinterval") {
282
0
      assert(MD->getNumOperands() == 2 &&
283
0
             "Pipeline initiation interval hint metadata should have two operands.");
284
0
      II_setByPragma =
285
0
          mdconst::extract<ConstantInt>(MD->getOperand(1))->getZExtValue();
286
0
      assert(II_setByPragma >= 1 && "Pipeline initiation interval must be positive.");
287
3
    } else if (S->getString() == "llvm.loop.pipeline.disable") {
288
0
      disabledByPragma = true;
289
0
    }
290
3
  }
291
3
}
292
293
/// Return true if the loop can be software pipelined.  The algorithm is
294
/// restricted to loops with a single basic block.  Make sure that the
295
/// branch in the loop can be analyzed.
296
397
bool MachinePipeliner::canPipelineLoop(MachineLoop &L) {
297
397
  if (L.getNumBlocks() != 1)
298
62
    return false;
299
335
300
335
  if (disabledByPragma)
301
0
    return false;
302
335
303
335
  // Check if the branch can't be understood because we can't do pipelining
304
335
  // if that's the case.
305
335
  LI.TBB = nullptr;
306
335
  LI.FBB = nullptr;
307
335
  LI.BrCond.clear();
308
335
  if (TII->analyzeBranch(*L.getHeader(), LI.TBB, LI.FBB, LI.BrCond)) {
309
0
    LLVM_DEBUG(
310
0
        dbgs() << "Unable to analyzeBranch, can NOT pipeline current Loop\n");
311
0
    NumFailBranch++;
312
0
    return false;
313
0
  }
314
335
315
335
  LI.LoopInductionVar = nullptr;
316
335
  LI.LoopCompare = nullptr;
317
335
  if (TII->analyzeLoop(L, LI.LoopInductionVar, LI.LoopCompare)) {
318
142
    LLVM_DEBUG(
319
142
        dbgs() << "Unable to analyzeLoop, can NOT pipeline current Loop\n");
320
142
    NumFailLoop++;
321
142
    return false;
322
142
  }
323
193
324
193
  if (!L.getLoopPreheader()) {
325
1
    LLVM_DEBUG(
326
1
        dbgs() << "Preheader not found, can NOT pipeline current Loop\n");
327
1
    NumFailPreheader++;
328
1
    return false;
329
1
  }
330
192
331
192
  // Remove any subregisters from inputs to phi nodes.
332
192
  preprocessPhiNodes(*L.getHeader());
333
192
  return true;
334
192
}
335
336
192
void MachinePipeliner::preprocessPhiNodes(MachineBasicBlock &B) {
337
192
  MachineRegisterInfo &MRI = MF->getRegInfo();
338
192
  SlotIndexes &Slots = *getAnalysis<LiveIntervals>().getSlotIndexes();
339
192
340
428
  for (MachineInstr &PI : make_range(B.begin(), B.getFirstNonPHI())) {
341
428
    MachineOperand &DefOp = PI.getOperand(0);
342
428
    assert(DefOp.getSubReg() == 0);
343
428
    auto *RC = MRI.getRegClass(DefOp.getReg());
344
428
345
1.28k
    for (unsigned i = 1, n = PI.getNumOperands(); i != n; 
i += 2856
) {
346
856
      MachineOperand &RegOp = PI.getOperand(i);
347
856
      if (RegOp.getSubReg() == 0)
348
843
        continue;
349
13
350
13
      // If the operand uses a subregister, replace it with a new register
351
13
      // without subregisters, and generate a copy to the new register.
352
13
      unsigned NewReg = MRI.createVirtualRegister(RC);
353
13
      MachineBasicBlock &PredB = *PI.getOperand(i+1).getMBB();
354
13
      MachineBasicBlock::iterator At = PredB.getFirstTerminator();
355
13
      const DebugLoc &DL = PredB.findDebugLoc(At);
356
13
      auto Copy = BuildMI(PredB, At, DL, TII->get(TargetOpcode::COPY), NewReg)
357
13
                    .addReg(RegOp.getReg(), getRegState(RegOp),
358
13
                            RegOp.getSubReg());
359
13
      Slots.insertMachineInstrInMaps(*Copy);
360
13
      RegOp.setReg(NewReg);
361
13
      RegOp.setSubReg(0);
362
13
    }
363
428
  }
364
192
}
365
366
/// The SMS algorithm consists of the following main steps:
367
/// 1. Computation and analysis of the dependence graph.
368
/// 2. Ordering of the nodes (instructions).
369
/// 3. Attempt to Schedule the loop.
370
192
bool MachinePipeliner::swingModuloScheduler(MachineLoop &L) {
371
192
  assert(L.getBlocks().size() == 1 && "SMS works on single blocks only.");
372
192
373
192
  SwingSchedulerDAG SMS(*this, L, getAnalysis<LiveIntervals>(), RegClassInfo,
374
192
                        II_setByPragma);
375
192
376
192
  MachineBasicBlock *MBB = L.getHeader();
377
192
  // The kernel should not include any terminator instructions.  These
378
192
  // will be added back later.
379
192
  SMS.startBlock(MBB);
380
192
381
192
  // Compute the number of 'real' instructions in the basic block by
382
192
  // ignoring terminators.
383
192
  unsigned size = MBB->size();
384
192
  for (MachineBasicBlock::iterator I = MBB->getFirstTerminator(),
385
192
                                   E = MBB->instr_end();
386
575
       I != E; 
++I, --size383
)
387
383
    ;
388
192
389
192
  SMS.enterRegion(MBB, MBB->begin(), MBB->getFirstTerminator(), size);
390
192
  SMS.schedule();
391
192
  SMS.exitRegion();
392
192
393
192
  SMS.finishBlock();
394
192
  return SMS.hasNewSchedule();
395
192
}
396
397
192
void SwingSchedulerDAG::setMII(unsigned ResMII, unsigned RecMII) {
398
192
  if (II_setByPragma > 0)
399
0
    MII = II_setByPragma;
400
192
  else
401
192
    MII = std::max(ResMII, RecMII);
402
192
}
403
404
192
void SwingSchedulerDAG::setMAX_II() {
405
192
  if (II_setByPragma > 0)
406
0
    MAX_II = II_setByPragma;
407
192
  else
408
192
    MAX_II = MII + 10;
409
192
}
410
411
/// We override the schedule function in ScheduleDAGInstrs to implement the
412
/// scheduling part of the Swing Modulo Scheduling algorithm.
413
192
void SwingSchedulerDAG::schedule() {
414
192
  AliasAnalysis *AA = &Pass.getAnalysis<AAResultsWrapperPass>().getAAResults();
415
192
  buildSchedGraph(AA);
416
192
  addLoopCarriedDependences(AA);
417
192
  updatePhiDependences();
418
192
  Topo.InitDAGTopologicalSorting();
419
192
  changeDependences();
420
192
  postprocessDAG();
421
192
  LLVM_DEBUG(dump());
422
192
423
192
  NodeSetType NodeSets;
424
192
  findCircuits(NodeSets);
425
192
  NodeSetType Circuits = NodeSets;
426
192
427
192
  // Calculate the MII.
428
192
  unsigned ResMII = calculateResMII();
429
192
  unsigned RecMII = calculateRecMII(NodeSets);
430
192
431
192
  fuseRecs(NodeSets);
432
192
433
192
  // This flag is used for testing and can cause correctness problems.
434
192
  if (SwpIgnoreRecMII)
435
1
    RecMII = 0;
436
192
437
192
  setMII(ResMII, RecMII);
438
192
  setMAX_II();
439
192
440
192
  LLVM_DEBUG(dbgs() << "MII = " << MII << " MAX_II = " << MAX_II
441
192
                    << " (rec=" << RecMII << ", res=" << ResMII << ")\n");
442
192
443
192
  // Can't schedule a loop without a valid MII.
444
192
  if (MII == 0) {
445
0
    LLVM_DEBUG(
446
0
        dbgs()
447
0
        << "0 is not a valid Minimal Initiation Interval, can NOT schedule\n");
448
0
    NumFailZeroMII++;
449
0
    return;
450
0
  }
451
192
452
192
  // Don't pipeline large loops.
453
192
  if (SwpMaxMii != -1 && (int)MII > SwpMaxMii) {
454
2
    LLVM_DEBUG(dbgs() << "MII > " << SwpMaxMii
455
2
                      << ", we don't pipleline large loops\n");
456
2
    NumFailLargeMaxMII++;
457
2
    return;
458
2
  }
459
190
460
190
  computeNodeFunctions(NodeSets);
461
190
462
190
  registerPressureFilter(NodeSets);
463
190
464
190
  colocateNodeSets(NodeSets);
465
190
466
190
  checkNodeSets(NodeSets);
467
190
468
190
  LLVM_DEBUG({
469
190
    for (auto &I : NodeSets) {
470
190
      dbgs() << "  Rec NodeSet ";
471
190
      I.dump();
472
190
    }
473
190
  });
474
190
475
190
  llvm::stable_sort(NodeSets, std::greater<NodeSet>());
476
190
477
190
  groupRemainingNodes(NodeSets);
478
190
479
190
  removeDuplicateNodes(NodeSets);
480
190
481
190
  LLVM_DEBUG({
482
190
    for (auto &I : NodeSets) {
483
190
      dbgs() << "  NodeSet ";
484
190
      I.dump();
485
190
    }
486
190
  });
487
190
488
190
  computeNodeOrder(NodeSets);
489
190
490
190
  // check for node order issues
491
190
  checkValidNodeOrder(Circuits);
492
190
493
190
  SMSchedule Schedule(Pass.MF);
494
190
  Scheduled = schedulePipeline(Schedule);
495
190
496
190
  if (!Scheduled){
497
66
    LLVM_DEBUG(dbgs() << "No schedule found, return\n");
498
66
    NumFailNoSchedule++;
499
66
    return;
500
66
  }
501
124
502
124
  unsigned numStages = Schedule.getMaxStageCount();
503
124
  // No need to generate pipeline if there are no overlapped iterations.
504
124
  if (numStages == 0) {
505
0
    LLVM_DEBUG(
506
0
        dbgs() << "No overlapped iterations, no need to generate pipeline\n");
507
0
    NumFailZeroStage++;
508
0
    return;
509
0
  }
510
124
  // Check that the maximum stage count is less than user-defined limit.
511
124
  if (SwpMaxStages > -1 && (int)numStages > SwpMaxStages) {
512
0
    LLVM_DEBUG(dbgs() << "numStages:" << numStages << ">" << SwpMaxStages
513
0
                      << " : too many stages, abort\n");
514
0
    NumFailLargeMaxStage++;
515
0
    return;
516
0
  }
517
124
518
124
  generatePipelinedLoop(Schedule);
519
124
  ++NumPipelined;
520
124
}
521
522
/// Clean up after the software pipeliner runs.
523
192
void SwingSchedulerDAG::finishBlock() {
524
192
  for (MachineInstr *I : NewMIs)
525
21
    MF.DeleteMachineInstr(I);
526
192
  NewMIs.clear();
527
192
528
192
  // Call the superclass.
529
192
  ScheduleDAGInstrs::finishBlock();
530
192
}
531
532
/// Return the register values for  the operands of a Phi instruction.
533
/// This function assume the instruction is a Phi.
534
static void getPhiRegs(MachineInstr &Phi, MachineBasicBlock *Loop,
535
2.69k
                       unsigned &InitVal, unsigned &LoopVal) {
536
2.69k
  assert(Phi.isPHI() && "Expecting a Phi.");
537
2.69k
538
2.69k
  InitVal = 0;
539
2.69k
  LoopVal = 0;
540
8.08k
  for (unsigned i = 1, e = Phi.getNumOperands(); i != e; 
i += 25.38k
)
541
5.38k
    if (Phi.getOperand(i + 1).getMBB() != Loop)
542
2.69k
      InitVal = Phi.getOperand(i).getReg();
543
2.69k
    else
544
2.69k
      LoopVal = Phi.getOperand(i).getReg();
545
2.69k
546
2.69k
  assert(InitVal != 0 && LoopVal != 0 && "Unexpected Phi structure.");
547
2.69k
}
548
549
/// Return the Phi register value that comes from the incoming block.
550
335
static unsigned getInitPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB) {
551
335
  for (unsigned i = 1, e = Phi.getNumOperands(); i != e; 
i += 20
)
552
335
    if (Phi.getOperand(i + 1).getMBB() != LoopBB)
553
335
      return Phi.getOperand(i).getReg();
554
335
  
return 00
;
555
335
}
556
557
/// Return the Phi register value that comes the loop block.
558
1.69k
static unsigned getLoopPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB) {
559
3.45k
  for (unsigned i = 1, e = Phi.getNumOperands(); i != e; 
i += 21.75k
)
560
3.39k
    if (Phi.getOperand(i + 1).getMBB() == LoopBB)
561
1.63k
      return Phi.getOperand(i).getReg();
562
1.69k
  
return 062
;
563
1.69k
}
564
565
/// Return true if SUb can be reached from SUa following the chain edges.
566
365
static bool isSuccOrder(SUnit *SUa, SUnit *SUb) {
567
365
  SmallPtrSet<SUnit *, 8> Visited;
568
365
  SmallVector<SUnit *, 8> Worklist;
569
365
  Worklist.push_back(SUa);
570
531
  while (!Worklist.empty()) {
571
442
    const SUnit *SU = Worklist.pop_back_val();
572
954
    for (auto &SI : SU->Succs) {
573
954
      SUnit *SuccSU = SI.getSUnit();
574
954
      if (SI.getKind() == SDep::Order) {
575
544
        if (Visited.count(SuccSU))
576
0
          continue;
577
544
        if (SuccSU == SUb)
578
276
          return true;
579
268
        Worklist.push_back(SuccSU);
580
268
        Visited.insert(SuccSU);
581
268
      }
582
954
    }
583
442
  }
584
365
  
return false89
;
585
365
}
586
587
/// Return true if the instruction causes a chain between memory
588
/// references before and after it.
589
2.03k
static bool isDependenceBarrier(MachineInstr &MI, AliasAnalysis *AA) {
590
2.03k
  return MI.isCall() || MI.mayRaiseFPException() ||
591
2.03k
         MI.hasUnmodeledSideEffects() ||
592
2.03k
         
(2.03k
MI.hasOrderedMemoryRef()2.03k
&&
593
2.03k
          
(1
!MI.mayLoad()1
||
!MI.isDereferenceableInvariantLoad(AA)1
));
594
2.03k
}
595
596
/// Return the underlying objects for the memory references of an instruction.
597
/// This function calls the code in ValueTracking, but first checks that the
598
/// instruction has a memory operand.
599
static void getUnderlyingObjects(const MachineInstr *MI,
600
                                 SmallVectorImpl<const Value *> &Objs,
601
491
                                 const DataLayout &DL) {
602
491
  if (!MI->hasOneMemOperand())
603
38
    return;
604
453
  MachineMemOperand *MM = *MI->memoperands_begin();
605
453
  if (!MM->getValue())
606
0
    return;
607
453
  GetUnderlyingObjects(MM->getValue(), Objs, DL);
608
453
  for (const Value *V : Objs) {
609
453
    if (!isIdentifiedObject(V)) {
610
295
      Objs.clear();
611
295
      return;
612
295
    }
613
158
    Objs.push_back(V);
614
158
  }
615
453
}
616
617
/// Add a chain edge between a load and store if the store can be an
618
/// alias of the load on a subsequent iteration, i.e., a loop carried
619
/// dependence. This code is very similar to the code in ScheduleDAGInstrs
620
/// but that code doesn't create loop carried dependences.
621
192
void SwingSchedulerDAG::addLoopCarriedDependences(AliasAnalysis *AA) {
622
192
  MapVector<const Value *, SmallVector<SUnit *, 4>> PendingLoads;
623
192
  Value *UnknownValue =
624
192
    UndefValue::get(Type::getVoidTy(MF.getFunction().getContext()));
625
2.03k
  for (auto &SU : SUnits) {
626
2.03k
    MachineInstr &MI = *SU.getInstr();
627
2.03k
    if (isDependenceBarrier(MI, AA))
628
3
      PendingLoads.clear();
629
2.03k
    else if (MI.mayLoad()) {
630
328
      SmallVector<const Value *, 4> Objs;
631
328
      getUnderlyingObjects(&MI, Objs, MF.getDataLayout());
632
328
      if (Objs.empty())
633
226
        Objs.push_back(UnknownValue);
634
430
      for (auto V : Objs) {
635
430
        SmallVector<SUnit *, 4> &SUs = PendingLoads[V];
636
430
        SUs.push_back(&SU);
637
430
      }
638
1.70k
    } else if (MI.mayStore()) {
639
163
      SmallVector<const Value *, 4> Objs;
640
163
      getUnderlyingObjects(&MI, Objs, MF.getDataLayout());
641
163
      if (Objs.empty())
642
107
        Objs.push_back(UnknownValue);
643
219
      for (auto V : Objs) {
644
219
        MapVector<const Value *, SmallVector<SUnit *, 4>>::iterator I =
645
219
            PendingLoads.find(V);
646
219
        if (I == PendingLoads.end())
647
106
          continue;
648
365
        
for (auto Load : I->second)113
{
649
365
          if (isSuccOrder(Load, &SU))
650
276
            continue;
651
89
          MachineInstr &LdMI = *Load->getInstr();
652
89
          // First, perform the cheaper check that compares the base register.
653
89
          // If they are the same and the load offset is less than the store
654
89
          // offset, then mark the dependence as loop carried potentially.
655
89
          const MachineOperand *BaseOp1, *BaseOp2;
656
89
          int64_t Offset1, Offset2;
657
89
          if (TII->getMemOperandWithOffset(LdMI, BaseOp1, Offset1, TRI) &&
658
89
              
TII->getMemOperandWithOffset(MI, BaseOp2, Offset2, TRI)88
) {
659
86
            if (BaseOp1->isIdenticalTo(*BaseOp2) &&
660
86
                
(int)Offset1 < (int)Offset244
) {
661
24
              assert(TII->areMemAccessesTriviallyDisjoint(LdMI, MI, AA) &&
662
24
                     "What happened to the chain edge?");
663
24
              SDep Dep(Load, SDep::Barrier);
664
24
              Dep.setLatency(1);
665
24
              SU.addPred(Dep);
666
24
              continue;
667
24
            }
668
65
          }
669
65
          // Second, the more expensive check that uses alias analysis on the
670
65
          // base registers. If they alias, and the load offset is less than
671
65
          // the store offset, the mark the dependence as loop carried.
672
65
          if (!AA) {
673
0
            SDep Dep(Load, SDep::Barrier);
674
0
            Dep.setLatency(1);
675
0
            SU.addPred(Dep);
676
0
            continue;
677
0
          }
678
65
          MachineMemOperand *MMO1 = *LdMI.memoperands_begin();
679
65
          MachineMemOperand *MMO2 = *MI.memoperands_begin();
680
65
          if (!MMO1->getValue() || !MMO2->getValue()) {
681
0
            SDep Dep(Load, SDep::Barrier);
682
0
            Dep.setLatency(1);
683
0
            SU.addPred(Dep);
684
0
            continue;
685
0
          }
686
65
          if (MMO1->getValue() == MMO2->getValue() &&
687
65
              
MMO1->getOffset() <= MMO2->getOffset()20
) {
688
0
            SDep Dep(Load, SDep::Barrier);
689
0
            Dep.setLatency(1);
690
0
            SU.addPred(Dep);
691
0
            continue;
692
0
          }
693
65
          AliasResult AAResult = AA->alias(
694
65
              MemoryLocation(MMO1->getValue(), LocationSize::unknown(),
695
65
                             MMO1->getAAInfo()),
696
65
              MemoryLocation(MMO2->getValue(), LocationSize::unknown(),
697
65
                             MMO2->getAAInfo()));
698
65
699
65
          if (AAResult != NoAlias) {
700
20
            SDep Dep(Load, SDep::Barrier);
701
20
            Dep.setLatency(1);
702
20
            SU.addPred(Dep);
703
20
          }
704
65
        }
705
113
      }
706
163
    }
707
2.03k
  }
708
192
}
709
710
/// Update the phi dependences to the DAG because ScheduleDAGInstrs no longer
711
/// processes dependences for PHIs. This function adds true dependences
712
/// from a PHI to a use, and a loop carried dependence from the use to the
713
/// PHI. The loop carried dependence is represented as an anti dependence
714
/// edge. This function also removes chain dependences between unrelated
715
/// PHIs.
716
192
void SwingSchedulerDAG::updatePhiDependences() {
717
192
  SmallVector<SDep, 4> RemoveDeps;
718
192
  const TargetSubtargetInfo &ST = MF.getSubtarget<TargetSubtargetInfo>();
719
192
720
192
  // Iterate over each DAG node.
721
2.03k
  for (SUnit &I : SUnits) {
722
2.03k
    RemoveDeps.clear();
723
2.03k
    // Set to true if the instruction has an operand defined by a Phi.
724
2.03k
    unsigned HasPhiUse = 0;
725
2.03k
    unsigned HasPhiDef = 0;
726
2.03k
    MachineInstr *MI = I.getInstr();
727
2.03k
    // Iterate over each operand, and we process the definitions.
728
2.03k
    for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
729
2.03k
                                    MOE = MI->operands_end();
730
9.72k
         MOI != MOE; 
++MOI7.69k
) {
731
7.69k
      if (!MOI->isReg())
732
1.94k
        continue;
733
5.74k
      unsigned Reg = MOI->getReg();
734
5.74k
      if (MOI->isDef()) {
735
2.04k
        // If the register is used by a Phi, then create an anti dependence.
736
2.04k
        for (MachineRegisterInfo::use_instr_iterator
737
2.04k
                 UI = MRI.use_instr_begin(Reg),
738
2.04k
                 UE = MRI.use_instr_end();
739
4.82k
             UI != UE; 
++UI2.78k
) {
740
2.78k
          MachineInstr *UseMI = &*UI;
741
2.78k
          SUnit *SU = getSUnit(UseMI);
742
2.78k
          if (SU != nullptr && 
UseMI->isPHI()2.70k
) {
743
425
            if (!MI->isPHI()) {
744
399
              SDep Dep(SU, SDep::Anti, Reg);
745
399
              Dep.setLatency(1);
746
399
              I.addPred(Dep);
747
399
            } else {
748
26
              HasPhiDef = Reg;
749
26
              // Add a chain edge to a dependent Phi that isn't an existing
750
26
              // predecessor.
751
26
              if (SU->NodeNum < I.NodeNum && 
!I.isPred(SU)14
)
752
14
                I.addPred(SDep(SU, SDep::Barrier));
753
26
            }
754
425
          }
755
2.78k
        }
756
3.70k
      } else if (MOI->isUse()) {
757
3.70k
        // If the register is defined by a Phi, then create a true dependence.
758
3.70k
        MachineInstr *DefMI = MRI.getUniqueVRegDef(Reg);
759
3.70k
        if (DefMI == nullptr)
760
5
          continue;
761
3.69k
        SUnit *SU = getSUnit(DefMI);
762
3.69k
        if (SU != nullptr && 
DefMI->isPHI()2.72k
) {
763
785
          if (!MI->isPHI()) {
764
759
            SDep Dep(SU, SDep::Data, Reg);
765
759
            Dep.setLatency(0);
766
759
            ST.adjustSchedDependency(SU, &I, Dep);
767
759
            I.addPred(Dep);
768
759
          } else {
769
26
            HasPhiUse = Reg;
770
26
            // Add a chain edge to a dependent Phi that isn't an existing
771
26
            // predecessor.
772
26
            if (SU->NodeNum < I.NodeNum && 
!I.isPred(SU)12
)
773
0
              I.addPred(SDep(SU, SDep::Barrier));
774
26
          }
775
785
        }
776
3.69k
      }
777
5.74k
    }
778
2.03k
    // Remove order dependences from an unrelated Phi.
779
2.03k
    if (!SwpPruneDeps)
780
0
      continue;
781
2.93k
    
for (auto &PI : I.Preds)2.03k
{
782
2.93k
      MachineInstr *PMI = PI.getSUnit()->getInstr();
783
2.93k
      if (PMI->isPHI() && 
PI.getKind() == SDep::Order1.18k
) {
784
14
        if (I.getInstr()->isPHI()) {
785
14
          if (PMI->getOperand(0).getReg() == HasPhiUse)
786
0
            continue;
787
14
          if (getLoopPhiReg(*PMI, PMI->getParent()) == HasPhiDef)
788
14
            continue;
789
0
        }
790
0
        RemoveDeps.push_back(PI);
791
0
      }
792
2.93k
    }
793
2.03k
    for (int i = 0, e = RemoveDeps.size(); i != e; 
++i0
)
794
0
      I.removePred(RemoveDeps[i]);
795
2.03k
  }
796
192
}
797
798
/// Iterate over each DAG node and see if we can change any dependences
799
/// in order to reduce the recurrence MII.
800
192
void SwingSchedulerDAG::changeDependences() {
801
192
  // See if an instruction can use a value from the previous iteration.
802
192
  // If so, we update the base and offset of the instruction and change
803
192
  // the dependences.
804
2.03k
  for (SUnit &I : SUnits) {
805
2.03k
    unsigned BasePos = 0, OffsetPos = 0, NewBase = 0;
806
2.03k
    int64_t NewOffset = 0;
807
2.03k
    if (!canUseLastOffsetValue(I.getInstr(), BasePos, OffsetPos, NewBase,
808
2.03k
                               NewOffset))
809
2.01k
      continue;
810
25
811
25
    // Get the MI and SUnit for the instruction that defines the original base.
812
25
    unsigned OrigBase = I.getInstr()->getOperand(BasePos).getReg();
813
25
    MachineInstr *DefMI = MRI.getUniqueVRegDef(OrigBase);
814
25
    if (!DefMI)
815
0
      continue;
816
25
    SUnit *DefSU = getSUnit(DefMI);
817
25
    if (!DefSU)
818
0
      continue;
819
25
    // Get the MI and SUnit for the instruction that defins the new base.
820
25
    MachineInstr *LastMI = MRI.getUniqueVRegDef(NewBase);
821
25
    if (!LastMI)
822
0
      continue;
823
25
    SUnit *LastSU = getSUnit(LastMI);
824
25
    if (!LastSU)
825
0
      continue;
826
25
827
25
    if (Topo.IsReachable(&I, LastSU))
828
0
      continue;
829
25
830
25
    // Remove the dependence. The value now depends on a prior iteration.
831
25
    SmallVector<SDep, 4> Deps;
832
50
    for (SUnit::pred_iterator P = I.Preds.begin(), E = I.Preds.end(); P != E;
833
25
         ++P)
834
25
      if (P->getSUnit() == DefSU)
835
25
        Deps.push_back(*P);
836
50
    for (int i = 0, e = Deps.size(); i != e; 
i++25
) {
837
25
      Topo.RemovePred(&I, Deps[i].getSUnit());
838
25
      I.removePred(Deps[i]);
839
25
    }
840
25
    // Remove the chain dependence between the instructions.
841
25
    Deps.clear();
842
25
    for (auto &P : LastSU->Preds)
843
98
      if (P.getSUnit() == &I && 
P.getKind() == SDep::Order22
)
844
22
        Deps.push_back(P);
845
47
    for (int i = 0, e = Deps.size(); i != e; 
i++22
) {
846
22
      Topo.RemovePred(LastSU, Deps[i].getSUnit());
847
22
      LastSU->removePred(Deps[i]);
848
22
    }
849
25
850
25
    // Add a dependence between the new instruction and the instruction
851
25
    // that defines the new base.
852
25
    SDep Dep(&I, SDep::Anti, NewBase);
853
25
    Topo.AddPred(LastSU, &I);
854
25
    LastSU->addPred(Dep);
855
25
856
25
    // Remember the base and offset information so that we can update the
857
25
    // instruction during code generation.
858
25
    InstrChanges[&I] = std::make_pair(NewBase, NewOffset);
859
25
  }
860
192
}
861
862
namespace {
863
864
// FuncUnitSorter - Comparison operator used to sort instructions by
865
// the number of functional unit choices.
866
struct FuncUnitSorter {
867
  const InstrItineraryData *InstrItins;
868
  const MCSubtargetInfo *STI;
869
  DenseMap<unsigned, unsigned> Resources;
870
871
  FuncUnitSorter(const TargetSubtargetInfo &TSI)
872
192
      : InstrItins(TSI.getInstrItineraryData()), STI(&TSI) {}
873
874
  // Compute the number of functional unit alternatives needed
875
  // at each stage, and take the minimum value. We prioritize the
876
  // instructions by the least number of choices first.
877
16.4k
  unsigned minFuncUnits(const MachineInstr *Inst, unsigned &F) const {
878
16.4k
    unsigned SchedClass = Inst->getDesc().getSchedClass();
879
16.4k
    unsigned min = UINT_MAX;
880
16.4k
    if (InstrItins && !InstrItins->isEmpty()) {
881
16.2k
      for (const InstrStage &IS :
882
16.2k
           make_range(InstrItins->beginStage(SchedClass),
883
24.5k
                      InstrItins->endStage(SchedClass))) {
884
24.5k
        unsigned funcUnits = IS.getUnits();
885
24.5k
        unsigned numAlternatives = countPopulation(funcUnits);
886
24.5k
        if (numAlternatives < min) {
887
17.8k
          min = numAlternatives;
888
17.8k
          F = funcUnits;
889
17.8k
        }
890
24.5k
      }
891
16.2k
      return min;
892
16.2k
    }
893
140
    if (STI && STI->getSchedModel().hasInstrSchedModel()) {
894
140
      const MCSchedClassDesc *SCDesc =
895
140
          STI->getSchedModel().getSchedClassDesc(SchedClass);
896
140
      if (!SCDesc->isValid())
897
0
        // No valid Schedule Class Desc for schedClass, should be
898
0
        // Pseudo/PostRAPseudo
899
0
        return min;
900
140
901
140
      for (const MCWriteProcResEntry &PRE :
902
140
           make_range(STI->getWriteProcResBegin(SCDesc),
903
661
                      STI->getWriteProcResEnd(SCDesc))) {
904
661
        if (!PRE.Cycles)
905
0
          continue;
906
661
        const MCProcResourceDesc *ProcResource =
907
661
            STI->getSchedModel().getProcResource(PRE.ProcResourceIdx);
908
661
        unsigned NumUnits = ProcResource->NumUnits;
909
661
        if (NumUnits < min) {
910
232
          min = NumUnits;
911
232
          F = PRE.ProcResourceIdx;
912
232
        }
913
661
      }
914
140
      return min;
915
140
    }
916
0
    llvm_unreachable("Should have non-empty InstrItins or hasInstrSchedModel!");
917
0
  }
918
919
  // Compute the critical resources needed by the instruction. This
920
  // function records the functional units needed by instructions that
921
  // must use only one functional unit. We use this as a tie breaker
922
  // for computing the resource MII. The instrutions that require
923
  // the same, highly used, functional unit have high priority.
924
1.61k
  void calcCriticalResources(MachineInstr &MI) {
925
1.61k
    unsigned SchedClass = MI.getDesc().getSchedClass();
926
1.61k
    if (InstrItins && !InstrItins->isEmpty()) {
927
1.59k
      for (const InstrStage &IS :
928
1.59k
           make_range(InstrItins->beginStage(SchedClass),
929
2.37k
                      InstrItins->endStage(SchedClass))) {
930
2.37k
        unsigned FuncUnits = IS.getUnits();
931
2.37k
        if (countPopulation(FuncUnits) == 1)
932
683
          Resources[FuncUnits]++;
933
2.37k
      }
934
1.59k
      return;
935
1.59k
    }
936
19
    if (STI && STI->getSchedModel().hasInstrSchedModel()) {
937
19
      const MCSchedClassDesc *SCDesc =
938
19
          STI->getSchedModel().getSchedClassDesc(SchedClass);
939
19
      if (!SCDesc->isValid())
940
0
        // No valid Schedule Class Desc for schedClass, should be
941
0
        // Pseudo/PostRAPseudo
942
0
        return;
943
19
944
19
      for (const MCWriteProcResEntry &PRE :
945
19
           make_range(STI->getWriteProcResBegin(SCDesc),
946
96
                      STI->getWriteProcResEnd(SCDesc))) {
947
96
        if (!PRE.Cycles)
948
0
          continue;
949
96
        Resources[PRE.ProcResourceIdx]++;
950
96
      }
951
19
      return;
952
19
    }
953
0
    llvm_unreachable("Should have non-empty InstrItins or hasInstrSchedModel!");
954
0
  }
955
956
  /// Return true if IS1 has less priority than IS2.
957
8.21k
  bool operator()(const MachineInstr *IS1, const MachineInstr *IS2) const {
958
8.21k
    unsigned F1 = 0, F2 = 0;
959
8.21k
    unsigned MFUs1 = minFuncUnits(IS1, F1);
960
8.21k
    unsigned MFUs2 = minFuncUnits(IS2, F2);
961
8.21k
    if (MFUs1 == 1 && 
MFUs2 == 12.22k
)
962
1.32k
      return Resources.lookup(F1) < Resources.lookup(F2);
963
6.89k
    return MFUs1 > MFUs2;
964
6.89k
  }
965
};
966
967
} // end anonymous namespace
968
969
/// Calculate the resource constrained minimum initiation interval for the
970
/// specified loop. We use the DFA to model the resources needed for
971
/// each instruction, and we ignore dependences. A different DFA is created
972
/// for each cycle that is required. When adding a new instruction, we attempt
973
/// to add it to each existing DFA, until a legal space is found. If the
974
/// instruction cannot be reserved in an existing DFA, we create a new one.
975
192
unsigned SwingSchedulerDAG::calculateResMII() {
976
192
977
192
  LLVM_DEBUG(dbgs() << "calculateResMII:\n");
978
192
  SmallVector<ResourceManager*, 8> Resources;
979
192
  MachineBasicBlock *MBB = Loop.getHeader();
980
192
  Resources.push_back(new ResourceManager(&MF.getSubtarget()));
981
192
982
192
  // Sort the instructions by the number of available choices for scheduling,
983
192
  // least to most. Use the number of critical resources as the tie breaker.
984
192
  FuncUnitSorter FUS = FuncUnitSorter(MF.getSubtarget());
985
192
  for (MachineBasicBlock::iterator I = MBB->getFirstNonPHI(),
986
192
                                   E = MBB->getFirstTerminator();
987
1.80k
       I != E; 
++I1.61k
)
988
1.61k
    FUS.calcCriticalResources(*I);
989
192
  PriorityQueue<MachineInstr *, std::vector<MachineInstr *>, FuncUnitSorter>
990
192
      FuncUnitOrder(FUS);
991
192
992
192
  for (MachineBasicBlock::iterator I = MBB->getFirstNonPHI(),
993
192
                                   E = MBB->getFirstTerminator();
994
1.80k
       I != E; 
++I1.61k
)
995
1.61k
    FuncUnitOrder.push(&*I);
996
192
997
1.80k
  while (!FuncUnitOrder.empty()) {
998
1.61k
    MachineInstr *MI = FuncUnitOrder.top();
999
1.61k
    FuncUnitOrder.pop();
1000
1.61k
    if (TII->isZeroCost(MI->getOpcode()))
1001
89
      continue;
1002
1.52k
    // Attempt to reserve the instruction in an existing DFA. At least one
1003
1.52k
    // DFA is needed for each cycle.
1004
1.52k
    unsigned NumCycles = getSUnit(MI)->Latency;
1005
1.52k
    unsigned ReservedCycles = 0;
1006
1.52k
    SmallVectorImpl<ResourceManager *>::iterator RI = Resources.begin();
1007
1.52k
    SmallVectorImpl<ResourceManager *>::iterator RE = Resources.end();
1008
1.52k
    LLVM_DEBUG({
1009
1.52k
      dbgs() << "Trying to reserve resource for " << NumCycles
1010
1.52k
             << " cycles for \n";
1011
1.52k
      MI->dump();
1012
1.52k
    });
1013
3.07k
    for (unsigned C = 0; C < NumCycles; 
++C1.54k
)
1014
6.19k
      
while (1.54k
RI != RE) {
1015
5.84k
        if ((*RI)->canReserveResources(*MI)) {
1016
1.19k
          (*RI)->reserveResources(*MI);
1017
1.19k
          ++ReservedCycles;
1018
1.19k
          break;
1019
1.19k
        }
1020
4.64k
        RI++;
1021
4.64k
      }
1022
1.52k
    LLVM_DEBUG(dbgs() << "ReservedCycles:" << ReservedCycles
1023
1.52k
                      << ", NumCycles:" << NumCycles << "\n");
1024
1.52k
    // Add new DFAs, if needed, to reserve resources.
1025
1.87k
    for (unsigned C = ReservedCycles; C < NumCycles; 
++C352
) {
1026
352
      LLVM_DEBUG(if (SwpDebugResource) dbgs()
1027
352
                 << "NewResource created to reserve resources"
1028
352
                 << "\n");
1029
352
      ResourceManager *NewResource = new ResourceManager(&MF.getSubtarget());
1030
352
      assert(NewResource->canReserveResources(*MI) && "Reserve error.");
1031
352
      NewResource->reserveResources(*MI);
1032
352
      Resources.push_back(NewResource);
1033
352
    }
1034
1.52k
  }
1035
192
  int Resmii = Resources.size();
1036
192
  LLVM_DEBUG(dbgs() << "Retrun Res MII:" << Resmii << "\n");
1037
192
  // Delete the memory for each of the DFAs that were created earlier.
1038
544
  for (ResourceManager *RI : Resources) {
1039
544
    ResourceManager *D = RI;
1040
544
    delete D;
1041
544
  }
1042
192
  Resources.clear();
1043
192
  return Resmii;
1044
192
}
1045
1046
/// Calculate the recurrence-constrainted minimum initiation interval.
1047
/// Iterate over each circuit.  Compute the delay(c) and distance(c)
1048
/// for each circuit. The II needs to satisfy the inequality
1049
/// delay(c) - II*distance(c) <= 0. For each circuit, choose the smallest
1050
/// II that satisfies the inequality, and the RecMII is the maximum
1051
/// of those values.
1052
192
unsigned SwingSchedulerDAG::calculateRecMII(NodeSetType &NodeSets) {
1053
192
  unsigned RecMII = 0;
1054
192
1055
623
  for (NodeSet &Nodes : NodeSets) {
1056
623
    if (Nodes.empty())
1057
0
      continue;
1058
623
1059
623
    unsigned Delay = Nodes.getLatency();
1060
623
    unsigned Distance = 1;
1061
623
1062
623
    // ii = ceil(delay / distance)
1063
623
    unsigned CurMII = (Delay + Distance - 1) / Distance;
1064
623
    Nodes.setRecMII(CurMII);
1065
623
    if (CurMII > RecMII)
1066
228
      RecMII = CurMII;
1067
623
  }
1068
192
1069
192
  return RecMII;
1070
192
}
1071
1072
/// Swap all the anti dependences in the DAG. That means it is no longer a DAG,
1073
/// but we do this to find the circuits, and then change them back.
1074
384
static void swapAntiDependences(std::vector<SUnit> &SUnits) {
1075
384
  SmallVector<std::pair<SUnit *, SDep>, 8> DepsAdded;
1076
4.45k
  for (unsigned i = 0, e = SUnits.size(); i != e; 
++i4.07k
) {
1077
4.07k
    SUnit *SU = &SUnits[i];
1078
4.07k
    for (SUnit::pred_iterator IP = SU->Preds.begin(), EP = SU->Preds.end();
1079
9.90k
         IP != EP; 
++IP5.82k
) {
1080
5.82k
      if (IP->getKind() != SDep::Anti)
1081
4.97k
        continue;
1082
854
      DepsAdded.push_back(std::make_pair(SU, *IP));
1083
854
    }
1084
4.07k
  }
1085
384
  for (SmallVector<std::pair<SUnit *, SDep>, 8>::iterator I = DepsAdded.begin(),
1086
384
                                                          E = DepsAdded.end();
1087
1.23k
       I != E; 
++I854
) {
1088
854
    // Remove this anti dependency and add one in the reverse direction.
1089
854
    SUnit *SU = I->first;
1090
854
    SDep &D = I->second;
1091
854
    SUnit *TargetSU = D.getSUnit();
1092
854
    unsigned Reg = D.getReg();
1093
854
    unsigned Lat = D.getLatency();
1094
854
    SU->removePred(D);
1095
854
    SDep Dep(SU, SDep::Anti, Reg);
1096
854
    Dep.setLatency(Lat);
1097
854
    TargetSU->addPred(Dep);
1098
854
  }
1099
384
}
1100
1101
/// Create the adjacency structure of the nodes in the graph.
1102
void SwingSchedulerDAG::Circuits::createAdjacencyStructure(
1103
192
    SwingSchedulerDAG *DAG) {
1104
192
  BitVector Added(SUnits.size());
1105
192
  DenseMap<int, int> OutputDeps;
1106
2.22k
  for (int i = 0, e = SUnits.size(); i != e; 
++i2.03k
) {
1107
2.03k
    Added.reset();
1108
2.03k
    // Add any successor to the adjacency matrix and exclude duplicates.
1109
2.91k
    for (auto &SI : SUnits[i].Succs) {
1110
2.91k
      // Only create a back-edge on the first and last nodes of a dependence
1111
2.91k
      // chain. This records any chains and adds them later.
1112
2.91k
      if (SI.getKind() == SDep::Output) {
1113
4
        int N = SI.getSUnit()->NodeNum;
1114
4
        int BackEdge = i;
1115
4
        auto Dep = OutputDeps.find(BackEdge);
1116
4
        if (Dep != OutputDeps.end()) {
1117
2
          BackEdge = Dep->second;
1118
2
          OutputDeps.erase(Dep);
1119
2
        }
1120
4
        OutputDeps[N] = BackEdge;
1121
4
      }
1122
2.91k
      // Do not process a boundary node, an artificial node.
1123
2.91k
      // A back-edge is processed only if it goes to a Phi.
1124
2.91k
      if (SI.getSUnit()->isBoundaryNode() || 
SI.isArtificial()2.91k
||
1125
2.91k
          
(2.90k
SI.getKind() == SDep::Anti2.90k
&&
!SI.getSUnit()->getInstr()->isPHI()427
))
1126
38
        continue;
1127
2.88k
      int N = SI.getSUnit()->NodeNum;
1128
2.88k
      if (!Added.test(N)) {
1129
2.86k
        AdjK[i].push_back(N);
1130
2.86k
        Added.set(N);
1131
2.86k
      }
1132
2.88k
    }
1133
2.03k
    // A chain edge between a store and a load is treated as a back-edge in the
1134
2.03k
    // adjacency matrix.
1135
2.91k
    for (auto &PI : SUnits[i].Preds) {
1136
2.91k
      if (!SUnits[i].getInstr()->mayStore() ||
1137
2.91k
          
!DAG->isLoopCarriedDep(&SUnits[i], PI, false)519
)
1138
2.77k
        continue;
1139
142
      if (PI.getKind() == SDep::Order && PI.getSUnit()->getInstr()->mayLoad()) {
1140
141
        int N = PI.getSUnit()->NodeNum;
1141
141
        if (!Added.test(N)) {
1142
141
          AdjK[i].push_back(N);
1143
141
          Added.set(N);
1144
141
        }
1145
141
      }
1146
142
    }
1147
2.03k
  }
1148
192
  // Add back-edges in the adjacency matrix for the output dependences.
1149
192
  for (auto &OD : OutputDeps)
1150
1
    if (!Added.test(OD.second)) {
1151
1
      AdjK[OD.first].push_back(OD.second);
1152
1
      Added.set(OD.second);
1153
1
    }
1154
192
}
1155
1156
/// Identify an elementary circuit in the dependence graph starting at the
1157
/// specified node.
1158
bool SwingSchedulerDAG::Circuits::circuit(int V, int S, NodeSetType &NodeSets,
1159
16.7k
                                          bool HasBackedge) {
1160
16.7k
  SUnit *SV = &SUnits[V];
1161
16.7k
  bool F = false;
1162
16.7k
  Stack.insert(SV);
1163
16.7k
  Blocked.set(V);
1164
16.7k
1165
24.5k
  for (auto W : AdjK[V]) {
1166
24.5k
    if (NumPaths > MaxPaths)
1167
151
      break;
1168
24.3k
    if (W < S)
1169
3.45k
      continue;
1170
20.9k
    if (W == S) {
1171
815
      if (!HasBackedge)
1172
623
        NodeSets.push_back(NodeSet(Stack.begin(), Stack.end()));
1173
815
      F = true;
1174
815
      ++NumPaths;
1175
815
      break;
1176
20.0k
    } else if (!Blocked.test(W)) {
1177
14.6k
      if (circuit(W, S, NodeSets,
1178
14.6k
                  Node2Idx->at(W) < Node2Idx->at(V) ? 
true1.26k
:
HasBackedge13.4k
))
1179
2.13k
        F = true;
1180
14.6k
    }
1181
20.9k
  }
1182
16.7k
1183
16.7k
  if (F)
1184
2.59k
    unblock(V);
1185
14.1k
  else {
1186
19.3k
    for (auto W : AdjK[V]) {
1187
19.3k
      if (W < S)
1188
3.08k
        continue;
1189
16.2k
      if (B[W].count(SV) == 0)
1190
15.7k
        B[W].insert(SV);
1191
16.2k
    }
1192
14.1k
  }
1193
16.7k
  Stack.pop_back();
1194
16.7k
  return F;
1195
16.7k
}
1196
1197
/// Unblock a node in the circuit finding algorithm.
1198
5.63k
void SwingSchedulerDAG::Circuits::unblock(int U) {
1199
5.63k
  Blocked.reset(U);
1200
5.63k
  SmallPtrSet<SUnit *, 4> &BU = B[U];
1201
10.3k
  while (!BU.empty()) {
1202
4.75k
    SmallPtrSet<SUnit *, 4>::iterator SI = BU.begin();
1203
4.75k
    assert(SI != BU.end() && "Invalid B set.");
1204
4.75k
    SUnit *W = *SI;
1205
4.75k
    BU.erase(W);
1206
4.75k
    if (Blocked.test(W->NodeNum))
1207
3.03k
      unblock(W->NodeNum);
1208
4.75k
  }
1209
5.63k
}
1210
1211
/// Identify all the elementary circuits in the dependence graph using
1212
/// Johnson's circuit algorithm.
1213
192
void SwingSchedulerDAG::findCircuits(NodeSetType &NodeSets) {
1214
192
  // Swap all the anti dependences in the DAG. That means it is no longer a DAG,
1215
192
  // but we do this to find the circuits, and then change them back.
1216
192
  swapAntiDependences(SUnits);
1217
192
1218
192
  Circuits Cir(SUnits, Topo);
1219
192
  // Create the adjacency structure.
1220
192
  Cir.createAdjacencyStructure(this);
1221
2.22k
  for (int i = 0, e = SUnits.size(); i != e; 
++i2.03k
) {
1222
2.03k
    Cir.reset();
1223
2.03k
    Cir.circuit(i, i, NodeSets);
1224
2.03k
  }
1225
192
1226
192
  // Change the dependences back so that we've created a DAG again.
1227
192
  swapAntiDependences(SUnits);
1228
192
}
1229
1230
// Create artificial dependencies between the source of COPY/REG_SEQUENCE that
1231
// is loop-carried to the USE in next iteration. This will help pipeliner avoid
1232
// additional copies that are needed across iterations. An artificial dependence
1233
// edge is added from USE to SOURCE of COPY/REG_SEQUENCE.
1234
1235
// PHI-------Anti-Dep-----> COPY/REG_SEQUENCE (loop-carried)
1236
// SRCOfCopY------True-Dep---> COPY/REG_SEQUENCE
1237
// PHI-------True-Dep------> USEOfPhi
1238
1239
// The mutation creates
1240
// USEOfPHI -------Artificial-Dep---> SRCOfCopy
1241
1242
// This overall will ensure, the USEOfPHI is scheduled before SRCOfCopy
1243
// (since USE is a predecessor), implies, the COPY/ REG_SEQUENCE is scheduled
1244
// late  to avoid additional copies across iterations. The possible scheduling
1245
// order would be
1246
// USEOfPHI --- SRCOfCopy---  COPY/REG_SEQUENCE.
1247
1248
192
void SwingSchedulerDAG::CopyToPhiMutation::apply(ScheduleDAGInstrs *DAG) {
1249
2.03k
  for (SUnit &SU : DAG->SUnits) {
1250
2.03k
    // Find the COPY/REG_SEQUENCE instruction.
1251
2.03k
    if (!SU.getInstr()->isCopy() && 
!SU.getInstr()->isRegSequence()2.02k
)
1252
1.95k
      continue;
1253
87
1254
87
    // Record the loop carried PHIs.
1255
87
    SmallVector<SUnit *, 4> PHISUs;
1256
87
    // Record the SrcSUs that feed the COPY/REG_SEQUENCE instructions.
1257
87
    SmallVector<SUnit *, 4> SrcSUs;
1258
87
1259
161
    for (auto &Dep : SU.Preds) {
1260
161
      SUnit *TmpSU = Dep.getSUnit();
1261
161
      MachineInstr *TmpMI = TmpSU->getInstr();
1262
161
      SDep::Kind DepKind = Dep.getKind();
1263
161
      // Save the loop carried PHI.
1264
161
      if (DepKind == SDep::Anti && 
TmpMI->isPHI()12
)
1265
12
        PHISUs.push_back(TmpSU);
1266
149
      // Save the source of COPY/REG_SEQUENCE.
1267
149
      // If the source has no pre-decessors, we will end up creating cycles.
1268
149
      else if (DepKind == SDep::Data && !TmpMI->isPHI() && 
TmpSU->NumPreds > 0127
)
1269
124
        SrcSUs.push_back(TmpSU);
1270
161
    }
1271
87
1272
87
    if (PHISUs.size() == 0 || 
SrcSUs.size() == 012
)
1273
76
      continue;
1274
11
1275
11
    // Find the USEs of PHI. If the use is a PHI or REG_SEQUENCE, push back this
1276
11
    // SUnit to the container.
1277
11
    SmallVector<SUnit *, 8> UseSUs;
1278
26
    for (auto I = PHISUs.begin(); I != PHISUs.end(); 
++I15
) {
1279
26
      for (auto &Dep : (*I)->Succs) {
1280
26
        if (Dep.getKind() != SDep::Data)
1281
11
          continue;
1282
15
1283
15
        SUnit *TmpSU = Dep.getSUnit();
1284
15
        MachineInstr *TmpMI = TmpSU->getInstr();
1285
15
        if (TmpMI->isPHI() || TmpMI->isRegSequence()) {
1286
4
          PHISUs.push_back(TmpSU);
1287
4
          continue;
1288
4
        }
1289
11
        UseSUs.push_back(TmpSU);
1290
11
      }
1291
15
    }
1292
11
1293
11
    if (UseSUs.size() == 0)
1294
0
      continue;
1295
11
1296
11
    SwingSchedulerDAG *SDAG = cast<SwingSchedulerDAG>(DAG);
1297
11
    // Add the artificial dependencies if it does not form a cycle.
1298
11
    for (auto I : UseSUs) {
1299
11
      for (auto Src : SrcSUs) {
1300
11
        if (!SDAG->Topo.IsReachable(I, Src) && Src != I) {
1301
6
          Src->addPred(SDep(I, SDep::Artificial));
1302
6
          SDAG->Topo.AddPred(Src, I);
1303
6
        }
1304
11
      }
1305
11
    }
1306
11
  }
1307
192
}
1308
1309
/// Return true for DAG nodes that we ignore when computing the cost functions.
1310
/// We ignore the back-edge recurrence in order to avoid unbounded recursion
1311
/// in the calculation of the ASAP, ALAP, etc functions.
1312
30.6k
static bool ignoreDependence(const SDep &D, bool isPred) {
1313
30.6k
  if (D.isArtificial())
1314
67
    return true;
1315
30.5k
  return D.getKind() == SDep::Anti && 
isPred4.49k
;
1316
30.5k
}
1317
1318
/// Compute several functions need to order the nodes for scheduling.
1319
///  ASAP - Earliest time to schedule a node.
1320
///  ALAP - Latest time to schedule a node.
1321
///  MOV - Mobility function, difference between ALAP and ASAP.
1322
///  D - Depth of each node.
1323
///  H - Height of each node.
1324
190
void SwingSchedulerDAG::computeNodeFunctions(NodeSetType &NodeSets) {
1325
190
  ScheduleInfo.resize(SUnits.size());
1326
190
1327
190
  LLVM_DEBUG({
1328
190
    for (ScheduleDAGTopologicalSort::const_iterator I = Topo.begin(),
1329
190
                                                    E = Topo.end();
1330
190
         I != E; ++I) {
1331
190
      const SUnit &SU = SUnits[*I];
1332
190
      dumpNode(SU);
1333
190
    }
1334
190
  });
1335
190
1336
190
  int maxASAP = 0;
1337
190
  // Compute ASAP and ZeroLatencyDepth.
1338
190
  for (ScheduleDAGTopologicalSort::const_iterator I = Topo.begin(),
1339
190
                                                  E = Topo.end();
1340
2.06k
       I != E; 
++I1.87k
) {
1341
1.87k
    int asap = 0;
1342
1.87k
    int zeroLatencyDepth = 0;
1343
1.87k
    SUnit *SU = &SUnits[*I];
1344
1.87k
    for (SUnit::const_pred_iterator IP = SU->Preds.begin(),
1345
1.87k
                                    EP = SU->Preds.end();
1346
4.52k
         IP != EP; 
++IP2.64k
) {
1347
2.64k
      SUnit *pred = IP->getSUnit();
1348
2.64k
      if (IP->getLatency() == 0)
1349
1.11k
        zeroLatencyDepth =
1350
1.11k
            std::max(zeroLatencyDepth, getZeroLatencyDepth(pred) + 1);
1351
2.64k
      if (ignoreDependence(*IP, true))
1352
428
        continue;
1353
2.22k
      asap = std::max(asap, (int)(getASAP(pred) + IP->getLatency() -
1354
2.22k
                                  getDistance(pred, SU, *IP) * MII));
1355
2.22k
    }
1356
1.87k
    maxASAP = std::max(maxASAP, asap);
1357
1.87k
    ScheduleInfo[*I].ASAP = asap;
1358
1.87k
    ScheduleInfo[*I].ZeroLatencyDepth = zeroLatencyDepth;
1359
1.87k
  }
1360
190
1361
190
  // Compute ALAP, ZeroLatencyHeight, and MOV.
1362
190
  for (ScheduleDAGTopologicalSort::const_reverse_iterator I = Topo.rbegin(),
1363
190
                                                          E = Topo.rend();
1364
2.06k
       I != E; 
++I1.87k
) {
1365
1.87k
    int alap = maxASAP;
1366
1.87k
    int zeroLatencyHeight = 0;
1367
1.87k
    SUnit *SU = &SUnits[*I];
1368
1.87k
    for (SUnit::const_succ_iterator IS = SU->Succs.begin(),
1369
1.87k
                                    ES = SU->Succs.end();
1370
4.52k
         IS != ES; 
++IS2.65k
) {
1371
2.65k
      SUnit *succ = IS->getSUnit();
1372
2.65k
      if (IS->getLatency() == 0)
1373
1.11k
        zeroLatencyHeight =
1374
1.11k
            std::max(zeroLatencyHeight, getZeroLatencyHeight(succ) + 1);
1375
2.65k
      if (ignoreDependence(*IS, true))
1376
432
        continue;
1377
2.22k
      alap = std::min(alap, (int)(getALAP(succ) - IS->getLatency() +
1378
2.22k
                                  getDistance(SU, succ, *IS) * MII));
1379
2.22k
    }
1380
1.87k
1381
1.87k
    ScheduleInfo[*I].ALAP = alap;
1382
1.87k
    ScheduleInfo[*I].ZeroLatencyHeight = zeroLatencyHeight;
1383
1.87k
  }
1384
190
1385
190
  // After computing the node functions, compute the summary for each node set.
1386
190
  for (NodeSet &I : NodeSets)
1387
437
    I.computeNodeSetInfo(this);
1388
190
1389
190
  LLVM_DEBUG({
1390
190
    for (unsigned i = 0; i < SUnits.size(); i++) {
1391
190
      dbgs() << "\tNode " << i << ":\n";
1392
190
      dbgs() << "\t   ASAP = " << getASAP(&SUnits[i]) << "\n";
1393
190
      dbgs() << "\t   ALAP = " << getALAP(&SUnits[i]) << "\n";
1394
190
      dbgs() << "\t   MOV  = " << getMOV(&SUnits[i]) << "\n";
1395
190
      dbgs() << "\t   D    = " << getDepth(&SUnits[i]) << "\n";
1396
190
      dbgs() << "\t   H    = " << getHeight(&SUnits[i]) << "\n";
1397
190
      dbgs() << "\t   ZLD  = " << getZeroLatencyDepth(&SUnits[i]) << "\n";
1398
190
      dbgs() << "\t   ZLH  = " << getZeroLatencyHeight(&SUnits[i]) << "\n";
1399
190
    }
1400
190
  });
1401
190
}
1402
1403
/// Compute the Pred_L(O) set, as defined in the paper. The set is defined
1404
/// as the predecessors of the elements of NodeOrder that are not also in
1405
/// NodeOrder.
1406
static bool pred_L(SetVector<SUnit *> &NodeOrder,
1407
                   SmallSetVector<SUnit *, 8> &Preds,
1408
871
                   const NodeSet *S = nullptr) {
1409
871
  Preds.clear();
1410
871
  for (SetVector<SUnit *>::iterator I = NodeOrder.begin(), E = NodeOrder.end();
1411
7.74k
       I != E; 
++I6.87k
) {
1412
6.87k
    for (SUnit::pred_iterator PI = (*I)->Preds.begin(), PE = (*I)->Preds.end();
1413
18.8k
         PI != PE; 
++PI11.9k
) {
1414
11.9k
      if (S && 
S->count(PI->getSUnit()) == 03.86k
)
1415
3.16k
        continue;
1416
8.80k
      if (ignoreDependence(*PI, true))
1417
1.34k
        continue;
1418
7.46k
      if (NodeOrder.count(PI->getSUnit()) == 0)
1419
1.13k
        Preds.insert(PI->getSUnit());
1420
7.46k
    }
1421
6.87k
    // Back-edges are predecessors with an anti-dependence.
1422
6.87k
    for (SUnit::const_succ_iterator IS = (*I)->Succs.begin(),
1423
6.87k
                                    ES = (*I)->Succs.end();
1424
17.3k
         IS != ES; 
++IS10.4k
) {
1425
10.4k
      if (IS->getKind() != SDep::Anti)
1426
9.12k
        continue;
1427
1.31k
      if (S && 
S->count(IS->getSUnit()) == 0367
)
1428
303
        continue;
1429
1.00k
      if (NodeOrder.count(IS->getSUnit()) == 0)
1430
0
        Preds.insert(IS->getSUnit());
1431
1.00k
    }
1432
6.87k
  }
1433
871
  return !Preds.empty();
1434
871
}
1435
1436
/// Compute the Succ_L(O) set, as defined in the paper. The set is defined
1437
/// as the successors of the elements of NodeOrder that are not also in
1438
/// NodeOrder.
1439
static bool succ_L(SetVector<SUnit *> &NodeOrder,
1440
                   SmallSetVector<SUnit *, 8> &Succs,
1441
2.54k
                   const NodeSet *S = nullptr) {
1442
2.54k
  Succs.clear();
1443
2.54k
  for (SetVector<SUnit *>::iterator I = NodeOrder.begin(), E = NodeOrder.end();
1444
15.3k
       I != E; 
++I12.7k
) {
1445
12.7k
    for (SUnit::succ_iterator SI = (*I)->Succs.begin(), SE = (*I)->Succs.end();
1446
32.2k
         SI != SE; 
++SI19.4k
) {
1447
19.4k
      if (S && 
S->count(SI->getSUnit()) == 04.76k
)
1448
3.24k
        continue;
1449
16.2k
      if (ignoreDependence(*SI, false))
1450
36
        continue;
1451
16.2k
      if (NodeOrder.count(SI->getSUnit()) == 0)
1452
2.04k
        Succs.insert(SI->getSUnit());
1453
16.2k
    }
1454
12.7k
    for (SUnit::const_pred_iterator PI = (*I)->Preds.begin(),
1455
12.7k
                                    PE = (*I)->Preds.end();
1456
36.1k
         PI != PE; 
++PI23.3k
) {
1457
23.3k
      if (PI->getKind() != SDep::Anti)
1458
19.6k
        continue;
1459
3.72k
      if (S && 
S->count(PI->getSUnit()) == 0790
)
1460
475
        continue;
1461
3.24k
      if (NodeOrder.count(PI->getSUnit()) == 0)
1462
886
        Succs.insert(PI->getSUnit());
1463
3.24k
    }
1464
12.7k
  }
1465
2.54k
  return !Succs.empty();
1466
2.54k
}
1467
1468
/// Return true if there is a path from the specified node to any of the nodes
1469
/// in DestNodes. Keep track and return the nodes in any path.
1470
static bool computePath(SUnit *Cur, SetVector<SUnit *> &Path,
1471
                        SetVector<SUnit *> &DestNodes,
1472
                        SetVector<SUnit *> &Exclude,
1473
5.06k
                        SmallPtrSet<SUnit *, 8> &Visited) {
1474
5.06k
  if (Cur->isBoundaryNode())
1475
3
    return false;
1476
5.05k
  if (Exclude.count(Cur) != 0)
1477
787
    return false;
1478
4.27k
  if (DestNodes.count(Cur) != 0)
1479
542
    return true;
1480
3.72k
  if (!Visited.insert(Cur).second)
1481
839
    return Path.count(Cur) != 0;
1482
2.89k
  bool FoundPath = false;
1483
2.89k
  for (auto &SI : Cur->Succs)
1484
3.69k
    FoundPath |= computePath(SI.getSUnit(), Path, DestNodes, Exclude, Visited);
1485
2.89k
  for (auto &PI : Cur->Preds)
1486
4.41k
    if (PI.getKind() == SDep::Anti)
1487
291
      FoundPath |=
1488
291
          computePath(PI.getSUnit(), Path, DestNodes, Exclude, Visited);
1489
2.89k
  if (FoundPath)
1490
696
    Path.insert(Cur);
1491
2.89k
  return FoundPath;
1492
2.89k
}
1493
1494
/// Return true if Set1 is a subset of Set2.
1495
593
template <class S1Ty, class S2Ty> static bool isSubset(S1Ty &Set1, S2Ty &Set2) {
1496
1.03k
  for (typename S1Ty::iterator I = Set1.begin(), E = Set1.end(); I != E; 
++I437
)
1497
781
    if (Set2.count(*I) == 0)
1498
344
      return false;
1499
593
  
return true249
;
1500
593
}
MachinePipeliner.cpp:bool isSubset<llvm::SmallSetVector<llvm::SUnit*, 8u>, llvm::SmallSetVector<llvm::SUnit*, 8u> >(llvm::SmallSetVector<llvm::SUnit*, 8u>&, llvm::SmallSetVector<llvm::SUnit*, 8u>&)
Line
Count
Source
1495
178
template <class S1Ty, class S2Ty> static bool isSubset(S1Ty &Set1, S2Ty &Set2) {
1496
244
  for (typename S1Ty::iterator I = Set1.begin(), E = Set1.end(); I != E; 
++I66
)
1497
201
    if (Set2.count(*I) == 0)
1498
135
      return false;
1499
178
  
return true43
;
1500
178
}
MachinePipeliner.cpp:bool isSubset<llvm::SmallSetVector<llvm::SUnit*, 8u>, llvm::NodeSet>(llvm::SmallSetVector<llvm::SUnit*, 8u>&, llvm::NodeSet&)
Line
Count
Source
1495
415
template <class S1Ty, class S2Ty> static bool isSubset(S1Ty &Set1, S2Ty &Set2) {
1496
786
  for (typename S1Ty::iterator I = Set1.begin(), E = Set1.end(); I != E; 
++I371
)
1497
580
    if (Set2.count(*I) == 0)
1498
209
      return false;
1499
415
  
return true206
;
1500
415
}
1501
1502
/// Compute the live-out registers for the instructions in a node-set.
1503
/// The live-out registers are those that are defined in the node-set,
1504
/// but not used. Except for use operands of Phis.
1505
static void computeLiveOuts(MachineFunction &MF, RegPressureTracker &RPTracker,
1506
98
                            NodeSet &NS) {
1507
98
  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
1508
98
  MachineRegisterInfo &MRI = MF.getRegInfo();
1509
98
  SmallVector<RegisterMaskPair, 8> LiveOutRegs;
1510
98
  SmallSet<unsigned, 4> Uses;
1511
662
  for (SUnit *SU : NS) {
1512
662
    const MachineInstr *MI = SU->getInstr();
1513
662
    if (MI->isPHI())
1514
20
      continue;
1515
642
    for (const MachineOperand &MO : MI->operands())
1516
2.31k
      if (MO.isReg() && 
MO.isUse()1.93k
) {
1517
1.32k
        unsigned Reg = MO.getReg();
1518
1.32k
        if (TargetRegisterInfo::isVirtualRegister(Reg))
1519
1.31k
          Uses.insert(Reg);
1520
6
        else if (MRI.isAllocatable(Reg))
1521
8
          
for (MCRegUnitIterator Units(Reg, TRI); 4
Units.isValid();
++Units4
)
1522
4
            Uses.insert(*Units);
1523
1.32k
      }
1524
642
  }
1525
98
  for (SUnit *SU : NS)
1526
662
    for (const MachineOperand &MO : SU->getInstr()->operands())
1527
2.41k
      if (MO.isReg() && 
MO.isDef()1.99k
&&
!MO.isDead()632
) {
1528
625
        unsigned Reg = MO.getReg();
1529
625
        if (TargetRegisterInfo::isVirtualRegister(Reg)) {
1530
621
          if (!Uses.count(Reg))
1531
95
            LiveOutRegs.push_back(RegisterMaskPair(Reg,
1532
95
                                                   LaneBitmask::getNone()));
1533
621
        } else 
if (4
MRI.isAllocatable(Reg)4
) {
1534
8
          for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); 
++Units4
)
1535
4
            if (!Uses.count(*Units))
1536
0
              LiveOutRegs.push_back(RegisterMaskPair(*Units,
1537
0
                                                     LaneBitmask::getNone()));
1538
4
        }
1539
625
      }
1540
98
  RPTracker.addLiveRegs(LiveOutRegs);
1541
98
}
1542
1543
/// A heuristic to filter nodes in recurrent node-sets if the register
1544
/// pressure of a set is too high.
1545
190
void SwingSchedulerDAG::registerPressureFilter(NodeSetType &NodeSets) {
1546
437
  for (auto &NS : NodeSets) {
1547
437
    // Skip small node-sets since they won't cause register pressure problems.
1548
437
    if (NS.size() <= 2)
1549
339
      continue;
1550
98
    IntervalPressure RecRegPressure;
1551
98
    RegPressureTracker RecRPTracker(RecRegPressure);
1552
98
    RecRPTracker.init(&MF, &RegClassInfo, &LIS, BB, BB->end(), false, true);
1553
98
    computeLiveOuts(MF, RecRPTracker, NS);
1554
98
    RecRPTracker.closeBottom();
1555
98
1556
98
    std::vector<SUnit *> SUnits(NS.begin(), NS.end());
1557
2.06k
    llvm::sort(SUnits, [](const SUnit *A, const SUnit *B) {
1558
2.06k
      return A->NodeNum > B->NodeNum;
1559
2.06k
    });
1560
98
1561
657
    for (auto &SU : SUnits) {
1562
657
      // Since we're computing the register pressure for a subset of the
1563
657
      // instructions in a block, we need to set the tracker for each
1564
657
      // instruction in the node-set. The tracker is set to the instruction
1565
657
      // just after the one we're interested in.
1566
657
      MachineBasicBlock::const_iterator CurInstI = SU->getInstr();
1567
657
      RecRPTracker.setPos(std::next(CurInstI));
1568
657
1569
657
      RegPressureDelta RPDelta;
1570
657
      ArrayRef<PressureChange> CriticalPSets;
1571
657
      RecRPTracker.getMaxUpwardPressureDelta(SU->getInstr(), nullptr, RPDelta,
1572
657
                                             CriticalPSets,
1573
657
                                             RecRegPressure.MaxSetPressure);
1574
657
      if (RPDelta.Excess.isValid()) {
1575
1
        LLVM_DEBUG(
1576
1
            dbgs() << "Excess register pressure: SU(" << SU->NodeNum << ") "
1577
1
                   << TRI->getRegPressureSetName(RPDelta.Excess.getPSet())
1578
1
                   << ":" << RPDelta.Excess.getUnitInc());
1579
1
        NS.setExceedPressure(SU);
1580
1
        break;
1581
1
      }
1582
656
      RecRPTracker.recede();
1583
656
    }
1584
98
  }
1585
190
}
1586
1587
/// A heuristic to colocate node sets that have the same set of
1588
/// successors.
1589
190
void SwingSchedulerDAG::colocateNodeSets(NodeSetType &NodeSets) {
1590
190
  unsigned Colocate = 0;
1591
627
  for (int i = 0, e = NodeSets.size(); i < e; 
++i437
) {
1592
437
    NodeSet &N1 = NodeSets[i];
1593
437
    SmallSetVector<SUnit *, 8> S1;
1594
437
    if (N1.empty() || !succ_L(N1, S1))
1595
105
      continue;
1596
789
    
for (int j = i + 1; 332
j < e;
++j457
) {
1597
485
      NodeSet &N2 = NodeSets[j];
1598
485
      if (N1.compareRecMII(N2) != 0)
1599
236
        continue;
1600
249
      SmallSetVector<SUnit *, 8> S2;
1601
249
      if (N2.empty() || !succ_L(N2, S2))
1602
71
        continue;
1603
178
      if (isSubset(S1, S2) && 
S1.size() == S2.size()43
) {
1604
28
        N1.setColocate(++Colocate);
1605
28
        N2.setColocate(Colocate);
1606
28
        break;
1607
28
      }
1608
178
    }
1609
332
  }
1610
190
}
1611
1612
/// Check if the existing node-sets are profitable. If not, then ignore the
1613
/// recurrent node-sets, and attempt to schedule all nodes together. This is
1614
/// a heuristic. If the MII is large and all the recurrent node-sets are small,
1615
/// then it's best to try to schedule all instructions together instead of
1616
/// starting with the recurrent node-sets.
1617
190
void SwingSchedulerDAG::checkNodeSets(NodeSetType &NodeSets) {
1618
190
  // Look for loops with a large MII.
1619
190
  if (MII < 17)
1620
189
    return;
1621
1
  // Check if the node-set contains only a simple add recurrence.
1622
3
  
for (auto &NS : NodeSets)1
{
1623
3
    if (NS.getRecMII() > 2)
1624
0
      return;
1625
3
    if (NS.getMaxDepth() > MII)
1626
0
      return;
1627
3
  }
1628
1
  NodeSets.clear();
1629
1
  LLVM_DEBUG(dbgs() << "Clear recurrence node-sets\n");
1630
1
  return;
1631
1
}
1632
1633
/// Add the nodes that do not belong to a recurrence set into groups
1634
/// based upon connected componenets.
1635
190
void SwingSchedulerDAG::groupRemainingNodes(NodeSetType &NodeSets) {
1636
190
  SetVector<SUnit *> NodesAdded;
1637
190
  SmallPtrSet<SUnit *, 8> Visited;
1638
190
  // Add the nodes that are on a path between the previous node sets and
1639
190
  // the current node set.
1640
434
  for (NodeSet &I : NodeSets) {
1641
434
    SmallSetVector<SUnit *, 8> N;
1642
434
    // Add the nodes from the current node set to the previous node set.
1643
434
    if (succ_L(I, N)) {
1644
329
      SetVector<SUnit *> Path;
1645
624
      for (SUnit *NI : N) {
1646
624
        Visited.clear();
1647
624
        computePath(NI, Path, NodesAdded, I, Visited);
1648
624
      }
1649
329
      if (!Path.empty())
1650
59
        I.insert(Path.begin(), Path.end());
1651
329
    }
1652
434
    // Add the nodes from the previous node set to the current node set.
1653
434
    N.clear();
1654
434
    if (succ_L(NodesAdded, N)) {
1655
179
      SetVector<SUnit *> Path;
1656
448
      for (SUnit *NI : N) {
1657
448
        Visited.clear();
1658
448
        computePath(NI, Path, I, NodesAdded, Visited);
1659
448
      }
1660
179
      if (!Path.empty())
1661
31
        I.insert(Path.begin(), Path.end());
1662
179
    }
1663
434
    NodesAdded.insert(I.begin(), I.end());
1664
434
  }
1665
190
1666
190
  // Create a new node set with the connected nodes of any successor of a node
1667
190
  // in a recurrent set.
1668
190
  NodeSet NewSet;
1669
190
  SmallSetVector<SUnit *, 8> N;
1670
190
  if (succ_L(NodesAdded, N))
1671
77
    for (SUnit *I : N)
1672
108
      addConnectedNodes(I, NewSet, NodesAdded);
1673
190
  if (!NewSet.empty())
1674
77
    NodeSets.push_back(NewSet);
1675
190
1676
190
  // Create a new node set with the connected nodes of any predecessor of a node
1677
190
  // in a recurrent set.
1678
190
  NewSet.clear();
1679
190
  if (pred_L(NodesAdded, N))
1680
15
    for (SUnit *I : N)
1681
21
      addConnectedNodes(I, NewSet, NodesAdded);
1682
190
  if (!NewSet.empty())
1683
15
    NodeSets.push_back(NewSet);
1684
190
1685
190
  // Create new nodes sets with the connected nodes any remaining node that
1686
190
  // has no predecessor.
1687
2.06k
  for (unsigned i = 0; i < SUnits.size(); 
++i1.87k
) {
1688
1.87k
    SUnit *SU = &SUnits[i];
1689
1.87k
    if (NodesAdded.count(SU) == 0) {
1690
10
      NewSet.clear();
1691
10
      addConnectedNodes(SU, NewSet, NodesAdded);
1692
10
      if (!NewSet.empty())
1693
10
        NodeSets.push_back(NewSet);
1694
10
    }
1695
1.87k
  }
1696
190
}
1697
1698
/// Add the node to the set, and add all of its connected nodes to the set.
1699
void SwingSchedulerDAG::addConnectedNodes(SUnit *SU, NodeSet &NewSet,
1700
467
                                          SetVector<SUnit *> &NodesAdded) {
1701
467
  NewSet.insert(SU);
1702
467
  NodesAdded.insert(SU);
1703
507
  for (auto &SI : SU->Succs) {
1704
507
    SUnit *Successor = SI.getSUnit();
1705
507
    if (!SI.isArtificial() && NodesAdded.count(Successor) == 0)
1706
183
      addConnectedNodes(Successor, NewSet, NodesAdded);
1707
507
  }
1708
534
  for (auto &PI : SU->Preds) {
1709
534
    SUnit *Predecessor = PI.getSUnit();
1710
534
    if (!PI.isArtificial() && NodesAdded.count(Predecessor) == 0)
1711
145
      addConnectedNodes(Predecessor, NewSet, NodesAdded);
1712
534
  }
1713
467
}
1714
1715
/// Return true if Set1 contains elements in Set2. The elements in common
1716
/// are returned in a different container.
1717
static bool isIntersect(SmallSetVector<SUnit *, 8> &Set1, const NodeSet &Set2,
1718
315
                        SmallSetVector<SUnit *, 8> &Result) {
1719
315
  Result.clear();
1720
645
  for (unsigned i = 0, e = Set1.size(); i != e; 
++i330
) {
1721
330
    SUnit *SU = Set1[i];
1722
330
    if (Set2.count(SU) != 0)
1723
69
      Result.insert(SU);
1724
330
  }
1725
315
  return !Result.empty();
1726
315
}
1727
1728
/// Merge the recurrence node sets that have the same initial node.
1729
192
void SwingSchedulerDAG::fuseRecs(NodeSetType &NodeSets) {
1730
634
  for (NodeSetType::iterator I = NodeSets.begin(), E = NodeSets.end(); I != E;
1731
442
       ++I) {
1732
442
    NodeSet &NI = *I;
1733
2.01k
    for (NodeSetType::iterator J = I + 1; J != E;) {
1734
1.57k
      NodeSet &NJ = *J;
1735
1.57k
      if (NI.getNode(0)->NodeNum == NJ.getNode(0)->NodeNum) {
1736
181
        if (NJ.compareRecMII(NI) > 0)
1737
21
          NI.setRecMII(NJ.getRecMII());
1738
1.05k
        for (NodeSet::iterator NII = J->begin(), ENI = J->end(); NII != ENI;
1739
878
             ++NII)
1740
878
          I->insert(*NII);
1741
181
        NodeSets.erase(J);
1742
181
        E = NodeSets.end();
1743
1.39k
      } else {
1744
1.39k
        ++J;
1745
1.39k
      }
1746
1.57k
    }
1747
442
  }
1748
192
}
1749
1750
/// Remove nodes that have been scheduled in previous NodeSets.
1751
190
void SwingSchedulerDAG::removeDuplicateNodes(NodeSetType &NodeSets) {
1752
711
  for (NodeSetType::iterator I = NodeSets.begin(), E = NodeSets.end(); I != E;
1753
521
       ++I)
1754
1.31k
    
for (NodeSetType::iterator J = I + 1; 521
J != E;) {
1755
2.96k
      J->remove_if([&](SUnit *SUJ) { return I->count(SUJ); });
1756
789
1757
789
      if (J->empty()) {
1758
15
        NodeSets.erase(J);
1759
15
        E = NodeSets.end();
1760
774
      } else {
1761
774
        ++J;
1762
774
      }
1763
789
    }
1764
190
}
1765
1766
/// Compute an ordered list of the dependence graph nodes, which
1767
/// indicates the order that the nodes will be scheduled.  This is a
1768
/// two-level algorithm. First, a partial order is created, which
1769
/// consists of a list of sets ordered from highest to lowest priority.
1770
190
void SwingSchedulerDAG::computeNodeOrder(NodeSetType &NodeSets) {
1771
190
  SmallSetVector<SUnit *, 8> R;
1772
190
  NodeOrder.clear();
1773
190
1774
521
  for (auto &Nodes : NodeSets) {
1775
521
    LLVM_DEBUG(dbgs() << "NodeSet size " << Nodes.size() << "\n");
1776
521
    OrderKind Order;
1777
521
    SmallSetVector<SUnit *, 8> N;
1778
521
    if (pred_L(NodeOrder, N) && 
isSubset(N, Nodes)207
) {
1779
104
      R.insert(N.begin(), N.end());
1780
104
      Order = BottomUp;
1781
104
      LLVM_DEBUG(dbgs() << "  Bottom up (preds) ");
1782
417
    } else if (succ_L(NodeOrder, N) && 
isSubset(N, Nodes)208
) {
1783
102
      R.insert(N.begin(), N.end());
1784
102
      Order = TopDown;
1785
102
      LLVM_DEBUG(dbgs() << "  Top down (succs) ");
1786
315
    } else if (isIntersect(N, Nodes, R)) {
1787
45
      // If some of the successors are in the existing node-set, then use the
1788
45
      // top-down ordering.
1789
45
      Order = TopDown;
1790
45
      LLVM_DEBUG(dbgs() << "  Top down (intersect) ");
1791
270
    } else if (NodeSets.size() == 1) {
1792
11
      for (auto &N : Nodes)
1793
198
        if (N->Succs.size() == 0)
1794
18
          R.insert(N);
1795
11
      Order = BottomUp;
1796
11
      LLVM_DEBUG(dbgs() << "  Bottom up (all) ");
1797
259
    } else {
1798
259
      // Find the node with the highest ASAP.
1799
259
      SUnit *maxASAP = nullptr;
1800
713
      for (SUnit *SU : Nodes) {
1801
713
        if (maxASAP == nullptr || 
getASAP(SU) > getASAP(maxASAP)454
||
1802
713
            
(259
getASAP(SU) == getASAP(maxASAP)259
&&
SU->NodeNum > maxASAP->NodeNum211
))
1803
654
          maxASAP = SU;
1804
713
      }
1805
259
      R.insert(maxASAP);
1806
259
      Order = BottomUp;
1807
259
      LLVM_DEBUG(dbgs() << "  Bottom up (default) ");
1808
259
    }
1809
521
1810
1.06k
    while (!R.empty()) {
1811
546
      if (Order == TopDown) {
1812
160
        // Choose the node with the maximum height.  If more than one, choose
1813
160
        // the node wiTH the maximum ZeroLatencyHeight. If still more than one,
1814
160
        // choose the node with the lowest MOV.
1815
675
        while (!R.empty()) {
1816
515
          SUnit *maxHeight = nullptr;
1817
1.17k
          for (SUnit *I : R) {
1818
1.17k
            if (maxHeight == nullptr || 
getHeight(I) > getHeight(maxHeight)656
)
1819
609
              maxHeight = I;
1820
562
            else if (getHeight(I) == getHeight(maxHeight) &&
1821
562
                     
getZeroLatencyHeight(I) > getZeroLatencyHeight(maxHeight)308
)
1822
27
              maxHeight = I;
1823
535
            else if (getHeight(I) == getHeight(maxHeight) &&
1824
535
                     getZeroLatencyHeight(I) ==
1825
281
                         getZeroLatencyHeight(maxHeight) &&
1826
535
                     
getMOV(I) < getMOV(maxHeight)258
)
1827
22
              maxHeight = I;
1828
1.17k
          }
1829
515
          NodeOrder.insert(maxHeight);
1830
515
          LLVM_DEBUG(dbgs() << maxHeight->NodeNum << " ");
1831
515
          R.remove(maxHeight);
1832
573
          for (const auto &I : maxHeight->Succs) {
1833
573
            if (Nodes.count(I.getSUnit()) == 0)
1834
160
              continue;
1835
413
            if (NodeOrder.count(I.getSUnit()) != 0)
1836
107
              continue;
1837
306
            if (ignoreDependence(I, false))
1838
0
              continue;
1839
306
            R.insert(I.getSUnit());
1840
306
          }
1841
515
          // Back-edges are predecessors with an anti-dependence.
1842
753
          for (const auto &I : maxHeight->Preds) {
1843
753
            if (I.getKind() != SDep::Anti)
1844
689
              continue;
1845
64
            if (Nodes.count(I.getSUnit()) == 0)
1846
6
              continue;
1847
58
            if (NodeOrder.count(I.getSUnit()) != 0)
1848
2
              continue;
1849
56
            R.insert(I.getSUnit());
1850
56
          }
1851
515
        }
1852
160
        Order = BottomUp;
1853
160
        LLVM_DEBUG(dbgs() << "\n   Switching order to bottom up ");
1854
160
        SmallSetVector<SUnit *, 8> N;
1855
160
        if (pred_L(NodeOrder, N, &Nodes))
1856
12
          R.insert(N.begin(), N.end());
1857
386
      } else {
1858
386
        // Choose the node with the maximum depth.  If more than one, choose
1859
386
        // the node with the maximum ZeroLatencyDepth. If still more than one,
1860
386
        // choose the node with the lowest MOV.
1861
1.74k
        while (!R.empty()) {
1862
1.35k
          SUnit *maxDepth = nullptr;
1863
3.33k
          for (SUnit *I : R) {
1864
3.33k
            if (maxDepth == nullptr || 
getDepth(I) > getDepth(maxDepth)1.98k
)
1865
1.74k
              maxDepth = I;
1866
1.59k
            else if (getDepth(I) == getDepth(maxDepth) &&
1867
1.59k
                     
getZeroLatencyDepth(I) > getZeroLatencyDepth(maxDepth)887
)
1868
51
              maxDepth = I;
1869
1.54k
            else if (getDepth(I) == getDepth(maxDepth) &&
1870
1.54k
                     
getZeroLatencyDepth(I) == getZeroLatencyDepth(maxDepth)836
&&
1871
1.54k
                     
getMOV(I) < getMOV(maxDepth)702
)
1872
150
              maxDepth = I;
1873
3.33k
          }
1874
1.35k
          NodeOrder.insert(maxDepth);
1875
1.35k
          LLVM_DEBUG(dbgs() << maxDepth->NodeNum << " ");
1876
1.35k
          R.remove(maxDepth);
1877
1.35k
          if (Nodes.isExceedSU(maxDepth)) {
1878
1
            Order = TopDown;
1879
1
            R.clear();
1880
1
            R.insert(Nodes.getNode(0));
1881
1
            break;
1882
1
          }
1883
1.89k
          
for (const auto &I : maxDepth->Preds)1.35k
{
1884
1.89k
            if (Nodes.count(I.getSUnit()) == 0)
1885
475
              continue;
1886
1.41k
            if (NodeOrder.count(I.getSUnit()) != 0)
1887
102
              continue;
1888
1.31k
            R.insert(I.getSUnit());
1889
1.31k
          }
1890
1.35k
          // Back-edges are predecessors with an anti-dependence.
1891
2.07k
          for (const auto &I : maxDepth->Succs) {
1892
2.07k
            if (I.getKind() != SDep::Anti)
1893
1.77k
              continue;
1894
309
            if (Nodes.count(I.getSUnit()) == 0)
1895
41
              continue;
1896
268
            if (NodeOrder.count(I.getSUnit()) != 0)
1897
214
              continue;
1898
54
            R.insert(I.getSUnit());
1899
54
          }
1900
1.35k
        }
1901
386
        Order = TopDown;
1902
386
        LLVM_DEBUG(dbgs() << "\n   Switching order to top down ");
1903
386
        SmallSetVector<SUnit *, 8> N;
1904
386
        if (succ_L(NodeOrder, N, &Nodes))
1905
13
          R.insert(N.begin(), N.end());
1906
386
      }
1907
546
    }
1908
521
    LLVM_DEBUG(dbgs() << "\nDone with Nodeset\n");
1909
521
  }
1910
190
1911
190
  LLVM_DEBUG({
1912
190
    dbgs() << "Node order: ";
1913
190
    for (SUnit *I : NodeOrder)
1914
190
      dbgs() << " " << I->NodeNum << " ";
1915
190
    dbgs() << "\n";
1916
190
  });
1917
190
}
1918
1919
/// Process the nodes in the computed order and create the pipelined schedule
1920
/// of the instructions, if possible. Return true if a schedule is found.
1921
190
bool SwingSchedulerDAG::schedulePipeline(SMSchedule &Schedule) {
1922
190
1923
190
  if (NodeOrder.empty()){
1924
8
    LLVM_DEBUG(dbgs() << "NodeOrder is empty! abort scheduling\n" );
1925
8
    return false;
1926
8
  }
1927
182
1928
182
  bool scheduleFound = false;
1929
182
  unsigned II = 0;
1930
182
  // Keep increasing II until a valid schedule is found.
1931
470
  for (II = MII; II <= MAX_II && 
!scheduleFound465
;
++II288
) {
1932
288
    Schedule.reset();
1933
288
    Schedule.setInitiationInterval(II);
1934
288
    LLVM_DEBUG(dbgs() << "Try to schedule with " << II << "\n");
1935
288
1936
288
    SetVector<SUnit *>::iterator NI = NodeOrder.begin();
1937
288
    SetVector<SUnit *>::iterator NE = NodeOrder.end();
1938
3.24k
    do {
1939
3.24k
      SUnit *SU = *NI;
1940
3.24k
1941
3.24k
      // Compute the schedule time for the instruction, which is based
1942
3.24k
      // upon the scheduled time for any predecessors/successors.
1943
3.24k
      int EarlyStart = INT_MIN;
1944
3.24k
      int LateStart = INT_MAX;
1945
3.24k
      // These values are set when the size of the schedule window is limited
1946
3.24k
      // due to chain dependences.
1947
3.24k
      int SchedEnd = INT_MAX;
1948
3.24k
      int SchedStart = INT_MIN;
1949
3.24k
      Schedule.computeStart(SU, &EarlyStart, &LateStart, &SchedEnd, &SchedStart,
1950
3.24k
                            II, this);
1951
3.24k
      LLVM_DEBUG({
1952
3.24k
        dbgs() << "\n";
1953
3.24k
        dbgs() << "Inst (" << SU->NodeNum << ") ";
1954
3.24k
        SU->getInstr()->dump();
1955
3.24k
        dbgs() << "\n";
1956
3.24k
      });
1957
3.24k
      LLVM_DEBUG({
1958
3.24k
        dbgs() << format("\tes: %8x ls: %8x me: %8x ms: %8x\n", EarlyStart,
1959
3.24k
                         LateStart, SchedEnd, SchedStart);
1960
3.24k
      });
1961
3.24k
1962
3.24k
      if (EarlyStart > LateStart || 
SchedEnd < EarlyStart3.19k
||
1963
3.24k
          
SchedStart > LateStart3.19k
)
1964
92
        scheduleFound = false;
1965
3.15k
      else if (EarlyStart != INT_MIN && 
LateStart == INT_MAX1.08k
) {
1966
650
        SchedEnd = std::min(SchedEnd, EarlyStart + (int)II - 1);
1967
650
        scheduleFound = Schedule.insert(SU, EarlyStart, SchedEnd, II);
1968
2.50k
      } else if (EarlyStart == INT_MIN && 
LateStart != INT_MAX2.07k
) {
1969
1.71k
        SchedStart = std::max(SchedStart, LateStart - (int)II + 1);
1970
1.71k
        scheduleFound = Schedule.insert(SU, LateStart, SchedStart, II);
1971
1.71k
      } else 
if (789
EarlyStart != INT_MIN789
&&
LateStart != INT_MAX430
) {
1972
430
        SchedEnd =
1973
430
            std::min(SchedEnd, std::min(LateStart, EarlyStart + (int)II - 1));
1974
430
        // When scheduling a Phi it is better to start at the late cycle and go
1975
430
        // backwards. The default order may insert the Phi too far away from
1976
430
        // its first dependence.
1977
430
        if (SU->getInstr()->isPHI())
1978
333
          scheduleFound = Schedule.insert(SU, SchedEnd, EarlyStart, II);
1979
97
        else
1980
97
          scheduleFound = Schedule.insert(SU, EarlyStart, SchedEnd, II);
1981
430
      } else {
1982
359
        int FirstCycle = Schedule.getFirstCycle();
1983
359
        scheduleFound = Schedule.insert(SU, FirstCycle + getASAP(SU),
1984
359
                                        FirstCycle + getASAP(SU) + II - 1, II);
1985
359
      }
1986
3.24k
      // Even if we find a schedule, make sure the schedule doesn't exceed the
1987
3.24k
      // allowable number of stages. We keep trying if this happens.
1988
3.24k
      if (scheduleFound)
1989
3.14k
        if (SwpMaxStages > -1 &&
1990
3.14k
            Schedule.getMaxStageCount() > (unsigned)SwpMaxStages)
1991
6
          scheduleFound = false;
1992
3.24k
1993
3.24k
      LLVM_DEBUG({
1994
3.24k
        if (!scheduleFound)
1995
3.24k
          dbgs() << "\tCan't schedule\n";
1996
3.24k
      });
1997
3.24k
    } while (++NI != NE && 
scheduleFound3.06k
);
1998
288
1999
288
    // If a schedule is found, check if it is a valid schedule too.
2000
288
    if (scheduleFound)
2001
177
      scheduleFound = Schedule.isValidSchedule(this);
2002
288
  }
2003
182
2004
182
  LLVM_DEBUG(dbgs() << "Schedule Found? " << scheduleFound << " (II=" << II
2005
182
                    << ")\n");
2006
182
2007
182
  if (scheduleFound)
2008
177
    Schedule.finalizeSchedule(this);
2009
5
  else
2010
5
    Schedule.reset();
2011
182
2012
182
  return scheduleFound && 
Schedule.getMaxStageCount() > 0177
;
2013
182
}
2014
2015
/// Given a schedule for the loop, generate a new version of the loop,
2016
/// and replace the old version.  This function generates a prolog
2017
/// that contains the initial iterations in the pipeline, and kernel
2018
/// loop, and the epilogue that contains the code for the final
2019
/// iterations.
2020
124
void SwingSchedulerDAG::generatePipelinedLoop(SMSchedule &Schedule) {
2021
124
  // Create a new basic block for the kernel and add it to the CFG.
2022
124
  MachineBasicBlock *KernelBB = MF.CreateMachineBasicBlock(BB->getBasicBlock());
2023
124
2024
124
  unsigned MaxStageCount = Schedule.getMaxStageCount();
2025
124
2026
124
  // Remember the registers that are used in different stages. The index is
2027
124
  // the iteration, or stage, that the instruction is scheduled in.  This is
2028
124
  // a map between register names in the original block and the names created
2029
124
  // in each stage of the pipelined loop.
2030
124
  ValueMapTy *VRMap = new ValueMapTy[(MaxStageCount + 1) * 2];
2031
124
  InstrMapTy InstrMap;
2032
124
2033
124
  SmallVector<MachineBasicBlock *, 4> PrologBBs;
2034
124
2035
124
  MachineBasicBlock *PreheaderBB = MLI->getLoopFor(BB)->getLoopPreheader();
2036
124
  assert(PreheaderBB != nullptr &&
2037
124
         "Need to add code to handle loops w/o preheader");
2038
124
  // Generate the prolog instructions that set up the pipeline.
2039
124
  generateProlog(Schedule, MaxStageCount, KernelBB, VRMap, PrologBBs);
2040
124
  MF.insert(BB->getIterator(), KernelBB);
2041
124
2042
124
  // Rearrange the instructions to generate the new, pipelined loop,
2043
124
  // and update register names as needed.
2044
124
  for (int Cycle = Schedule.getFirstCycle(),
2045
124
           LastCycle = Schedule.getFinalCycle();
2046
497
       Cycle <= LastCycle; 
++Cycle373
) {
2047
373
    std::deque<SUnit *> &CycleInstrs = Schedule.getInstructions(Cycle);
2048
373
    // This inner loop schedules each instruction in the cycle.
2049
1.26k
    for (SUnit *CI : CycleInstrs) {
2050
1.26k
      if (CI->getInstr()->isPHI())
2051
266
        continue;
2052
994
      unsigned StageNum = Schedule.stageScheduled(getSUnit(CI->getInstr()));
2053
994
      MachineInstr *NewMI = cloneInstr(CI->getInstr(), MaxStageCount, StageNum);
2054
994
      updateInstruction(NewMI, false, MaxStageCount, StageNum, Schedule, VRMap);
2055
994
      KernelBB->push_back(NewMI);
2056
994
      InstrMap[NewMI] = CI->getInstr();
2057
994
    }
2058
373
  }
2059
124
2060
124
  // Copy any terminator instructions to the new kernel, and update
2061
124
  // names as needed.
2062
124
  for (MachineBasicBlock::iterator I = BB->getFirstTerminator(),
2063
124
                                   E = BB->instr_end();
2064
372
       I != E; 
++I248
) {
2065
248
    MachineInstr *NewMI = MF.CloneMachineInstr(&*I);
2066
248
    updateInstruction(NewMI, false, MaxStageCount, 0, Schedule, VRMap);
2067
248
    KernelBB->push_back(NewMI);
2068
248
    InstrMap[NewMI] = &*I;
2069
248
  }
2070
124
2071
124
  KernelBB->transferSuccessors(BB);
2072
124
  KernelBB->replaceSuccessor(BB, KernelBB);
2073
124
2074
124
  generateExistingPhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, Schedule,
2075
124
                       VRMap, InstrMap, MaxStageCount, MaxStageCount, false);
2076
124
  generatePhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, Schedule, VRMap,
2077
124
               InstrMap, MaxStageCount, MaxStageCount, false);
2078
124
2079
124
  LLVM_DEBUG(dbgs() << "New block\n"; KernelBB->dump(););
2080
124
2081
124
  SmallVector<MachineBasicBlock *, 4> EpilogBBs;
2082
124
  // Generate the epilog instructions to complete the pipeline.
2083
124
  generateEpilog(Schedule, MaxStageCount, KernelBB, VRMap, EpilogBBs,
2084
124
                 PrologBBs);
2085
124
2086
124
  // We need this step because the register allocation doesn't handle some
2087
124
  // situations well, so we insert copies to help out.
2088
124
  splitLifetimes(KernelBB, EpilogBBs, Schedule);
2089
124
2090
124
  // Remove dead instructions due to loop induction variables.
2091
124
  removeDeadInstructions(KernelBB, EpilogBBs);
2092
124
2093
124
  // Add branches between prolog and epilog blocks.
2094
124
  addBranches(*PreheaderBB, PrologBBs, KernelBB, EpilogBBs, Schedule, VRMap);
2095
124
2096
124
  // Remove the original loop since it's no longer referenced.
2097
124
  for (auto &I : *BB)
2098
1.50k
    LIS.RemoveMachineInstrFromMaps(I);
2099
124
  BB->clear();
2100
124
  BB->eraseFromParent();
2101
124
2102
124
  delete[] VRMap;
2103
124
}
2104
2105
/// Generate the pipeline prolog code.
2106
void SwingSchedulerDAG::generateProlog(SMSchedule &Schedule, unsigned LastStage,
2107
                                       MachineBasicBlock *KernelBB,
2108
                                       ValueMapTy *VRMap,
2109
124
                                       MBBVectorTy &PrologBBs) {
2110
124
  MachineBasicBlock *PreheaderBB = MLI->getLoopFor(BB)->getLoopPreheader();
2111
124
  assert(PreheaderBB != nullptr &&
2112
124
         "Need to add code to handle loops w/o preheader");
2113
124
  MachineBasicBlock *PredBB = PreheaderBB;
2114
124
  InstrMapTy InstrMap;
2115
124
2116
124
  // Generate a basic block for each stage, not including the last stage,
2117
124
  // which will be generated in the kernel. Each basic block may contain
2118
124
  // instructions from multiple stages/iterations.
2119
294
  for (unsigned i = 0; i < LastStage; 
++i170
) {
2120
170
    // Create and insert the prolog basic block prior to the original loop
2121
170
    // basic block.  The original loop is removed later.
2122
170
    MachineBasicBlock *NewBB = MF.CreateMachineBasicBlock(BB->getBasicBlock());
2123
170
    PrologBBs.push_back(NewBB);
2124
170
    MF.insert(BB->getIterator(), NewBB);
2125
170
    NewBB->transferSuccessors(PredBB);
2126
170
    PredBB->addSuccessor(NewBB);
2127
170
    PredBB = NewBB;
2128
170
2129
170
    // Generate instructions for each appropriate stage. Process instructions
2130
170
    // in original program order.
2131
396
    for (int StageNum = i; StageNum >= 0; 
--StageNum226
) {
2132
226
      for (MachineBasicBlock::iterator BBI = BB->instr_begin(),
2133
226
                                       BBE = BB->getFirstTerminator();
2134
2.72k
           BBI != BBE; 
++BBI2.49k
) {
2135
2.49k
        if (Schedule.isScheduledAtStage(getSUnit(&*BBI), (unsigned)StageNum)) {
2136
1.06k
          if (BBI->isPHI())
2137
258
            continue;
2138
808
          MachineInstr *NewMI =
2139
808
              cloneAndChangeInstr(&*BBI, i, (unsigned)StageNum, Schedule);
2140
808
          updateInstruction(NewMI, false, i, (unsigned)StageNum, Schedule,
2141
808
                            VRMap);
2142
808
          NewBB->push_back(NewMI);
2143
808
          InstrMap[NewMI] = &*BBI;
2144
808
        }
2145
2.49k
      }
2146
226
    }
2147
170
    rewritePhiValues(NewBB, i, Schedule, VRMap, InstrMap);
2148
170
    LLVM_DEBUG({
2149
170
      dbgs() << "prolog:\n";
2150
170
      NewBB->dump();
2151
170
    });
2152
170
  }
2153
124
2154
124
  PredBB->replaceSuccessor(BB, KernelBB);
2155
124
2156
124
  // Check if we need to remove the branch from the preheader to the original
2157
124
  // loop, and replace it with a branch to the new loop.
2158
124
  unsigned numBranches = TII->removeBranch(*PreheaderBB);
2159
124
  if (numBranches) {
2160
35
    SmallVector<MachineOperand, 0> Cond;
2161
35
    TII->insertBranch(*PreheaderBB, PrologBBs[0], nullptr, Cond, DebugLoc());
2162
35
  }
2163
124
}
2164
2165
/// Generate the pipeline epilog code. The epilog code finishes the iterations
2166
/// that were started in either the prolog or the kernel.  We create a basic
2167
/// block for each stage that needs to complete.
2168
void SwingSchedulerDAG::generateEpilog(SMSchedule &Schedule, unsigned LastStage,
2169
                                       MachineBasicBlock *KernelBB,
2170
                                       ValueMapTy *VRMap,
2171
                                       MBBVectorTy &EpilogBBs,
2172
124
                                       MBBVectorTy &PrologBBs) {
2173
124
  // We need to change the branch from the kernel to the first epilog block, so
2174
124
  // this call to analyze branch uses the kernel rather than the original BB.
2175
124
  MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
2176
124
  SmallVector<MachineOperand, 4> Cond;
2177
124
  bool checkBranch = TII->analyzeBranch(*KernelBB, TBB, FBB, Cond);
2178
124
  assert(!checkBranch && "generateEpilog must be able to analyze the branch");
2179
124
  if (checkBranch)
2180
0
    return;
2181
124
2182
124
  MachineBasicBlock::succ_iterator LoopExitI = KernelBB->succ_begin();
2183
124
  if (*LoopExitI == KernelBB)
2184
68
    ++LoopExitI;
2185
124
  assert(LoopExitI != KernelBB->succ_end() && "Expecting a successor");
2186
124
  MachineBasicBlock *LoopExitBB = *LoopExitI;
2187
124
2188
124
  MachineBasicBlock *PredBB = KernelBB;
2189
124
  MachineBasicBlock *EpilogStart = LoopExitBB;
2190
124
  InstrMapTy InstrMap;
2191
124
2192
124
  // Generate a basic block for each stage, not including the last stage,
2193
124
  // which was generated for the kernel.  Each basic block may contain
2194
124
  // instructions from multiple stages/iterations.
2195
124
  int EpilogStage = LastStage + 1;
2196
294
  for (unsigned i = LastStage; i >= 1; 
--i, ++EpilogStage170
) {
2197
170
    MachineBasicBlock *NewBB = MF.CreateMachineBasicBlock();
2198
170
    EpilogBBs.push_back(NewBB);
2199
170
    MF.insert(BB->getIterator(), NewBB);
2200
170
2201
170
    PredBB->replaceSuccessor(LoopExitBB, NewBB);
2202
170
    NewBB->addSuccessor(LoopExitBB);
2203
170
2204
170
    if (EpilogStart == LoopExitBB)
2205
124
      EpilogStart = NewBB;
2206
170
2207
170
    // Add instructions to the epilog depending on the current block.
2208
170
    // Process instructions in original program order.
2209
396
    for (unsigned StageNum = i; StageNum <= LastStage; 
++StageNum226
) {
2210
2.94k
      for (auto &BBI : *BB) {
2211
2.94k
        if (BBI.isPHI())
2212
536
          continue;
2213
2.41k
        MachineInstr *In = &BBI;
2214
2.41k
        if (Schedule.isScheduledAtStage(getSUnit(In), StageNum)) {
2215
634
          // Instructions with memoperands in the epilog are updated with
2216
634
          // conservative values.
2217
634
          MachineInstr *NewMI = cloneInstr(In, UINT_MAX, 0);
2218
634
          updateInstruction(NewMI, i == 1, EpilogStage, 0, Schedule, VRMap);
2219
634
          NewBB->push_back(NewMI);
2220
634
          InstrMap[NewMI] = In;
2221
634
        }
2222
2.41k
      }
2223
226
    }
2224
170
    generateExistingPhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, Schedule,
2225
170
                         VRMap, InstrMap, LastStage, EpilogStage, i == 1);
2226
170
    generatePhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, Schedule, VRMap,
2227
170
                 InstrMap, LastStage, EpilogStage, i == 1);
2228
170
    PredBB = NewBB;
2229
170
2230
170
    LLVM_DEBUG({
2231
170
      dbgs() << "epilog:\n";
2232
170
      NewBB->dump();
2233
170
    });
2234
170
  }
2235
124
2236
124
  // Fix any Phi nodes in the loop exit block.
2237
134
  for (MachineInstr &MI : *LoopExitBB) {
2238
134
    if (!MI.isPHI())
2239
120
      break;
2240
45
    
for (unsigned i = 2, e = MI.getNumOperands() + 1; 14
i != e;
i += 231
) {
2241
31
      MachineOperand &MO = MI.getOperand(i);
2242
31
      if (MO.getMBB() == BB)
2243
14
        MO.setMBB(PredBB);
2244
31
    }
2245
14
  }
2246
124
2247
124
  // Create a branch to the new epilog from the kernel.
2248
124
  // Remove the original branch and add a new branch to the epilog.
2249
124
  TII->removeBranch(*KernelBB);
2250
124
  TII->insertBranch(*KernelBB, KernelBB, EpilogStart, Cond, DebugLoc());
2251
124
  // Add a branch to the loop exit.
2252
124
  if (EpilogBBs.size() > 0) {
2253
124
    MachineBasicBlock *LastEpilogBB = EpilogBBs.back();
2254
124
    SmallVector<MachineOperand, 4> Cond1;
2255
124
    TII->insertBranch(*LastEpilogBB, LoopExitBB, nullptr, Cond1, DebugLoc());
2256
124
  }
2257
124
}
2258
2259
/// Replace all uses of FromReg that appear outside the specified
2260
/// basic block with ToReg.
2261
static void replaceRegUsesAfterLoop(unsigned FromReg, unsigned ToReg,
2262
                                    MachineBasicBlock *MBB,
2263
                                    MachineRegisterInfo &MRI,
2264
942
                                    LiveIntervals &LIS) {
2265
942
  for (MachineRegisterInfo::use_iterator I = MRI.use_begin(FromReg),
2266
942
                                         E = MRI.use_end();
2267
2.28k
       I != E;) {
2268
1.34k
    MachineOperand &O = *I;
2269
1.34k
    ++I;
2270
1.34k
    if (O.getParent()->getParent() != MBB)
2271
71
      O.setReg(ToReg);
2272
1.34k
  }
2273
942
  if (!LIS.hasInterval(ToReg))
2274
942
    LIS.createEmptyInterval(ToReg);
2275
942
}
2276
2277
/// Return true if the register has a use that occurs outside the
2278
/// specified loop.
2279
static bool hasUseAfterLoop(unsigned Reg, MachineBasicBlock *BB,
2280
393
                            MachineRegisterInfo &MRI) {
2281
393
  for (MachineRegisterInfo::use_iterator I = MRI.use_begin(Reg),
2282
393
                                         E = MRI.use_end();
2283
838
       I != E; 
++I445
)
2284
454
    if (I->getParent()->getParent() != BB)
2285
9
      return true;
2286
393
  
return false384
;
2287
393
}
2288
2289
/// Generate Phis for the specific block in the generated pipelined code.
2290
/// This function looks at the Phis from the original code to guide the
2291
/// creation of new Phis.
2292
void SwingSchedulerDAG::generateExistingPhis(
2293
    MachineBasicBlock *NewBB, MachineBasicBlock *BB1, MachineBasicBlock *BB2,
2294
    MachineBasicBlock *KernelBB, SMSchedule &Schedule, ValueMapTy *VRMap,
2295
    InstrMapTy &InstrMap, unsigned LastStageNum, unsigned CurStageNum,
2296
294
    bool IsLast) {
2297
294
  // Compute the stage number for the initial value of the Phi, which
2298
294
  // comes from the prolog. The prolog to use depends on to which kernel/
2299
294
  // epilog that we're adding the Phi.
2300
294
  unsigned PrologStage = 0;
2301
294
  unsigned PrevStage = 0;
2302
294
  bool InKernel = (LastStageNum == CurStageNum);
2303
294
  if (InKernel) {
2304
124
    PrologStage = LastStageNum - 1;
2305
124
    PrevStage = CurStageNum;
2306
170
  } else {
2307
170
    PrologStage = LastStageNum - (CurStageNum - LastStageNum);
2308
170
    PrevStage = LastStageNum + (CurStageNum - LastStageNum) - 1;
2309
170
  }
2310
294
2311
294
  for (MachineBasicBlock::iterator BBI = BB->instr_begin(),
2312
294
                                   BBE = BB->getFirstNonPHI();
2313
951
       BBI != BBE; 
++BBI657
) {
2314
657
    unsigned Def = BBI->getOperand(0).getReg();
2315
657
2316
657
    unsigned InitVal = 0;
2317
657
    unsigned LoopVal = 0;
2318
657
    getPhiRegs(*BBI, BB, InitVal, LoopVal);
2319
657
2320
657
    unsigned PhiOp1 = 0;
2321
657
    // The Phi value from the loop body typically is defined in the loop, but
2322
657
    // not always. So, we need to check if the value is defined in the loop.
2323
657
    unsigned PhiOp2 = LoopVal;
2324
657
    if (VRMap[LastStageNum].count(LoopVal))
2325
649
      PhiOp2 = VRMap[LastStageNum][LoopVal];
2326
657
2327
657
    int StageScheduled = Schedule.stageScheduled(getSUnit(&*BBI));
2328
657
    int LoopValStage =
2329
657
        Schedule.stageScheduled(getSUnit(MRI.getVRegDef(LoopVal)));
2330
657
    unsigned NumStages = Schedule.getStagesForReg(Def, CurStageNum);
2331
657
    if (NumStages == 0) {
2332
7
      // We don't need to generate a Phi anymore, but we need to rename any uses
2333
7
      // of the Phi value.
2334
7
      unsigned NewReg = VRMap[PrevStage][LoopVal];
2335
7
      rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, 0, &*BBI,
2336
7
                            Def, InitVal, NewReg);
2337
7
      if (VRMap[CurStageNum].count(LoopVal))
2338
7
        VRMap[CurStageNum][Def] = VRMap[CurStageNum][LoopVal];
2339
7
    }
2340
657
    // Adjust the number of Phis needed depending on the number of prologs left,
2341
657
    // and the distance from where the Phi is first scheduled. The number of
2342
657
    // Phis cannot exceed the number of prolog stages. Each stage can
2343
657
    // potentially define two values.
2344
657
    unsigned MaxPhis = PrologStage + 2;
2345
657
    if (!InKernel && 
(int)PrologStage <= LoopValStage391
)
2346
329
      MaxPhis = std::max((int)MaxPhis - (int)LoopValStage, 1);
2347
657
    unsigned NumPhis = std::min(NumStages, MaxPhis);
2348
657
2349
657
    unsigned NewReg = 0;
2350
657
    unsigned AccessStage = (LoopValStage != -1) ? LoopValStage : 
StageScheduled0
;
2351
657
    // In the epilog, we may need to look back one stage to get the correct
2352
657
    // Phi name because the epilog and prolog blocks execute the same stage.
2353
657
    // The correct name is from the previous block only when the Phi has
2354
657
    // been completely scheduled prior to the epilog, and Phi value is not
2355
657
    // needed in multiple stages.
2356
657
    int StageDiff = 0;
2357
657
    if (!InKernel && 
StageScheduled >= LoopValStage391
&&
AccessStage == 0367
&&
2358
657
        
NumPhis == 1222
)
2359
199
      StageDiff = 1;
2360
657
    // Adjust the computations below when the phi and the loop definition
2361
657
    // are scheduled in different stages.
2362
657
    if (InKernel && 
LoopValStage != -1266
&&
StageScheduled > LoopValStage266
)
2363
1
      StageDiff = StageScheduled - LoopValStage;
2364
1.36k
    for (unsigned np = 0; np < NumPhis; 
++np703
) {
2365
703
      // If the Phi hasn't been scheduled, then use the initial Phi operand
2366
703
      // value. Otherwise, use the scheduled version of the instruction. This
2367
703
      // is a little complicated when a Phi references another Phi.
2368
703
      if (np > PrologStage || 
StageScheduled >= (int)LastStageNum675
)
2369
216
        PhiOp1 = InitVal;
2370
487
      // Check if the Phi has already been scheduled in a prolog stage.
2371
487
      else if (PrologStage >= AccessStage + StageDiff + np &&
2372
487
               
VRMap[PrologStage - StageDiff - np].count(LoopVal) != 0300
)
2373
274
        PhiOp1 = VRMap[PrologStage - StageDiff - np][LoopVal];
2374
213
      // Check if the Phi has already been scheduled, but the loop instruction
2375
213
      // is either another Phi, or doesn't occur in the loop.
2376
213
      else if (PrologStage >= AccessStage + StageDiff + np) {
2377
26
        // If the Phi references another Phi, we need to examine the other
2378
26
        // Phi to get the correct value.
2379
26
        PhiOp1 = LoopVal;
2380
26
        MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1);
2381
26
        int Indirects = 1;
2382
42
        while (InstOp1 && InstOp1->isPHI() && 
InstOp1->getParent() == BB27
) {
2383
26
          int PhiStage = Schedule.stageScheduled(getSUnit(InstOp1));
2384
26
          if ((int)(PrologStage - StageDiff - np) < PhiStage + Indirects)
2385
16
            PhiOp1 = getInitPhiReg(*InstOp1, BB);
2386
10
          else
2387
10
            PhiOp1 = getLoopPhiReg(*InstOp1, BB);
2388
26
          InstOp1 = MRI.getVRegDef(PhiOp1);
2389
26
          int PhiOpStage = Schedule.stageScheduled(getSUnit(InstOp1));
2390
26
          int StageAdj = (PhiOpStage != -1 ? 
PhiStage - PhiOpStage10
:
016
);
2391
26
          if (PhiOpStage != -1 && 
PrologStage - StageAdj >= Indirects + np10
&&
2392
26
              
VRMap[PrologStage - StageAdj - Indirects - np].count(PhiOp1)10
) {
2393
10
            PhiOp1 = VRMap[PrologStage - StageAdj - Indirects - np][PhiOp1];
2394
10
            break;
2395
10
          }
2396
16
          ++Indirects;
2397
16
        }
2398
26
      } else
2399
187
        PhiOp1 = InitVal;
2400
703
      // If this references a generated Phi in the kernel, get the Phi operand
2401
703
      // from the incoming block.
2402
703
      if (MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1))
2403
703
        if (InstOp1->isPHI() && 
InstOp1->getParent() == KernelBB70
)
2404
7
          PhiOp1 = getInitPhiReg(*InstOp1, KernelBB);
2405
703
2406
703
      MachineInstr *PhiInst = MRI.getVRegDef(LoopVal);
2407
703
      bool LoopDefIsPhi = PhiInst && PhiInst->isPHI();
2408
703
      // In the epilog, a map lookup is needed to get the value from the kernel,
2409
703
      // or previous epilog block. How is does this depends on if the
2410
703
      // instruction is scheduled in the previous block.
2411
703
      if (!InKernel) {
2412
423
        int StageDiffAdj = 0;
2413
423
        if (LoopValStage != -1 && StageScheduled > LoopValStage)
2414
3
          StageDiffAdj = StageScheduled - LoopValStage;
2415
423
        // Use the loop value defined in the kernel, unless the kernel
2416
423
        // contains the last definition of the Phi.
2417
423
        if (np == 0 && 
PrevStage == LastStageNum391
&&
2418
423
            
(266
StageScheduled != 0266
||
LoopValStage != 0174
) &&
2419
423
            
VRMap[PrevStage - StageDiffAdj].count(LoopVal)105
)
2420
105
          PhiOp2 = VRMap[PrevStage - StageDiffAdj][LoopVal];
2421
318
        // Use the value defined by the Phi. We add one because we switch
2422
318
        // from looking at the loop value to the Phi definition.
2423
318
        else if (np > 0 && 
PrevStage == LastStageNum32
&&
2424
318
                 
VRMap[PrevStage - np + 1].count(Def)21
)
2425
21
          PhiOp2 = VRMap[PrevStage - np + 1][Def];
2426
297
        // Use the loop value defined in the kernel.
2427
297
        else if (static_cast<unsigned>(LoopValStage) > PrologStage + 1 &&
2428
297
                 
VRMap[PrevStage - StageDiffAdj - np].count(LoopVal)41
)
2429
41
          PhiOp2 = VRMap[PrevStage - StageDiffAdj - np][LoopVal];
2430
256
        // Use the value defined by the Phi, unless we're generating the first
2431
256
        // epilog and the Phi refers to a Phi in a different stage.
2432
256
        else if (VRMap[PrevStage - np].count(Def) &&
2433
256
                 (!LoopDefIsPhi || 
(PrevStage != LastStageNum)27
||
(LoopValStage == StageScheduled)14
))
2434
256
          PhiOp2 = VRMap[PrevStage - np][Def];
2435
423
      }
2436
703
2437
703
      // Check if we can reuse an existing Phi. This occurs when a Phi
2438
703
      // references another Phi, and the other Phi is scheduled in an
2439
703
      // earlier stage. We can try to reuse an existing Phi up until the last
2440
703
      // stage of the current Phi.
2441
703
      if (LoopDefIsPhi) {
2442
47
        if (static_cast<int>(PrologStage - np) >= StageScheduled) {
2443
40
          int LVNumStages = Schedule.getStagesForPhi(LoopVal);
2444
40
          int StageDiff = (StageScheduled - LoopValStage);
2445
40
          LVNumStages -= StageDiff;
2446
40
          // Make sure the loop value Phi has been processed already.
2447
40
          if (LVNumStages > (int)np && 
VRMap[CurStageNum].count(LoopVal)9
) {
2448
0
            NewReg = PhiOp2;
2449
0
            unsigned ReuseStage = CurStageNum;
2450
0
            if (Schedule.isLoopCarried(this, *PhiInst))
2451
0
              ReuseStage -= LVNumStages;
2452
0
            // Check if the Phi to reuse has been generated yet. If not, then
2453
0
            // there is nothing to reuse.
2454
0
            if (VRMap[ReuseStage - np].count(LoopVal)) {
2455
0
              NewReg = VRMap[ReuseStage - np][LoopVal];
2456
0
2457
0
              rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np,
2458
0
                                    &*BBI, Def, NewReg);
2459
0
              // Update the map with the new Phi name.
2460
0
              VRMap[CurStageNum - np][Def] = NewReg;
2461
0
              PhiOp2 = NewReg;
2462
0
              if (VRMap[LastStageNum - np - 1].count(LoopVal))
2463
0
                PhiOp2 = VRMap[LastStageNum - np - 1][LoopVal];
2464
0
2465
0
              if (IsLast && np == NumPhis - 1)
2466
0
                replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
2467
0
              continue;
2468
0
            }
2469
47
          }
2470
40
        }
2471
47
        if (InKernel && 
StageDiff > 017
&&
2472
47
            
VRMap[CurStageNum - StageDiff - np].count(LoopVal)1
)
2473
1
          PhiOp2 = VRMap[CurStageNum - StageDiff - np][LoopVal];
2474
47
      }
2475
703
2476
703
      const TargetRegisterClass *RC = MRI.getRegClass(Def);
2477
703
      NewReg = MRI.createVirtualRegister(RC);
2478
703
2479
703
      MachineInstrBuilder NewPhi =
2480
703
          BuildMI(*NewBB, NewBB->getFirstNonPHI(), DebugLoc(),
2481
703
                  TII->get(TargetOpcode::PHI), NewReg);
2482
703
      NewPhi.addReg(PhiOp1).addMBB(BB1);
2483
703
      NewPhi.addReg(PhiOp2).addMBB(BB2);
2484
703
      if (np == 0)
2485
650
        InstrMap[NewPhi] = &*BBI;
2486
703
2487
703
      // We define the Phis after creating the new pipelined code, so
2488
703
      // we need to rename the Phi values in scheduled instructions.
2489
703
2490
703
      unsigned PrevReg = 0;
2491
703
      if (InKernel && 
VRMap[PrevStage - np].count(LoopVal)280
)
2492
271
        PrevReg = VRMap[PrevStage - np][LoopVal];
2493
703
      rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np, &*BBI,
2494
703
                            Def, NewReg, PrevReg);
2495
703
      // If the Phi has been scheduled, use the new name for rewriting.
2496
703
      if (VRMap[CurStageNum - np].count(Def)) {
2497
32
        unsigned R = VRMap[CurStageNum - np][Def];
2498
32
        rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np, &*BBI,
2499
32
                              R, NewReg);
2500
32
      }
2501
703
2502
703
      // Check if we need to rename any uses that occurs after the loop. The
2503
703
      // register to replace depends on whether the Phi is scheduled in the
2504
703
      // epilog.
2505
703
      if (IsLast && 
np == NumPhis - 1281
)
2506
266
        replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
2507
703
2508
703
      // In the kernel, a dependent Phi uses the value from this Phi.
2509
703
      if (InKernel)
2510
280
        PhiOp2 = NewReg;
2511
703
2512
703
      // Update the map with the new Phi name.
2513
703
      VRMap[CurStageNum - np][Def] = NewReg;
2514
703
    }
2515
657
2516
665
    while (NumPhis++ < NumStages) {
2517
8
      rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, NumPhis,
2518
8
                            &*BBI, Def, NewReg, 0);
2519
8
    }
2520
657
2521
657
    // Check if we need to rename a Phi that has been eliminated due to
2522
657
    // scheduling.
2523
657
    if (NumStages == 0 && 
IsLast7
&&
VRMap[CurStageNum].count(LoopVal)0
)
2524
0
      replaceRegUsesAfterLoop(Def, VRMap[CurStageNum][LoopVal], BB, MRI, LIS);
2525
657
  }
2526
294
}
2527
2528
/// Generate Phis for the specified block in the generated pipelined code.
2529
/// These are new Phis needed because the definition is scheduled after the
2530
/// use in the pipelined sequence.
2531
void SwingSchedulerDAG::generatePhis(
2532
    MachineBasicBlock *NewBB, MachineBasicBlock *BB1, MachineBasicBlock *BB2,
2533
    MachineBasicBlock *KernelBB, SMSchedule &Schedule, ValueMapTy *VRMap,
2534
    InstrMapTy &InstrMap, unsigned LastStageNum, unsigned CurStageNum,
2535
294
    bool IsLast) {
2536
294
  // Compute the stage number that contains the initial Phi value, and
2537
294
  // the Phi from the previous stage.
2538
294
  unsigned PrologStage = 0;
2539
294
  unsigned PrevStage = 0;
2540
294
  unsigned StageDiff = CurStageNum - LastStageNum;
2541
294
  bool InKernel = (StageDiff == 0);
2542
294
  if (InKernel) {
2543
124
    PrologStage = LastStageNum - 1;
2544
124
    PrevStage = CurStageNum;
2545
170
  } else {
2546
170
    PrologStage = LastStageNum - StageDiff;
2547
170
    PrevStage = LastStageNum + StageDiff - 1;
2548
170
  }
2549
294
2550
294
  for (MachineBasicBlock::iterator BBI = BB->getFirstNonPHI(),
2551
294
                                   BBE = BB->instr_end();
2552
3.31k
       BBI != BBE; 
++BBI3.02k
) {
2553
13.3k
    for (unsigned i = 0, e = BBI->getNumOperands(); i != e; 
++i10.3k
) {
2554
10.3k
      MachineOperand &MO = BBI->getOperand(i);
2555
10.3k
      if (!MO.isReg() || 
!MO.isDef()8.25k
||
2556
10.3k
          
!TargetRegisterInfo::isVirtualRegister(MO.getReg())3.40k
)
2557
7.90k
        continue;
2558
2.45k
2559
2.45k
      int StageScheduled = Schedule.stageScheduled(getSUnit(&*BBI));
2560
2.45k
      assert(StageScheduled != -1 && "Expecting scheduled instruction.");
2561
2.45k
      unsigned Def = MO.getReg();
2562
2.45k
      unsigned NumPhis = Schedule.getStagesForReg(Def, CurStageNum);
2563
2.45k
      // An instruction scheduled in stage 0 and is used after the loop
2564
2.45k
      // requires a phi in the epilog for the last definition from either
2565
2.45k
      // the kernel or prolog.
2566
2.45k
      if (!InKernel && 
NumPhis == 01.46k
&&
StageScheduled == 01.03k
&&
2567
2.45k
          
hasUseAfterLoop(Def, BB, MRI)393
)
2568
9
        NumPhis = 1;
2569
2.45k
      if (!InKernel && 
(unsigned)StageScheduled > PrologStage1.46k
)
2570
563
        continue;
2571
1.89k
2572
1.89k
      unsigned PhiOp2 = VRMap[PrevStage][Def];
2573
1.89k
      if (MachineInstr *InstOp2 = MRI.getVRegDef(PhiOp2))
2574
1.82k
        if (InstOp2->isPHI() && 
InstOp2->getParent() == NewBB92
)
2575
0
          PhiOp2 = getLoopPhiReg(*InstOp2, BB2);
2576
1.89k
      // The number of Phis can't exceed the number of prolog stages. The
2577
1.89k
      // prolog stage number is zero based.
2578
1.89k
      if (NumPhis > PrologStage + 1 - StageScheduled)
2579
16
        NumPhis = PrologStage + 1 - StageScheduled;
2580
2.56k
      for (unsigned np = 0; np < NumPhis; 
++np672
) {
2581
672
        unsigned PhiOp1 = VRMap[PrologStage][Def];
2582
672
        if (np <= PrologStage)
2583
672
          PhiOp1 = VRMap[PrologStage - np][Def];
2584
672
        if (MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1)) {
2585
672
          if (InstOp1->isPHI() && 
InstOp1->getParent() == KernelBB302
)
2586
302
            PhiOp1 = getInitPhiReg(*InstOp1, KernelBB);
2587
672
          if (InstOp1->isPHI() && 
InstOp1->getParent() == NewBB302
)
2588
0
            PhiOp1 = getInitPhiReg(*InstOp1, NewBB);
2589
672
        }
2590
672
        if (!InKernel)
2591
387
          PhiOp2 = VRMap[PrevStage - np][Def];
2592
672
2593
672
        const TargetRegisterClass *RC = MRI.getRegClass(Def);
2594
672
        unsigned NewReg = MRI.createVirtualRegister(RC);
2595
672
2596
672
        MachineInstrBuilder NewPhi =
2597
672
            BuildMI(*NewBB, NewBB->getFirstNonPHI(), DebugLoc(),
2598
672
                    TII->get(TargetOpcode::PHI), NewReg);
2599
672
        NewPhi.addReg(PhiOp1).addMBB(BB1);
2600
672
        NewPhi.addReg(PhiOp2).addMBB(BB2);
2601
672
        if (np == 0)
2602
637
          InstrMap[NewPhi] = &*BBI;
2603
672
2604
672
        // Rewrite uses and update the map. The actions depend upon whether
2605
672
        // we generating code for the kernel or epilog blocks.
2606
672
        if (InKernel) {
2607
285
          rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np,
2608
285
                                &*BBI, PhiOp1, NewReg);
2609
285
          rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np,
2610
285
                                &*BBI, PhiOp2, NewReg);
2611
285
2612
285
          PhiOp2 = NewReg;
2613
285
          VRMap[PrevStage - np - 1][Def] = NewReg;
2614
387
        } else {
2615
387
          VRMap[CurStageNum - np][Def] = NewReg;
2616
387
          if (np == NumPhis - 1)
2617
368
            rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np,
2618
368
                                  &*BBI, Def, NewReg);
2619
387
        }
2620
672
        if (IsLast && 
np == NumPhis - 1220
)
2621
220
          replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
2622
672
      }
2623
1.89k
    }
2624
3.02k
  }
2625
294
}
2626
2627
/// Remove instructions that generate values with no uses.
2628
/// Typically, these are induction variable operations that generate values
2629
/// used in the loop itself.  A dead instruction has a definition with
2630
/// no uses, or uses that occur in the original loop only.
2631
void SwingSchedulerDAG::removeDeadInstructions(MachineBasicBlock *KernelBB,
2632
124
                                               MBBVectorTy &EpilogBBs) {
2633
124
  // For each epilog block, check that the value defined by each instruction
2634
124
  // is used.  If not, delete it.
2635
124
  for (MBBVectorTy::reverse_iterator MBB = EpilogBBs.rbegin(),
2636
124
                                     MBE = EpilogBBs.rend();
2637
294
       MBB != MBE; 
++MBB170
)
2638
170
    for (MachineBasicBlock::reverse_instr_iterator MI = (*MBB)->instr_rbegin(),
2639
170
                                                   ME = (*MBB)->instr_rend();
2640
1.73k
         MI != ME;) {
2641
1.56k
      // From DeadMachineInstructionElem. Don't delete inline assembly.
2642
1.56k
      if (MI->isInlineAsm()) {
2643
0
        ++MI;
2644
0
        continue;
2645
0
      }
2646
1.56k
      bool SawStore = false;
2647
1.56k
      // Check if it's safe to remove the instruction due to side effects.
2648
1.56k
      // We can, and want to, remove Phis here.
2649
1.56k
      if (!MI->isSafeToMove(nullptr, SawStore) && 
!MI->isPHI()1.07k
) {
2650
267
        ++MI;
2651
267
        continue;
2652
267
      }
2653
1.30k
      bool used = true;
2654
1.30k
      for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
2655
1.30k
                                      MOE = MI->operands_end();
2656
2.53k
           MOI != MOE; 
++MOI1.23k
) {
2657
2.28k
        if (!MOI->isReg() || 
!MOI->isDef()1.79k
)
2658
980
          continue;
2659
1.30k
        unsigned reg = MOI->getReg();
2660
1.30k
        // Assume physical registers are used, unless they are marked dead.
2661
1.30k
        if (TargetRegisterInfo::isPhysicalRegister(reg)) {
2662
0
          used = !MOI->isDead();
2663
0
          if (used)
2664
0
            break;
2665
0
          continue;
2666
0
        }
2667
1.30k
        unsigned realUses = 0;
2668
1.30k
        for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(reg),
2669
1.30k
                                               EI = MRI.use_end();
2670
1.30k
             UI != EI; 
++UI0
) {
2671
1.05k
          // Check if there are any uses that occur only in the original
2672
1.05k
          // loop.  If so, that's not a real use.
2673
1.05k
          if (UI->getParent()->getParent() != BB) {
2674
1.05k
            realUses++;
2675
1.05k
            used = true;
2676
1.05k
            break;
2677
1.05k
          }
2678
1.05k
        }
2679
1.30k
        if (realUses > 0)
2680
1.05k
          break;
2681
251
        used = false;
2682
251
      }
2683
1.30k
      if (!used) {
2684
251
        LIS.RemoveMachineInstrFromMaps(*MI);
2685
251
        MI++->eraseFromParent();
2686
251
        continue;
2687
251
      }
2688
1.05k
      ++MI;
2689
1.05k
    }
2690
124
  // In the kernel block, check if we can remove a Phi that generates a value
2691
124
  // used in an instruction removed in the epilog block.
2692
124
  for (MachineBasicBlock::iterator BBI = KernelBB->instr_begin(),
2693
124
                                   BBE = KernelBB->getFirstNonPHI();
2694
689
       BBI != BBE;) {
2695
565
    MachineInstr *MI = &*BBI;
2696
565
    ++BBI;
2697
565
    unsigned reg = MI->getOperand(0).getReg();
2698
565
    if (MRI.use_begin(reg) == MRI.use_end()) {
2699
0
      LIS.RemoveMachineInstrFromMaps(*MI);
2700
0
      MI->eraseFromParent();
2701
0
    }
2702
565
  }
2703
124
}
2704
2705
/// For loop carried definitions, we split the lifetime of a virtual register
2706
/// that has uses past the definition in the next iteration. A copy with a new
2707
/// virtual register is inserted before the definition, which helps with
2708
/// generating a better register assignment.
2709
///
2710
///   v1 = phi(a, v2)     v1 = phi(a, v2)
2711
///   v2 = phi(b, v3)     v2 = phi(b, v3)
2712
///   v3 = ..             v4 = copy v1
2713
///   .. = V1             v3 = ..
2714
///                       .. = v4
2715
void SwingSchedulerDAG::splitLifetimes(MachineBasicBlock *KernelBB,
2716
                                       MBBVectorTy &EpilogBBs,
2717
124
                                       SMSchedule &Schedule) {
2718
124
  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
2719
565
  for (auto &PHI : KernelBB->phis()) {
2720
565
    unsigned Def = PHI.getOperand(0).getReg();
2721
565
    // Check for any Phi definition that used as an operand of another Phi
2722
565
    // in the same block.
2723
565
    for (MachineRegisterInfo::use_instr_iterator I = MRI.use_instr_begin(Def),
2724
565
                                                 E = MRI.use_instr_end();
2725
1.47k
         I != E; 
++I912
) {
2726
934
      if (I->isPHI() && 
I->getParent() == KernelBB212
) {
2727
49
        // Get the loop carried definition.
2728
49
        unsigned LCDef = getLoopPhiReg(PHI, KernelBB);
2729
49
        if (!LCDef)
2730
0
          continue;
2731
49
        MachineInstr *MI = MRI.getVRegDef(LCDef);
2732
49
        if (!MI || MI->getParent() != KernelBB || MI->isPHI())
2733
5
          continue;
2734
44
        // Search through the rest of the block looking for uses of the Phi
2735
44
        // definition. If one occurs, then split the lifetime.
2736
44
        unsigned SplitReg = 0;
2737
44
        for (auto &BBJ : make_range(MachineBasicBlock::instr_iterator(MI),
2738
44
                                    KernelBB->instr_end()))
2739
287
          if (BBJ.readsRegister(Def)) {
2740
37
            // We split the lifetime when we find the first use.
2741
37
            if (SplitReg == 0) {
2742
22
              SplitReg = MRI.createVirtualRegister(MRI.getRegClass(Def));
2743
22
              BuildMI(*KernelBB, MI, MI->getDebugLoc(),
2744
22
                      TII->get(TargetOpcode::COPY), SplitReg)
2745
22
                  .addReg(Def);
2746
22
            }
2747
37
            BBJ.substituteRegister(Def, SplitReg, 0, *TRI);
2748
37
          }
2749
44
        if (!SplitReg)
2750
22
          continue;
2751
22
        // Search through each of the epilog blocks for any uses to be renamed.
2752
22
        for (auto &Epilog : EpilogBBs)
2753
35
          for (auto &I : *Epilog)
2754
536
            if (I.readsRegister(Def))
2755
36
              I.substituteRegister(Def, SplitReg, 0, *TRI);
2756
22
        break;
2757
22
      }
2758
934
    }
2759
565
  }
2760
124
}
2761
2762
/// Remove the incoming block from the Phis in a basic block.
2763
40
static void removePhis(MachineBasicBlock *BB, MachineBasicBlock *Incoming) {
2764
173
  for (MachineInstr &MI : *BB) {
2765
173
    if (!MI.isPHI())
2766
40
      break;
2767
159
    
for (unsigned i = 1, e = MI.getNumOperands(); 133
i != e;
i += 226
)
2768
159
      if (MI.getOperand(i + 1).getMBB() == Incoming) {
2769
133
        MI.RemoveOperand(i + 1);
2770
133
        MI.RemoveOperand(i);
2771
133
        break;
2772
133
      }
2773
133
  }
2774
40
}
2775
2776
/// Create branches from each prolog basic block to the appropriate epilog
2777
/// block.  These edges are needed if the loop ends before reaching the
2778
/// kernel.
2779
void SwingSchedulerDAG::addBranches(MachineBasicBlock &PreheaderBB,
2780
                                    MBBVectorTy &PrologBBs,
2781
                                    MachineBasicBlock *KernelBB,
2782
                                    MBBVectorTy &EpilogBBs,
2783
124
                                    SMSchedule &Schedule, ValueMapTy *VRMap) {
2784
124
  assert(PrologBBs.size() == EpilogBBs.size() && "Prolog/Epilog mismatch");
2785
124
  MachineInstr *IndVar = Pass.LI.LoopInductionVar;
2786
124
  MachineInstr *Cmp = Pass.LI.LoopCompare;
2787
124
  MachineBasicBlock *LastPro = KernelBB;
2788
124
  MachineBasicBlock *LastEpi = KernelBB;
2789
124
2790
124
  // Start from the blocks connected to the kernel and work "out"
2791
124
  // to the first prolog and the last epilog blocks.
2792
124
  SmallVector<MachineInstr *, 4> PrevInsts;
2793
124
  unsigned MaxIter = PrologBBs.size() - 1;
2794
124
  unsigned LC = UINT_MAX;
2795
124
  unsigned LCMin = UINT_MAX;
2796
294
  for (unsigned i = 0, j = MaxIter; i <= MaxIter; 
++i, --j170
) {
2797
170
    // Add branches to the prolog that go to the corresponding
2798
170
    // epilog, and the fall-thru prolog/kernel block.
2799
170
    MachineBasicBlock *Prolog = PrologBBs[j];
2800
170
    MachineBasicBlock *Epilog = EpilogBBs[i];
2801
170
    // We've executed one iteration, so decrement the loop count and check for
2802
170
    // the loop end.
2803
170
    SmallVector<MachineOperand, 4> Cond;
2804
170
    // Check if the LOOP0 has already been removed. If so, then there is no need
2805
170
    // to reduce the trip count.
2806
170
    if (LC != 0)
2807
169
      LC = TII->reduceLoopCount(*Prolog, PreheaderBB, IndVar, *Cmp, Cond,
2808
169
                                PrevInsts, j, MaxIter);
2809
170
2810
170
    // Record the value of the first trip count, which is used to determine if
2811
170
    // branches and blocks can be removed for constant trip counts.
2812
170
    if (LCMin == UINT_MAX)
2813
170
      
LCMin = LC124
;
2814
170
2815
170
    unsigned numAdded = 0;
2816
170
    if (TargetRegisterInfo::isVirtualRegister(LC)) {
2817
130
      Prolog->addSuccessor(Epilog);
2818
130
      numAdded = TII->insertBranch(*Prolog, Epilog, LastPro, Cond, DebugLoc());
2819
130
    } else 
if (40
j >= LCMin40
) {
2820
5
      Prolog->addSuccessor(Epilog);
2821
5
      Prolog->removeSuccessor(LastPro);
2822
5
      LastEpi->removeSuccessor(Epilog);
2823
5
      numAdded = TII->insertBranch(*Prolog, Epilog, nullptr, Cond, DebugLoc());
2824
5
      removePhis(Epilog, LastEpi);
2825
5
      // Remove the blocks that are no longer referenced.
2826
5
      if (LastPro != LastEpi) {
2827
1
        LastEpi->clear();
2828
1
        LastEpi->eraseFromParent();
2829
1
      }
2830
5
      LastPro->clear();
2831
5
      LastPro->eraseFromParent();
2832
35
    } else {
2833
35
      numAdded = TII->insertBranch(*Prolog, LastPro, nullptr, Cond, DebugLoc());
2834
35
      removePhis(Epilog, Prolog);
2835
35
    }
2836
170
    LastPro = Prolog;
2837
170
    LastEpi = Epilog;
2838
170
    for (MachineBasicBlock::reverse_instr_iterator I = Prolog->instr_rbegin(),
2839
170
                                                   E = Prolog->instr_rend();
2840
470
         I != E && numAdded > 0; 
++I, --numAdded300
)
2841
300
      updateInstruction(&*I, false, j, 0, Schedule, VRMap);
2842
170
  }
2843
124
}
2844
2845
/// Return true if we can compute the amount the instruction changes
2846
/// during each iteration. Set Delta to the amount of the change.
2847
1.36k
bool SwingSchedulerDAG::computeDelta(MachineInstr &MI, unsigned &Delta) {
2848
1.36k
  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
2849
1.36k
  const MachineOperand *BaseOp;
2850
1.36k
  int64_t Offset;
2851
1.36k
  if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, TRI))
2852
155
    return false;
2853
1.20k
2854
1.20k
  if (!BaseOp->isReg())
2855
0
    return false;
2856
1.20k
2857
1.20k
  unsigned BaseReg = BaseOp->getReg();
2858
1.20k
2859
1.20k
  MachineRegisterInfo &MRI = MF.getRegInfo();
2860
1.20k
  // Check if there is a Phi. If so, get the definition in the loop.
2861
1.20k
  MachineInstr *BaseDef = MRI.getVRegDef(BaseReg);
2862
1.20k
  if (BaseDef && BaseDef->isPHI()) {
2863
1.05k
    BaseReg = getLoopPhiReg(*BaseDef, MI.getParent());
2864
1.05k
    BaseDef = MRI.getVRegDef(BaseReg);
2865
1.05k
  }
2866
1.20k
  if (!BaseDef)
2867
15
    return false;
2868
1.19k
2869
1.19k
  int D = 0;
2870
1.19k
  if (!TII->getIncrementValue(*BaseDef, D) && 
D >= 0138
)
2871
138
    return false;
2872
1.05k
2873
1.05k
  Delta = D;
2874
1.05k
  return true;
2875
1.05k
}
2876
2877
/// Update the memory operand with a new offset when the pipeliner
2878
/// generates a new copy of the instruction that refers to a
2879
/// different memory location.
2880
void SwingSchedulerDAG::updateMemOperands(MachineInstr &NewMI,
2881
2.43k
                                          MachineInstr &OldMI, unsigned Num) {
2882
2.43k
  if (Num == 0)
2883
994
    return;
2884
1.44k
  // If the instruction has memory operands, then adjust the offset
2885
1.44k
  // when the instruction appears in different stages.
2886
1.44k
  if (NewMI.memoperands_empty())
2887
995
    return;
2888
447
  SmallVector<MachineMemOperand *, 2> NewMMOs;
2889
477
  for (MachineMemOperand *MMO : NewMI.memoperands()) {
2890
477
    // TODO: Figure out whether isAtomic is really necessary (see D57601).
2891
477
    if (MMO->isVolatile() || MMO->isAtomic() ||
2892
477
        (MMO->isInvariant() && 
MMO->isDereferenceable()0
) ||
2893
477
        (!MMO->getValue())) {
2894
0
      NewMMOs.push_back(MMO);
2895
0
      continue;
2896
0
    }
2897
477
    unsigned Delta;
2898
477
    if (Num != UINT_MAX && 
computeDelta(OldMI, Delta)260
) {
2899
185
      int64_t AdjOffset = Delta * Num;
2900
185
      NewMMOs.push_back(
2901
185
          MF.getMachineMemOperand(MMO, AdjOffset, MMO->getSize()));
2902
292
    } else {
2903
292
      NewMMOs.push_back(
2904
292
          MF.getMachineMemOperand(MMO, 0, MemoryLocation::UnknownSize));
2905
292
    }
2906
477
  }
2907
447
  NewMI.setMemRefs(MF, NewMMOs);
2908
447
}
2909
2910
/// Clone the instruction for the new pipelined loop and update the
2911
/// memory operands, if needed.
2912
MachineInstr *SwingSchedulerDAG::cloneInstr(MachineInstr *OldMI,
2913
                                            unsigned CurStageNum,
2914
1.62k
                                            unsigned InstStageNum) {
2915
1.62k
  MachineInstr *NewMI = MF.CloneMachineInstr(OldMI);
2916
1.62k
  // Check for tied operands in inline asm instructions. This should be handled
2917
1.62k
  // elsewhere, but I'm not sure of the best solution.
2918
1.62k
  if (OldMI->isInlineAsm())
2919
0
    for (unsigned i = 0, e = OldMI->getNumOperands(); i != e; ++i) {
2920
0
      const auto &MO = OldMI->getOperand(i);
2921
0
      if (MO.isReg() && MO.isUse())
2922
0
        break;
2923
0
      unsigned UseIdx;
2924
0
      if (OldMI->isRegTiedToUseOperand(i, &UseIdx))
2925
0
        NewMI->tieOperands(i, UseIdx);
2926
0
    }
2927
1.62k
  updateMemOperands(*NewMI, *OldMI, CurStageNum - InstStageNum);
2928
1.62k
  return NewMI;
2929
1.62k
}
2930
2931
/// Clone the instruction for the new pipelined loop. If needed, this
2932
/// function updates the instruction using the values saved in the
2933
/// InstrChanges structure.
2934
MachineInstr *SwingSchedulerDAG::cloneAndChangeInstr(MachineInstr *OldMI,
2935
                                                     unsigned CurStageNum,
2936
                                                     unsigned InstStageNum,
2937
808
                                                     SMSchedule &Schedule) {
2938
808
  MachineInstr *NewMI = MF.CloneMachineInstr(OldMI);
2939
808
  DenseMap<SUnit *, std::pair<unsigned, int64_t>>::iterator It =
2940
808
      InstrChanges.find(getSUnit(OldMI));
2941
808
  if (It != InstrChanges.end()) {
2942
26
    std::pair<unsigned, int64_t> RegAndOffset = It->second;
2943
26
    unsigned BasePos, OffsetPos;
2944
26
    if (!TII->getBaseAndOffsetPosition(*OldMI, BasePos, OffsetPos))
2945
0
      return nullptr;
2946
26
    int64_t NewOffset = OldMI->getOperand(OffsetPos).getImm();
2947
26
    MachineInstr *LoopDef = findDefInLoop(RegAndOffset.first);
2948
26
    if (Schedule.stageScheduled(getSUnit(LoopDef)) > (signed)InstStageNum)
2949
26
      NewOffset += RegAndOffset.second * (CurStageNum - InstStageNum);
2950
26
    NewMI->getOperand(OffsetPos).setImm(NewOffset);
2951
26
  }
2952
808
  updateMemOperands(*NewMI, *OldMI, CurStageNum - InstStageNum);
2953
808
  return NewMI;
2954
808
}
2955
2956
/// Update the machine instruction with new virtual registers.  This
2957
/// function may change the defintions and/or uses.
2958
void SwingSchedulerDAG::updateInstruction(MachineInstr *NewMI, bool LastDef,
2959
                                          unsigned CurStageNum,
2960
                                          unsigned InstrStageNum,
2961
                                          SMSchedule &Schedule,
2962
2.98k
                                          ValueMapTy *VRMap) {
2963
12.8k
  for (unsigned i = 0, e = NewMI->getNumOperands(); i != e; 
++i9.89k
) {
2964
9.89k
    MachineOperand &MO = NewMI->getOperand(i);
2965
9.89k
    if (!MO.isReg() || 
!TargetRegisterInfo::isVirtualRegister(MO.getReg())7.84k
)
2966
3.09k
      continue;
2967
6.80k
    unsigned reg = MO.getReg();
2968
6.80k
    if (MO.isDef()) {
2969
2.45k
      // Create a new virtual register for the definition.
2970
2.45k
      const TargetRegisterClass *RC = MRI.getRegClass(reg);
2971
2.45k
      unsigned NewReg = MRI.createVirtualRegister(RC);
2972
2.45k
      MO.setReg(NewReg);
2973
2.45k
      VRMap[CurStageNum][reg] = NewReg;
2974
2.45k
      if (LastDef)
2975
456
        replaceRegUsesAfterLoop(reg, NewReg, BB, MRI, LIS);
2976
4.35k
    } else if (MO.isUse()) {
2977
4.35k
      MachineInstr *Def = MRI.getVRegDef(reg);
2978
4.35k
      // Compute the stage that contains the last definition for instruction.
2979
4.35k
      int DefStageNum = Schedule.stageScheduled(getSUnit(Def));
2980
4.35k
      unsigned StageNum = CurStageNum;
2981
4.35k
      if (DefStageNum != -1 && 
(int)InstrStageNum > DefStageNum3.36k
) {
2982
466
        // Compute the difference in stages between the defintion and the use.
2983
466
        unsigned StageDiff = (InstrStageNum - DefStageNum);
2984
466
        // Make an adjustment to get the last definition.
2985
466
        StageNum -= StageDiff;
2986
466
      }
2987
4.35k
      if (VRMap[StageNum].count(reg))
2988
1.99k
        MO.setReg(VRMap[StageNum][reg]);
2989
4.35k
    }
2990
6.80k
  }
2991
2.98k
}
2992
2993
/// Return the instruction in the loop that defines the register.
2994
/// If the definition is a Phi, then follow the Phi operand to
2995
/// the instruction in the loop.
2996
51
MachineInstr *SwingSchedulerDAG::findDefInLoop(unsigned Reg) {
2997
51
  SmallPtrSet<MachineInstr *, 8> Visited;
2998
51
  MachineInstr *Def = MRI.getVRegDef(Reg);
2999
76
  while (Def->isPHI()) {
3000
25
    if (!Visited.insert(Def).second)
3001
0
      break;
3002
50
    
for (unsigned i = 1, e = Def->getNumOperands(); 25
i < e;
i += 225
)
3003
50
      if (Def->getOperand(i + 1).getMBB() == BB) {
3004
25
        Def = MRI.getVRegDef(Def->getOperand(i).getReg());
3005
25
        break;
3006
25
      }
3007
25
  }
3008
51
  return Def;
3009
51
}
3010
3011
/// Return the new name for the value from the previous stage.
3012
unsigned SwingSchedulerDAG::getPrevMapVal(unsigned StageNum, unsigned PhiStage,
3013
                                          unsigned LoopVal, unsigned LoopStage,
3014
                                          ValueMapTy *VRMap,
3015
414
                                          MachineBasicBlock *BB) {
3016
414
  unsigned PrevVal = 0;
3017
414
  if (StageNum > PhiStage) {
3018
69
    MachineInstr *LoopInst = MRI.getVRegDef(LoopVal);
3019
69
    if (PhiStage == LoopStage && 
VRMap[StageNum - 1].count(LoopVal)62
)
3020
52
      // The name is defined in the previous stage.
3021
52
      PrevVal = VRMap[StageNum - 1][LoopVal];
3022
17
    else if (VRMap[StageNum].count(LoopVal))
3023
7
      // The previous name is defined in the current stage when the instruction
3024
7
      // order is swapped.
3025
7
      PrevVal = VRMap[StageNum][LoopVal];
3026
10
    else if (!LoopInst->isPHI() || LoopInst->getParent() != BB)
3027
0
      // The loop value hasn't yet been scheduled.
3028
0
      PrevVal = LoopVal;
3029
10
    else if (StageNum == PhiStage + 1)
3030
10
      // The loop value is another phi, which has not been scheduled.
3031
10
      PrevVal = getInitPhiReg(*LoopInst, BB);
3032
0
    else if (StageNum > PhiStage + 1 && LoopInst->getParent() == BB)
3033
0
      // The loop value is another phi, which has been scheduled.
3034
0
      PrevVal =
3035
0
          getPrevMapVal(StageNum - 1, PhiStage, getLoopPhiReg(*LoopInst, BB),
3036
0
                        LoopStage, VRMap, BB);
3037
69
  }
3038
414
  return PrevVal;
3039
414
}
3040
3041
/// Rewrite the Phi values in the specified block to use the mappings
3042
/// from the initial operand. Once the Phi is scheduled, we switch
3043
/// to using the loop value instead of the Phi value, so those names
3044
/// do not need to be rewritten.
3045
void SwingSchedulerDAG::rewritePhiValues(MachineBasicBlock *NewBB,
3046
                                         unsigned StageNum,
3047
                                         SMSchedule &Schedule,
3048
                                         ValueMapTy *VRMap,
3049
170
                                         InstrMapTy &InstrMap) {
3050
391
  for (auto &PHI : BB->phis()) {
3051
391
    unsigned InitVal = 0;
3052
391
    unsigned LoopVal = 0;
3053
391
    getPhiRegs(PHI, BB, InitVal, LoopVal);
3054
391
    unsigned PhiDef = PHI.getOperand(0).getReg();
3055
391
3056
391
    unsigned PhiStage =
3057
391
        (unsigned)Schedule.stageScheduled(getSUnit(MRI.getVRegDef(PhiDef)));
3058
391
    unsigned LoopStage =
3059
391
        (unsigned)Schedule.stageScheduled(getSUnit(MRI.getVRegDef(LoopVal)));
3060
391
    unsigned NumPhis = Schedule.getStagesForPhi(PhiDef);
3061
391
    if (NumPhis > StageNum)
3062
29
      NumPhis = StageNum;
3063
805
    for (unsigned np = 0; np <= NumPhis; 
++np414
) {
3064
414
      unsigned NewVal =
3065
414
          getPrevMapVal(StageNum - np, PhiStage, LoopVal, LoopStage, VRMap, BB);
3066
414
      if (!NewVal)
3067
345
        NewVal = InitVal;
3068
414
      rewriteScheduledInstr(NewBB, Schedule, InstrMap, StageNum - np, np, &PHI,
3069
414
                            PhiDef, NewVal);
3070
414
    }
3071
391
  }
3072
170
}
3073
3074
/// Rewrite a previously scheduled instruction to use the register value
3075
/// from the new instruction. Make sure the instruction occurs in the
3076
/// basic block, and we don't change the uses in the new instruction.
3077
void SwingSchedulerDAG::rewriteScheduledInstr(
3078
    MachineBasicBlock *BB, SMSchedule &Schedule, InstrMapTy &InstrMap,
3079
    unsigned CurStageNum, unsigned PhiNum, MachineInstr *Phi, unsigned OldReg,
3080
2.10k
    unsigned NewReg, unsigned PrevReg) {
3081
2.10k
  bool InProlog = (CurStageNum < Schedule.getMaxStageCount());
3082
2.10k
  int StagePhi = Schedule.stageScheduled(getSUnit(Phi)) + PhiNum;
3083
2.10k
  // Rewrite uses that have been scheduled already to use the new
3084
2.10k
  // Phi register.
3085
2.10k
  for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(OldReg),
3086
2.10k
                                         EI = MRI.use_end();
3087
7.17k
       UI != EI;) {
3088
5.07k
    MachineOperand &UseOp = *UI;
3089
5.07k
    MachineInstr *UseMI = UseOp.getParent();
3090
5.07k
    ++UI;
3091
5.07k
    if (UseMI->getParent() != BB)
3092
2.62k
      continue;
3093
2.44k
    if (UseMI->isPHI()) {
3094
643
      if (!Phi->isPHI() && 
UseMI->getOperand(0).getReg() == NewReg588
)
3095
570
        continue;
3096
73
      if (getLoopPhiReg(*UseMI, BB) != OldReg)
3097
57
        continue;
3098
1.82k
    }
3099
1.82k
    InstrMapTy::iterator OrigInstr = InstrMap.find(UseMI);
3100
1.82k
    assert(OrigInstr != InstrMap.end() && "Instruction not scheduled.");
3101
1.82k
    SUnit *OrigMISU = getSUnit(OrigInstr->second);
3102
1.82k
    int StageSched = Schedule.stageScheduled(OrigMISU);
3103
1.82k
    int CycleSched = Schedule.cycleScheduled(OrigMISU);
3104
1.82k
    unsigned ReplaceReg = 0;
3105
1.82k
    // This is the stage for the scheduled instruction.
3106
1.82k
    if (StagePhi == StageSched && 
Phi->isPHI()1.01k
) {
3107
970
      int CyclePhi = Schedule.cycleScheduled(getSUnit(Phi));
3108
970
      if (PrevReg && 
InProlog402
)
3109
0
        ReplaceReg = PrevReg;
3110
970
      else if (PrevReg && 
!Schedule.isLoopCarried(this, *Phi)402
&&
3111
970
               
(15
CyclePhi <= CycleSched15
||
OrigMISU->getInstr()->isPHI()0
))
3112
15
        ReplaceReg = PrevReg;
3113
955
      else
3114
955
        ReplaceReg = NewReg;
3115
970
    }
3116
1.82k
    // The scheduled instruction occurs before the scheduled Phi, and the
3117
1.82k
    // Phi is not loop carried.
3118
1.82k
    if (!InProlog && 
StagePhi + 1 == StageSched1.38k
&&
3119
1.82k
        
!Schedule.isLoopCarried(this, *Phi)721
)
3120
647
      ReplaceReg = NewReg;
3121
1.82k
    if (StagePhi > StageSched && 
Phi->isPHI()44
)
3122
44
      ReplaceReg = NewReg;
3123
1.82k
    if (!InProlog && 
!Phi->isPHI()1.38k
&&
StagePhi < StageSched686
)
3124
643
      ReplaceReg = NewReg;
3125
1.82k
    if (ReplaceReg) {
3126
1.68k
      MRI.constrainRegClass(ReplaceReg, MRI.getRegClass(OldReg));
3127
1.68k
      UseOp.setReg(ReplaceReg);
3128
1.68k
    }
3129
1.82k
  }
3130
2.10k
}
3131
3132
/// Check if we can change the instruction to use an offset value from the
3133
/// previous iteration. If so, return true and set the base and offset values
3134
/// so that we can rewrite the load, if necessary.
3135
///   v1 = Phi(v0, v3)
3136
///   v2 = load v1, 0
3137
///   v3 = post_store v1, 4, x
3138
/// This function enables the load to be rewritten as v2 = load v3, 4.
3139
bool SwingSchedulerDAG::canUseLastOffsetValue(MachineInstr *MI,
3140
                                              unsigned &BasePos,
3141
                                              unsigned &OffsetPos,
3142
                                              unsigned &NewBase,
3143
2.03k
                                              int64_t &Offset) {
3144
2.03k
  // Get the load instruction.
3145
2.03k
  if (TII->isPostIncrement(*MI))
3146
181
    return false;
3147
1.85k
  unsigned BasePosLd, OffsetPosLd;
3148
1.85k
  if (!TII->getBaseAndOffsetPosition(*MI, BasePosLd, OffsetPosLd))
3149
1.57k
    return false;
3150
282
  unsigned BaseReg = MI->getOperand(BasePosLd).getReg();
3151
282
3152
282
  // Look for the Phi instruction.
3153
282
  MachineRegisterInfo &MRI = MI->getMF()->getRegInfo();
3154
282
  MachineInstr *Phi = MRI.getVRegDef(BaseReg);
3155
282
  if (!Phi || !Phi->isPHI())
3156
110
    return false;
3157
172
  // Get the register defined in the loop block.
3158
172
  unsigned PrevReg = getLoopPhiReg(*Phi, MI->getParent());
3159
172
  if (!PrevReg)
3160
0
    return false;
3161
172
3162
172
  // Check for the post-increment load/store instruction.
3163
172
  MachineInstr *PrevDef = MRI.getVRegDef(PrevReg);
3164
172
  if (!PrevDef || PrevDef == MI)
3165
0
    return false;
3166
172
3167
172
  if (!TII->isPostIncrement(*PrevDef))
3168
147
    return false;
3169
25
3170
25
  unsigned BasePos1 = 0, OffsetPos1 = 0;
3171
25
  if (!TII->getBaseAndOffsetPosition(*PrevDef, BasePos1, OffsetPos1))
3172
0
    return false;
3173
25
3174
25
  // Make sure that the instructions do not access the same memory location in
3175
25
  // the next iteration.
3176
25
  int64_t LoadOffset = MI->getOperand(OffsetPosLd).getImm();
3177
25
  int64_t StoreOffset = PrevDef->getOperand(OffsetPos1).getImm();
3178
25
  MachineInstr *NewMI = MF.CloneMachineInstr(MI);
3179
25
  NewMI->getOperand(OffsetPosLd).setImm(LoadOffset + StoreOffset);
3180
25
  bool Disjoint = TII->areMemAccessesTriviallyDisjoint(*NewMI, *PrevDef);
3181
25
  MF.DeleteMachineInstr(NewMI);
3182
25
  if (!Disjoint)
3183
0
    return false;
3184
25
3185
25
  // Set the return value once we determine that we return true.
3186
25
  BasePos = BasePosLd;
3187
25
  OffsetPos = OffsetPosLd;
3188
25
  NewBase = PrevReg;
3189
25
  Offset = StoreOffset;
3190
25
  return true;
3191
25
}
3192
3193
/// Apply changes to the instruction if needed. The changes are need
3194
/// to improve the scheduling and depend up on the final schedule.
3195
void SwingSchedulerDAG::applyInstrChange(MachineInstr *MI,
3196
1.65k
                                         SMSchedule &Schedule) {
3197
1.65k
  SUnit *SU = getSUnit(MI);
3198
1.65k
  DenseMap<SUnit *, std::pair<unsigned, int64_t>>::iterator It =
3199
1.65k
      InstrChanges.find(SU);
3200
1.65k
  if (It != InstrChanges.end()) {
3201
25
    std::pair<unsigned, int64_t> RegAndOffset = It->second;
3202
25
    unsigned BasePos, OffsetPos;
3203
25
    if (!TII->getBaseAndOffsetPosition(*MI, BasePos, OffsetPos))
3204
0
      return;
3205
25
    unsigned BaseReg = MI->getOperand(BasePos).getReg();
3206
25
    MachineInstr *LoopDef = findDefInLoop(BaseReg);
3207
25
    int DefStageNum = Schedule.stageScheduled(getSUnit(LoopDef));
3208
25
    int DefCycleNum = Schedule.cycleScheduled(getSUnit(LoopDef));
3209
25
    int BaseStageNum = Schedule.stageScheduled(SU);
3210
25
    int BaseCycleNum = Schedule.cycleScheduled(SU);
3211
25
    if (BaseStageNum < DefStageNum) {
3212
19
      MachineInstr *NewMI = MF.CloneMachineInstr(MI);
3213
19
      int OffsetDiff = DefStageNum - BaseStageNum;
3214
19
      if (DefCycleNum < BaseCycleNum) {
3215
2
        NewMI->getOperand(BasePos).setReg(RegAndOffset.first);
3216
2
        if (OffsetDiff > 0)
3217
2
          --OffsetDiff;
3218
2
      }
3219
19
      int64_t NewOffset =
3220
19
          MI->getOperand(OffsetPos).getImm() + RegAndOffset.second * OffsetDiff;
3221
19
      NewMI->getOperand(OffsetPos).setImm(NewOffset);
3222
19
      SU->setInstr(NewMI);
3223
19
      MISUnitMap[NewMI] = SU;
3224
19
      NewMIs.insert(NewMI);
3225
19
    }
3226
25
  }
3227
1.65k
}
3228
3229
/// Return true for an order or output dependence that is loop carried
3230
/// potentially. A dependence is loop carried if the destination defines a valu
3231
/// that may be used or defined by the source in a subsequent iteration.
3232
bool SwingSchedulerDAG::isLoopCarriedDep(SUnit *Source, const SDep &Dep,
3233
4.51k
                                         bool isSucc) {
3234
4.51k
  if ((Dep.getKind() != SDep::Order && 
Dep.getKind() != SDep::Output3.73k
) ||
3235
4.51k
      
Dep.isArtificial()781
)
3236
3.74k
    return false;
3237
775
3238
775
  if (!SwpPruneLoopCarried)
3239
2
    return true;
3240
773
3241
773
  if (Dep.getKind() == SDep::Output)
3242
4
    return true;
3243
769
3244
769
  MachineInstr *SI = Source->getInstr();
3245
769
  MachineInstr *DI = Dep.getSUnit()->getInstr();
3246
769
  if (!isSucc)
3247
244
    std::swap(SI, DI);
3248
769
  assert(SI != nullptr && DI != nullptr && "Expecting SUnit with an MI.");
3249
769
3250
769
  // Assume ordered loads and stores may have a loop carried dependence.
3251
769
  if (SI->hasUnmodeledSideEffects() || 
DI->hasUnmodeledSideEffects()766
||
3252
769
      
SI->mayRaiseFPException()766
||
DI->mayRaiseFPException()766
||
3253
769
      
SI->hasOrderedMemoryRef()766
||
DI->hasOrderedMemoryRef()766
)
3254
3
    return true;
3255
766
3256
766
  // Only chain dependences between a load and store can be loop carried.
3257
766
  if (!DI->mayStore() || 
!SI->mayLoad()707
)
3258
110
    return false;
3259
656
3260
656
  unsigned DeltaS, DeltaD;
3261
656
  if (!computeDelta(*SI, DeltaS) || 
!computeDelta(*DI, DeltaD)447
)
3262
233
    return true;
3263
423
3264
423
  const MachineOperand *BaseOpS, *BaseOpD;
3265
423
  int64_t OffsetS, OffsetD;
3266
423
  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
3267
423
  if (!TII->getMemOperandWithOffset(*SI, BaseOpS, OffsetS, TRI) ||
3268
423
      !TII->getMemOperandWithOffset(*DI, BaseOpD, OffsetD, TRI))
3269
0
    return true;
3270
423
3271
423
  if (!BaseOpS->isIdenticalTo(*BaseOpD))
3272
280
    return true;
3273
143
3274
143
  // Check that the base register is incremented by a constant value for each
3275
143
  // iteration.
3276
143
  MachineInstr *Def = MRI.getVRegDef(BaseOpS->getReg());
3277
143
  if (!Def || !Def->isPHI())
3278
0
    return true;
3279
143
  unsigned InitVal = 0;
3280
143
  unsigned LoopVal = 0;
3281
143
  getPhiRegs(*Def, BB, InitVal, LoopVal);
3282
143
  MachineInstr *LoopDef = MRI.getVRegDef(LoopVal);
3283
143
  int D = 0;
3284
143
  if (!LoopDef || !TII->getIncrementValue(*LoopDef, D))
3285
0
    return true;
3286
143
3287
143
  uint64_t AccessSizeS = (*SI->memoperands_begin())->getSize();
3288
143
  uint64_t AccessSizeD = (*DI->memoperands_begin())->getSize();
3289
143
3290
143
  // This is the main test, which checks the offset values and the loop
3291
143
  // increment value to determine if the accesses may be loop carried.
3292
143
  if (AccessSizeS == MemoryLocation::UnknownSize ||
3293
143
      AccessSizeD == MemoryLocation::UnknownSize)
3294
0
    return true;
3295
143
3296
143
  if (DeltaS != DeltaD || DeltaS < AccessSizeS || DeltaD < AccessSizeD)
3297
0
    return true;
3298
143
3299
143
  return (OffsetS + (int64_t)AccessSizeS < OffsetD + (int64_t)AccessSizeD);
3300
143
}
3301
3302
192
void SwingSchedulerDAG::postprocessDAG() {
3303
192
  for (auto &M : Mutations)
3304
572
    M->apply(this);
3305
192
}
3306
3307
/// Try to schedule the node at the specified StartCycle and continue
3308
/// until the node is schedule or the EndCycle is reached.  This function
3309
/// returns true if the node is scheduled.  This routine may search either
3310
/// forward or backward for a place to insert the instruction based upon
3311
/// the relative values of StartCycle and EndCycle.
3312
3.15k
bool SMSchedule::insert(SUnit *SU, int StartCycle, int EndCycle, int II) {
3313
3.15k
  bool forward = true;
3314
3.15k
  LLVM_DEBUG({
3315
3.15k
    dbgs() << "Trying to insert node between " << StartCycle << " and "
3316
3.15k
           << EndCycle << " II: " << II << "\n";
3317
3.15k
  });
3318
3.15k
  if (StartCycle > EndCycle)
3319
1.81k
    forward = false;
3320
3.15k
3321
3.15k
  // The terminating condition depends on the direction.
3322
3.15k
  int termCycle = forward ? 
EndCycle + 11.34k
:
EndCycle - 11.81k
;
3323
4.10k
  for (int curCycle = StartCycle; curCycle != termCycle;
3324
4.08k
       
forward 947
?
++curCycle366
:
--curCycle581
) {
3325
4.08k
3326
4.08k
    // Add the already scheduled instructions at the specified cycle to the
3327
4.08k
    // DFA.
3328
4.08k
    ProcItinResources.clearResources();
3329
4.08k
    for (int checkCycle = FirstCycle + ((curCycle - FirstCycle) % II);
3330
9.24k
         checkCycle <= LastCycle; 
checkCycle += II5.15k
) {
3331
5.15k
      std::deque<SUnit *> &cycleInstrs = ScheduledInstrs[checkCycle];
3332
5.15k
3333
5.15k
      for (std::deque<SUnit *>::iterator I = cycleInstrs.begin(),
3334
5.15k
                                         E = cycleInstrs.end();
3335
10.8k
           I != E; 
++I5.66k
) {
3336
5.66k
        if (ST.getInstrInfo()->isZeroCost((*I)->getInstr()->getOpcode()))
3337
1.24k
          continue;
3338
4.42k
        assert(ProcItinResources.canReserveResources(*(*I)->getInstr()) &&
3339
4.42k
               "These instructions have already been scheduled.");
3340
4.42k
        ProcItinResources.reserveResources(*(*I)->getInstr());
3341
4.42k
      }
3342
5.15k
    }
3343
4.08k
    if (ST.getInstrInfo()->isZeroCost(SU->getInstr()->getOpcode()) ||
3344
4.08k
        
ProcItinResources.canReserveResources(*SU->getInstr())3.49k
) {
3345
3.14k
      LLVM_DEBUG({
3346
3.14k
        dbgs() << "\tinsert at cycle " << curCycle << " ";
3347
3.14k
        SU->getInstr()->dump();
3348
3.14k
      });
3349
3.14k
3350
3.14k
      ScheduledInstrs[curCycle].push_back(SU);
3351
3.14k
      InstrToCycle.insert(std::make_pair(SU, curCycle));
3352
3.14k
      if (curCycle > LastCycle)
3353
402
        LastCycle = curCycle;
3354
3.14k
      if (curCycle < FirstCycle)
3355
138
        FirstCycle = curCycle;
3356
3.14k
      return true;
3357
3.14k
    }
3358
947
    LLVM_DEBUG({
3359
947
      dbgs() << "\tfailed to insert at cycle " << curCycle << " ";
3360
947
      SU->getInstr()->dump();
3361
947
    });
3362
947
  }
3363
3.15k
  
return false13
;
3364
3.15k
}
3365
3366
// Return the cycle of the earliest scheduled instruction in the chain.
3367
21
int SMSchedule::earliestCycleInChain(const SDep &Dep) {
3368
21
  SmallPtrSet<SUnit *, 8> Visited;
3369
21
  SmallVector<SDep, 8> Worklist;
3370
21
  Worklist.push_back(Dep);
3371
21
  int EarlyCycle = INT_MAX;
3372
43
  while (!Worklist.empty()) {
3373
22
    const SDep &Cur = Worklist.pop_back_val();
3374
22
    SUnit *PrevSU = Cur.getSUnit();
3375
22
    if (Visited.count(PrevSU))
3376
0
      continue;
3377
22
    std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(PrevSU);
3378
22
    if (it == InstrToCycle.end())
3379
1
      continue;
3380
21
    EarlyCycle = std::min(EarlyCycle, it->second);
3381
21
    for (const auto &PI : PrevSU->Preds)
3382
30
      if (PI.getKind() == SDep::Order || 
Dep.getKind() == SDep::Output29
)
3383
1
        Worklist.push_back(PI);
3384
21
    Visited.insert(PrevSU);
3385
21
  }
3386
21
  return EarlyCycle;
3387
21
}
3388
3389
// Return the cycle of the latest scheduled instruction in the chain.
3390
426
int SMSchedule::latestCycleInChain(const SDep &Dep) {
3391
426
  SmallPtrSet<SUnit *, 8> Visited;
3392
426
  SmallVector<SDep, 8> Worklist;
3393
426
  Worklist.push_back(Dep);
3394
426
  int LateCycle = INT_MIN;
3395
1.29k
  while (!Worklist.empty()) {
3396
865
    const SDep &Cur = Worklist.pop_back_val();
3397
865
    SUnit *SuccSU = Cur.getSUnit();
3398
865
    if (Visited.count(SuccSU))
3399
122
      continue;
3400
743
    std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(SuccSU);
3401
743
    if (it == InstrToCycle.end())
3402
45
      continue;
3403
698
    LateCycle = std::max(LateCycle, it->second);
3404
698
    for (const auto &SI : SuccSU->Succs)
3405
604
      if (SI.getKind() == SDep::Order || 
Dep.getKind() == SDep::Output181
)
3406
439
        Worklist.push_back(SI);
3407
698
    Visited.insert(SuccSU);
3408
698
  }
3409
426
  return LateCycle;
3410
426
}
3411
3412
/// If an instruction has a use that spans multiple iterations, then
3413
/// return true. These instructions are characterized by having a back-ege
3414
/// to a Phi, which contains a reference to another Phi.
3415
40.7k
static SUnit *multipleIterations(SUnit *SU, SwingSchedulerDAG *DAG) {
3416
40.7k
  for (auto &P : SU->Preds)
3417
88.8k
    if (DAG->isBackedge(SU, P) && 
P.getSUnit()->getInstr()->isPHI()6.97k
)
3418
6.97k
      for (auto &S : P.getSUnit()->Succs)
3419
18.1k
        if (S.getKind() == SDep::Data && 
S.getSUnit()->getInstr()->isPHI()11.6k
)
3420
447
          return P.getSUnit();
3421
40.7k
  
return nullptr40.2k
;
3422
40.7k
}
3423
3424
/// Compute the scheduling start slot for the instruction.  The start slot
3425
/// depends on any predecessor or successor nodes scheduled already.
3426
void SMSchedule::computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart,
3427
                              int *MinEnd, int *MaxStart, int II,
3428
3.24k
                              SwingSchedulerDAG *DAG) {
3429
3.24k
  // Iterate over each instruction that has been scheduled already.  The start
3430
3.24k
  // slot computation depends on whether the previously scheduled instruction
3431
3.24k
  // is a predecessor or successor of the specified instruction.
3432
29.7k
  for (int cycle = getFirstCycle(); cycle <= LastCycle; 
++cycle26.4k
) {
3433
26.4k
3434
26.4k
    // Iterate over each instruction in the current cycle.
3435
27.7k
    for (SUnit *I : getInstructions(cycle)) {
3436
27.7k
      // Because we're processing a DAG for the dependences, we recognize
3437
27.7k
      // the back-edge in recurrences by anti dependences.
3438
68.5k
      for (unsigned i = 0, e = (unsigned)SU->Preds.size(); i != e; 
++i40.7k
) {
3439
40.7k
        const SDep &Dep = SU->Preds[i];
3440
40.7k
        if (Dep.getSUnit() == I) {
3441
1.02k
          if (!DAG->isBackedge(SU, Dep)) {
3442
972
            int EarlyStart = cycle + Dep.getLatency() -
3443
972
                             DAG->getDistance(Dep.getSUnit(), SU, Dep) * II;
3444
972
            *MaxEarlyStart = std::max(*MaxEarlyStart, EarlyStart);
3445
972
            if (DAG->isLoopCarriedDep(SU, Dep, false)) {
3446
21
              int End = earliestCycleInChain(Dep) + (II - 1);
3447
21
              *MinEnd = std::min(*MinEnd, End);
3448
21
            }
3449
972
          } else {
3450
53
            int LateStart = cycle - Dep.getLatency() +
3451
53
                            DAG->getDistance(SU, Dep.getSUnit(), Dep) * II;
3452
53
            *MinLateStart = std::min(*MinLateStart, LateStart);
3453
53
          }
3454
1.02k
        }
3455
40.7k
        // For instruction that requires multiple iterations, make sure that
3456
40.7k
        // the dependent instruction is not scheduled past the definition.
3457
40.7k
        SUnit *BE = multipleIterations(I, DAG);
3458
40.7k
        if (BE && 
Dep.getSUnit() == BE447
&&
!SU->getInstr()->isPHI()21
&&
3459
40.7k
            
!SU->isPred(I)9
)
3460
9
          *MinLateStart = std::min(*MinLateStart, cycle);
3461
40.7k
      }
3462
71.6k
      for (unsigned i = 0, e = (unsigned)SU->Succs.size(); i != e; 
++i43.8k
) {
3463
43.8k
        if (SU->Succs[i].getSUnit() == I) {
3464
3.38k
          const SDep &Dep = SU->Succs[i];
3465
3.38k
          if (!DAG->isBackedge(SU, Dep)) {
3466
3.02k
            int LateStart = cycle - Dep.getLatency() +
3467
3.02k
                            DAG->getDistance(SU, Dep.getSUnit(), Dep) * II;
3468
3.02k
            *MinLateStart = std::min(*MinLateStart, LateStart);
3469
3.02k
            if (DAG->isLoopCarriedDep(SU, Dep)) {
3470
426
              int Start = latestCycleInChain(Dep) + 1 - II;
3471
426
              *MaxStart = std::max(*MaxStart, Start);
3472
426
            }
3473
3.02k
          } else {
3474
360
            int EarlyStart = cycle + Dep.getLatency() -
3475
360
                             DAG->getDistance(Dep.getSUnit(), SU, Dep) * II;
3476
360
            *MaxEarlyStart = std::max(*MaxEarlyStart, EarlyStart);
3477
360
          }
3478
3.38k
        }
3479
43.8k
      }
3480
27.7k
    }
3481
26.4k
  }
3482
3.24k
}
3483
3484
/// Order the instructions within a cycle so that the definitions occur
3485
/// before the uses. Returns true if the instruction is added to the start
3486
/// of the list, or false if added to the end.
3487
void SMSchedule::orderDependence(SwingSchedulerDAG *SSD, SUnit *SU,
3488
1.41k
                                 std::deque<SUnit *> &Insts) {
3489
1.41k
  MachineInstr *MI = SU->getInstr();
3490
1.41k
  bool OrderBeforeUse = false;
3491
1.41k
  bool OrderAfterDef = false;
3492
1.41k
  bool OrderBeforeDef = false;
3493
1.41k
  unsigned MoveDef = 0;
3494
1.41k
  unsigned MoveUse = 0;
3495
1.41k
  int StageInst1 = stageScheduled(SU);
3496
1.41k
3497
1.41k
  unsigned Pos = 0;
3498
2.81k
  for (std::deque<SUnit *>::iterator I = Insts.begin(), E = Insts.end(); I != E;
3499
1.41k
       
++I, ++Pos1.39k
) {
3500
6.02k
    for (unsigned i = 0, e = MI->getNumOperands(); i < e; 
++i4.63k
) {
3501
4.63k
      MachineOperand &MO = MI->getOperand(i);
3502
4.63k
      if (!MO.isReg() || 
!TargetRegisterInfo::isVirtualRegister(MO.getReg())3.61k
)
3503
1.05k
        continue;
3504
3.58k
3505
3.58k
      unsigned Reg = MO.getReg();
3506
3.58k
      unsigned BasePos, OffsetPos;
3507
3.58k
      if (ST.getInstrInfo()->getBaseAndOffsetPosition(*MI, BasePos, OffsetPos))
3508
871
        if (MI->getOperand(BasePos).getReg() == Reg)
3509
415
          if (unsigned NewReg = SSD->getInstrBaseReg(SU))
3510
53
            Reg = NewReg;
3511
3.58k
      bool Reads, Writes;
3512
3.58k
      std::tie(Reads, Writes) =
3513
3.58k
          (*I)->getInstr()->readsWritesVirtualRegister(Reg);
3514
3.58k
      if (MO.isDef() && 
Reads1.38k
&&
stageScheduled(*I) <= StageInst1225
) {
3515
104
        OrderBeforeUse = true;
3516
104
        if (MoveUse == 0)
3517
104
          MoveUse = Pos;
3518
3.47k
      } else if (MO.isDef() && 
Reads1.28k
&&
stageScheduled(*I) > StageInst1121
) {
3519
121
        // Add the instruction after the scheduled instruction.
3520
121
        OrderAfterDef = true;
3521
121
        MoveDef = Pos;
3522
3.35k
      } else if (MO.isUse() && 
Writes2.19k
&&
stageScheduled(*I) == StageInst1116
) {
3523
40
        if (cycleScheduled(*I) == cycleScheduled(SU) && !(*I)->isSucc(SU)) {
3524
0
          OrderBeforeUse = true;
3525
0
          if (MoveUse == 0)
3526
0
            MoveUse = Pos;
3527
40
        } else {
3528
40
          OrderAfterDef = true;
3529
40
          MoveDef = Pos;
3530
40
        }
3531
3.31k
      } else if (MO.isUse() && 
Writes2.15k
&&
stageScheduled(*I) > StageInst176
) {
3532
28
        OrderBeforeUse = true;
3533
28
        if (MoveUse == 0)
3534
28
          MoveUse = Pos;
3535
28
        if (MoveUse != 0) {
3536
12
          OrderAfterDef = true;
3537
12
          MoveDef = Pos - 1;
3538
12
        }
3539
3.28k
      } else if (MO.isUse() && 
Writes2.12k
&&
stageScheduled(*I) < StageInst148
) {
3540
48
        // Add the instruction before the scheduled instruction.
3541
48
        OrderBeforeUse = true;
3542
48
        if (MoveUse == 0)
3543
48
          MoveUse = Pos;
3544
3.23k
      } else if (MO.isUse() && 
stageScheduled(*I) == StageInst12.07k
&&
3545
3.23k
                 
isLoopCarriedDefOfUse(SSD, (*I)->getInstr(), MO)1.20k
) {
3546
87
        if (MoveUse == 0) {
3547
87
          OrderBeforeDef = true;
3548
87
          MoveUse = Pos;
3549
87
        }
3550
87
      }
3551
3.58k
    }
3552
1.39k
    // Check for order dependences between instructions. Make sure the source
3553
1.39k
    // is ordered before the destination.
3554
1.47k
    for (auto &S : SU->Succs) {
3555
1.47k
      if (S.getSUnit() != *I)
3556
1.20k
        continue;
3557
268
      if (S.getKind() == SDep::Order && 
stageScheduled(*I) == StageInst113
) {
3558
7
        OrderBeforeUse = true;
3559
7
        if (Pos < MoveUse)
3560
0
          MoveUse = Pos;
3561
7
      }
3562
261
      // We did not handle HW dependences in previous for loop,
3563
261
      // and we normally set Latency = 0 for Anti deps,
3564
261
      // so may have nodes in same cycle with Anti denpendent on HW regs.
3565
261
      else if (S.getKind() == SDep::Anti && 
stageScheduled(*I) == StageInst129
) {
3566
1
        OrderBeforeUse = true;
3567
1
        if ((MoveUse == 0) || 
(Pos < MoveUse)0
)
3568
1
          MoveUse = Pos;
3569
1
      }
3570
268
    }
3571
2.03k
    for (auto &P : SU->Preds) {
3572
2.03k
      if (P.getSUnit() != *I)
3573
1.93k
        continue;
3574
93
      if (P.getKind() == SDep::Order && 
stageScheduled(*I) == StageInst13
) {
3575
3
        OrderAfterDef = true;
3576
3
        MoveDef = Pos;
3577
3
      }
3578
93
    }
3579
1.39k
  }
3580
1.41k
3581
1.41k
  // A circular dependence.
3582
1.41k
  if (OrderAfterDef && 
OrderBeforeUse160
&&
MoveUse == MoveDef16
)
3583
0
    OrderBeforeUse = false;
3584
1.41k
3585
1.41k
  // OrderAfterDef takes precedences over OrderBeforeDef. The latter is due
3586
1.41k
  // to a loop-carried dependence.
3587
1.41k
  if (OrderBeforeDef)
3588
86
    OrderBeforeUse = !OrderAfterDef || 
(MoveUse > MoveDef)33
;
3589
1.41k
3590
1.41k
  // The uncommon case when the instruction order needs to be updated because
3591
1.41k
  // there is both a use and def.
3592
1.41k
  if (OrderBeforeUse && 
OrderAfterDef253
) {
3593
46
    SUnit *UseSU = Insts.at(MoveUse);
3594
46
    SUnit *DefSU = Insts.at(MoveDef);
3595
46
    if (MoveUse > MoveDef) {
3596
42
      Insts.erase(Insts.begin() + MoveUse);
3597
42
      Insts.erase(Insts.begin() + MoveDef);
3598
42
    } else {
3599
4
      Insts.erase(Insts.begin() + MoveDef);
3600
4
      Insts.erase(Insts.begin() + MoveUse);
3601
4
    }
3602
46
    orderDependence(SSD, UseSU, Insts);
3603
46
    orderDependence(SSD, SU, Insts);
3604
46
    orderDependence(SSD, DefSU, Insts);
3605
46
    return;
3606
46
  }
3607
1.37k
  // Put the new instruction first if there is a use in the list. Otherwise,
3608
1.37k
  // put it at the end of the list.
3609
1.37k
  if (OrderBeforeUse)
3610
207
    Insts.push_front(SU);
3611
1.16k
  else
3612
1.16k
    Insts.push_back(SU);
3613
1.37k
}
3614
3615
/// Return true if the scheduled Phi has a loop carried operand.
3616
2.12k
bool SMSchedule::isLoopCarried(SwingSchedulerDAG *SSD, MachineInstr &Phi) {
3617
2.12k
  if (!Phi.isPHI())
3618
624
    return false;
3619
1.50k
  assert(Phi.isPHI() && "Expecting a Phi.");
3620
1.50k
  SUnit *DefSU = SSD->getSUnit(&Phi);
3621
1.50k
  unsigned DefCycle = cycleScheduled(DefSU);
3622
1.50k
  int DefStage = stageScheduled(DefSU);
3623
1.50k
3624
1.50k
  unsigned InitVal = 0;
3625
1.50k
  unsigned LoopVal = 0;
3626
1.50k
  getPhiRegs(Phi, Phi.getParent(), InitVal, LoopVal);
3627
1.50k
  SUnit *UseSU = SSD->getSUnit(MRI.getVRegDef(LoopVal));
3628
1.50k
  if (!UseSU)
3629
2
    return true;
3630
1.50k
  if (UseSU->getInstr()->isPHI())
3631
44
    return true;
3632
1.45k
  unsigned LoopCycle = cycleScheduled(UseSU);
3633
1.45k
  int LoopStage = stageScheduled(UseSU);
3634
1.45k
  return (LoopCycle > DefCycle) || 
(LoopStage <= DefStage)1.31k
;
3635
1.45k
}
3636
3637
/// Return true if the instruction is a definition that is loop carried
3638
/// and defines the use on the next iteration.
3639
///        v1 = phi(v2, v3)
3640
///  (Def) v3 = op v1
3641
///  (MO)   = v1
3642
/// If MO appears before Def, then then v1 and v3 may get assigned to the same
3643
/// register.
3644
bool SMSchedule::isLoopCarriedDefOfUse(SwingSchedulerDAG *SSD,
3645
1.20k
                                       MachineInstr *Def, MachineOperand &MO) {
3646
1.20k
  if (!MO.isReg())
3647
0
    return false;
3648
1.20k
  if (Def->isPHI())
3649
0
    return false;
3650
1.20k
  MachineInstr *Phi = MRI.getVRegDef(MO.getReg());
3651
1.20k
  if (!Phi || !Phi->isPHI() || 
Phi->getParent() != Def->getParent()350
)
3652
866
    return false;
3653
343
  if (!isLoopCarried(SSD, *Phi))
3654
20
    return false;
3655
323
  unsigned LoopReg = getLoopPhiReg(*Phi, Phi->getParent());
3656
1.10k
  for (unsigned i = 0, e = Def->getNumOperands(); i != e; 
++i784
) {
3657
871
    MachineOperand &DMO = Def->getOperand(i);
3658
871
    if (!DMO.isReg() || 
!DMO.isDef()691
)
3659
550
      continue;
3660
321
    if (DMO.getReg() == LoopReg)
3661
87
      return true;
3662
321
  }
3663
323
  
return false236
;
3664
323
}
3665
3666
// Check if the generated schedule is valid. This function checks if
3667
// an instruction that uses a physical register is scheduled in a
3668
// different stage than the definition. The pipeliner does not handle
3669
// physical register values that may cross a basic block boundary.
3670
177
bool SMSchedule::isValidSchedule(SwingSchedulerDAG *SSD) {
3671
1.83k
  for (int i = 0, e = SSD->SUnits.size(); i < e; 
++i1.65k
) {
3672
1.65k
    SUnit &SU = SSD->SUnits[i];
3673
1.65k
    if (!SU.hasPhysRegDefs)
3674
1.65k
      continue;
3675
2
    int StageDef = stageScheduled(&SU);
3676
2
    assert(StageDef != -1 && "Instruction should have been scheduled.");
3677
2
    for (auto &SI : SU.Succs)
3678
7
      if (SI.isAssignedRegDep())
3679
4
        if (ST.getRegisterInfo()->isPhysicalRegister(SI.getReg()))
3680
2
          if (stageScheduled(SI.getSUnit()) != StageDef)
3681
0
            return false;
3682
2
  }
3683
177
  return true;
3684
177
}
3685
3686
/// A property of the node order in swing-modulo-scheduling is
3687
/// that for nodes outside circuits the following holds:
3688
/// none of them is scheduled after both a successor and a
3689
/// predecessor.
3690
/// The method below checks whether the property is met.
3691
/// If not, debug information is printed and statistics information updated.
3692
/// Note that we do not use an assert statement.
3693
/// The reason is that although an invalid node oder may prevent
3694
/// the pipeliner from finding a pipelined schedule for arbitrary II,
3695
/// it does not lead to the generation of incorrect code.
3696
190
void SwingSchedulerDAG::checkValidNodeOrder(const NodeSetType &Circuits) const {
3697
190
3698
190
  // a sorted vector that maps each SUnit to its index in the NodeOrder
3699
190
  typedef std::pair<SUnit *, unsigned> UnitIndex;
3700
190
  std::vector<UnitIndex> Indices(NodeOrder.size(), std::make_pair(nullptr, 0));
3701
190
3702
2.06k
  for (unsigned i = 0, s = NodeOrder.size(); i < s; 
++i1.87k
)
3703
1.87k
    Indices.push_back(std::make_pair(NodeOrder[i], i));
3704
190
3705
35.0k
  auto CompareKey = [](UnitIndex i1, UnitIndex i2) {
3706
35.0k
    return std::get<0>(i1) < std::get<0>(i2);
3707
35.0k
  };
3708
190
3709
190
  // sort, so that we can perform a binary search
3710
190
  llvm::sort(Indices, CompareKey);
3711
190
3712
190
  bool Valid = true;
3713
190
  (void)Valid;
3714
190
  // for each SUnit in the NodeOrder, check whether
3715
190
  // it appears after both a successor and a predecessor
3716
190
  // of the SUnit. If this is the case, and the SUnit
3717
190
  // is not part of circuit, then the NodeOrder is not
3718
190
  // valid.
3719
2.06k
  for (unsigned i = 0, s = NodeOrder.size(); i < s; 
++i1.87k
) {
3720
1.87k
    SUnit *SU = NodeOrder[i];
3721
1.87k
    unsigned Index = i;
3722
1.87k
3723
1.87k
    bool PredBefore = false;
3724
1.87k
    bool SuccBefore = false;
3725
1.87k
3726
1.87k
    SUnit *Succ;
3727
1.87k
    SUnit *Pred;
3728
1.87k
    (void)Succ;
3729
1.87k
    (void)Pred;
3730
1.87k
3731
2.34k
    for (SDep &PredEdge : SU->Preds) {
3732
2.34k
      SUnit *PredSU = PredEdge.getSUnit();
3733
2.34k
      unsigned PredIndex = std::get<1>(
3734
2.34k
          *llvm::lower_bound(Indices, std::make_pair(PredSU, 0), CompareKey));
3735
2.34k
      if (!PredSU->getInstr()->isPHI() && 
PredIndex < Index1.38k
) {
3736
317
        PredBefore = true;
3737
317
        Pred = PredSU;
3738
317
        break;
3739
317
      }
3740
2.34k
    }
3741
1.87k
3742
1.87k
    for (SDep &SuccEdge : SU->Succs) {
3743
1.67k
      SUnit *SuccSU = SuccEdge.getSUnit();
3744
1.67k
      // Do not process a boundary node, it was not included in NodeOrder,
3745
1.67k
      // hence not in Indices either, call to std::lower_bound() below will
3746
1.67k
      // return Indices.end().
3747
1.67k
      if (SuccSU->isBoundaryNode())
3748
4
        continue;
3749
1.66k
      unsigned SuccIndex = std::get<1>(
3750
1.66k
          *llvm::lower_bound(Indices, std::make_pair(SuccSU, 0), CompareKey));
3751
1.66k
      if (!SuccSU->getInstr()->isPHI() && 
SuccIndex < Index1.65k
) {
3752
1.19k
        SuccBefore = true;
3753
1.19k
        Succ = SuccSU;
3754
1.19k
        break;
3755
1.19k
      }
3756
1.66k
    }
3757
1.87k
3758
1.87k
    if (PredBefore && 
SuccBefore317
&&
!SU->getInstr()->isPHI()21
) {
3759
21
      // instructions in circuits are allowed to be scheduled
3760
21
      // after both a successor and predecessor.
3761
21
      bool InCircuit = llvm::any_of(
3762
322
          Circuits, [SU](const NodeSet &Circuit) { return Circuit.count(SU); });
3763
21
      if (InCircuit)
3764
21
        LLVM_DEBUG(dbgs() << "In a circuit, predecessor ";);
3765
21
      else {
3766
9
        Valid = false;
3767
9
        NumNodeOrderIssues++;
3768
9
        LLVM_DEBUG(dbgs() << "Predecessor ";);
3769
9
      }
3770
21
      LLVM_DEBUG(dbgs() << Pred->NodeNum << " and successor " << Succ->NodeNum
3771
21
                        << " are scheduled before node " << SU->NodeNum
3772
21
                        << "\n";);
3773
21
    }
3774
1.87k
  }
3775
190
3776
190
  LLVM_DEBUG({
3777
190
    if (!Valid)
3778
190
      dbgs() << "Invalid node order found!\n";
3779
190
  });
3780
190
}
3781
3782
/// Attempt to fix the degenerate cases when the instruction serialization
3783
/// causes the register lifetimes to overlap. For example,
3784
///   p' = store_pi(p, b)
3785
///      = load p, offset
3786
/// In this case p and p' overlap, which means that two registers are needed.
3787
/// Instead, this function changes the load to use p' and updates the offset.
3788
537
void SwingSchedulerDAG::fixupRegisterOverlaps(std::deque<SUnit *> &Instrs) {
3789
537
  unsigned OverlapReg = 0;
3790
537
  unsigned NewBaseReg = 0;
3791
1.65k
  for (SUnit *SU : Instrs) {
3792
1.65k
    MachineInstr *MI = SU->getInstr();
3793
6.92k
    for (unsigned i = 0, e = MI->getNumOperands(); i < e; 
++i5.26k
) {
3794
5.51k
      const MachineOperand &MO = MI->getOperand(i);
3795
5.51k
      // Look for an instruction that uses p. The instruction occurs in the
3796
5.51k
      // same cycle but occurs later in the serialized order.
3797
5.51k
      if (MO.isReg() && 
MO.isUse()4.08k
&&
MO.getReg() == OverlapReg2.43k
) {
3798
2
        // Check that the instruction appears in the InstrChanges structure,
3799
2
        // which contains instructions that can have the offset updated.
3800
2
        DenseMap<SUnit *, std::pair<unsigned, int64_t>>::iterator It =
3801
2
          InstrChanges.find(SU);
3802
2
        if (It != InstrChanges.end()) {
3803
2
          unsigned BasePos, OffsetPos;
3804
2
          // Update the base register and adjust the offset.
3805
2
          if (TII->getBaseAndOffsetPosition(*MI, BasePos, OffsetPos)) {
3806
2
            MachineInstr *NewMI = MF.CloneMachineInstr(MI);
3807
2
            NewMI->getOperand(BasePos).setReg(NewBaseReg);
3808
2
            int64_t NewOffset =
3809
2
                MI->getOperand(OffsetPos).getImm() - It->second.second;
3810
2
            NewMI->getOperand(OffsetPos).setImm(NewOffset);
3811
2
            SU->setInstr(NewMI);
3812
2
            MISUnitMap[NewMI] = SU;
3813
2
            NewMIs.insert(NewMI);
3814
2
          }
3815
2
        }
3816
2
        OverlapReg = 0;
3817
2
        NewBaseReg = 0;
3818
2
        break;
3819
2
      }
3820
5.51k
      // Look for an instruction of the form p' = op(p), which uses and defines
3821
5.51k
      // two virtual registers that get allocated to the same physical register.
3822
5.51k
      unsigned TiedUseIdx = 0;
3823
5.51k
      if (MI->isRegTiedToUseOperand(i, &TiedUseIdx)) {
3824
254
        // OverlapReg is p in the example above.
3825
254
        OverlapReg = MI->getOperand(TiedUseIdx).getReg();
3826
254
        // NewBaseReg is p' in the example above.
3827
254
        NewBaseReg = MI->getOperand(i).getReg();
3828
254
        break;
3829
254
      }
3830
5.51k
    }
3831
1.65k
  }
3832
537
}
3833
3834
/// After the schedule has been formed, call this function to combine
3835
/// the instructions from the different stages/cycles.  That is, this
3836
/// function creates a schedule that represents a single iteration.
3837
177
void SMSchedule::finalizeSchedule(SwingSchedulerDAG *SSD) {
3838
177
  // Move all instructions to the first stage from later stages.
3839
714
  for (int cycle = getFirstCycle(); cycle <= getFinalCycle(); 
++cycle537
) {
3840
1.04k
    for (int stage = 1, lastStage = getMaxStageCount(); stage <= lastStage;
3841
537
         
++stage511
) {
3842
511
      std::deque<SUnit *> &cycleInstrs =
3843
511
          ScheduledInstrs[cycle + (stage * InitiationInterval)];
3844
511
      for (std::deque<SUnit *>::reverse_iterator I = cycleInstrs.rbegin(),
3845
511
                                                 E = cycleInstrs.rend();
3846
1.11k
           I != E; 
++I607
)
3847
607
        ScheduledInstrs[cycle].push_front(*I);
3848
511
    }
3849
537
  }
3850
177
  // Iterate over the definitions in each instruction, and compute the
3851
177
  // stage difference for each use.  Keep the maximum value.
3852
1.65k
  for (auto &I : InstrToCycle) {
3853
1.65k
    int DefStage = stageScheduled(I.first);
3854
1.65k
    MachineInstr *MI = I.first->getInstr();
3855
7.85k
    for (unsigned i = 0, e = MI->getNumOperands(); i < e; 
++i6.20k
) {
3856
6.20k
      MachineOperand &Op = MI->getOperand(i);
3857
6.20k
      if (!Op.isReg() || 
!Op.isDef()4.58k
)
3858
4.54k
        continue;
3859
1.65k
3860
1.65k
      unsigned Reg = Op.getReg();
3861
1.65k
      unsigned MaxDiff = 0;
3862
1.65k
      bool PhiIsSwapped = false;
3863
1.65k
      for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(Reg),
3864
1.65k
                                             EI = MRI.use_end();
3865
3.86k
           UI != EI; 
++UI2.21k
) {
3866
2.21k
        MachineOperand &UseOp = *UI;
3867
2.21k
        MachineInstr *UseMI = UseOp.getParent();
3868
2.21k
        SUnit *SUnitUse = SSD->getSUnit(UseMI);
3869
2.21k
        int UseStage = stageScheduled(SUnitUse);
3870
2.21k
        unsigned Diff = 0;
3871
2.21k
        if (UseStage != -1 && 
UseStage >= DefStage2.13k
)
3872
2.10k
          Diff = UseStage - DefStage;
3873
2.21k
        if (MI->isPHI()) {
3874
661
          if (isLoopCarried(SSD, *MI))
3875
632
            ++Diff;
3876
29
          else
3877
29
            PhiIsSwapped = true;
3878
661
        }
3879
2.21k
        MaxDiff = std::max(Diff, MaxDiff);
3880
2.21k
      }
3881
1.65k
      RegToStageDiff[Reg] = std::make_pair(MaxDiff, PhiIsSwapped);
3882
1.65k
    }
3883
1.65k
  }
3884
177
3885
177
  // Erase all the elements in the later stages. Only one iteration should
3886
177
  // remain in the scheduled list, and it contains all the instructions.
3887
577
  for (int cycle = getFinalCycle() + 1; cycle <= LastCycle; 
++cycle400
)
3888
400
    ScheduledInstrs.erase(cycle);
3889
177
3890
177
  // Change the registers in instruction as specified in the InstrChanges
3891
177
  // map. We need to use the new registers to create the correct order.
3892
1.83k
  for (int i = 0, e = SSD->SUnits.size(); i != e; 
++i1.65k
) {
3893
1.65k
    SUnit *SU = &SSD->SUnits[i];
3894
1.65k
    SSD->applyInstrChange(SU->getInstr(), *this);
3895
1.65k
  }
3896
177
3897
177
  // Reorder the instructions in each cycle to fix and improve the
3898
177
  // generated code.
3899
714
  for (int Cycle = getFirstCycle(), E = getFinalCycle(); Cycle <= E; 
++Cycle537
) {
3900
537
    std::deque<SUnit *> &cycleInstrs = ScheduledInstrs[Cycle];
3901
537
    std::deque<SUnit *> newOrderPhi;
3902
2.19k
    for (unsigned i = 0, e = cycleInstrs.size(); i < e; 
++i1.65k
) {
3903
1.65k
      SUnit *SU = cycleInstrs[i];
3904
1.65k
      if (SU->getInstr()->isPHI())
3905
381
        newOrderPhi.push_back(SU);
3906
1.65k
    }
3907
537
    std::deque<SUnit *> newOrderI;
3908
2.19k
    for (unsigned i = 0, e = cycleInstrs.size(); i < e; 
++i1.65k
) {
3909
1.65k
      SUnit *SU = cycleInstrs[i];
3910
1.65k
      if (!SU->getInstr()->isPHI())
3911
1.27k
        orderDependence(SSD, SU, newOrderI);
3912
1.65k
    }
3913
537
    // Replace the old order with the new order.
3914
537
    cycleInstrs.swap(newOrderPhi);
3915
537
    cycleInstrs.insert(cycleInstrs.end(), newOrderI.begin(), newOrderI.end());
3916
537
    SSD->fixupRegisterOverlaps(cycleInstrs);
3917
537
  }
3918
177
3919
177
  LLVM_DEBUG(dump(););
3920
177
}
3921
3922
0
void NodeSet::print(raw_ostream &os) const {
3923
0
  os << "Num nodes " << size() << " rec " << RecMII << " mov " << MaxMOV
3924
0
     << " depth " << MaxDepth << " col " << Colocate << "\n";
3925
0
  for (const auto &I : Nodes)
3926
0
    os << "   SU(" << I->NodeNum << ") " << *(I->getInstr());
3927
0
  os << "\n";
3928
0
}
3929
3930
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3931
/// Print the schedule information to the given output.
3932
void SMSchedule::print(raw_ostream &os) const {
3933
  // Iterate over each cycle.
3934
  for (int cycle = getFirstCycle(); cycle <= getFinalCycle(); ++cycle) {
3935
    // Iterate over each instruction in the cycle.
3936
    const_sched_iterator cycleInstrs = ScheduledInstrs.find(cycle);
3937
    for (SUnit *CI : cycleInstrs->second) {
3938
      os << "cycle " << cycle << " (" << stageScheduled(CI) << ") ";
3939
      os << "(" << CI->NodeNum << ") ";
3940
      CI->getInstr()->print(os);
3941
      os << "\n";
3942
    }
3943
  }
3944
}
3945
3946
/// Utility function used for debugging to print the schedule.
3947
LLVM_DUMP_METHOD void SMSchedule::dump() const { print(dbgs()); }
3948
LLVM_DUMP_METHOD void NodeSet::dump() const { print(dbgs()); }
3949
3950
#endif
3951
3952
void ResourceManager::initProcResourceVectors(
3953
734
    const MCSchedModel &SM, SmallVectorImpl<uint64_t> &Masks) {
3954
734
  unsigned ProcResourceID = 0;
3955
734
3956
734
  // We currently limit the resource kinds to 64 and below so that we can use
3957
734
  // uint64_t for Masks
3958
734
  assert(SM.getNumProcResourceKinds() < 64 &&
3959
734
         "Too many kinds of resources, unsupported");
3960
734
  // Create a unique bitmask for every processor resource unit.
3961
734
  // Skip resource at index 0, since it always references 'InvalidUnit'.
3962
734
  Masks.resize(SM.getNumProcResourceKinds());
3963
1.10k
  for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; 
++I374
) {
3964
374
    const MCProcResourceDesc &Desc = *SM.getProcResource(I);
3965
374
    if (Desc.SubUnitsIdxBegin)
3966
34
      continue;
3967
340
    Masks[I] = 1ULL << ProcResourceID;
3968
340
    ProcResourceID++;
3969
340
  }
3970
734
  // Create a unique bitmask for every processor resource group.
3971
1.10k
  for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; 
++I374
) {
3972
374
    const MCProcResourceDesc &Desc = *SM.getProcResource(I);
3973
374
    if (!Desc.SubUnitsIdxBegin)
3974
340
      continue;
3975
34
    Masks[I] = 1ULL << ProcResourceID;
3976
204
    for (unsigned U = 0; U < Desc.NumUnits; 
++U170
)
3977
170
      Masks[I] |= Masks[Desc.SubUnitsIdxBegin[U]];
3978
34
    ProcResourceID++;
3979
34
  }
3980
734
  LLVM_DEBUG({
3981
734
    if (SwpShowResMask) {
3982
734
      dbgs() << "ProcResourceDesc:\n";
3983
734
      for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) {
3984
734
        const MCProcResourceDesc *ProcResource = SM.getProcResource(I);
3985
734
        dbgs() << format(" %16s(%2d): Mask: 0x%08x, NumUnits:%2d\n",
3986
734
                         ProcResource->Name, I, Masks[I],
3987
734
                         ProcResource->NumUnits);
3988
734
      }
3989
734
      dbgs() << " -----------------\n";
3990
734
    }
3991
734
  });
3992
734
}
3993
3994
9.33k
bool ResourceManager::canReserveResources(const MCInstrDesc *MID) const {
3995
9.33k
3996
9.33k
  LLVM_DEBUG({
3997
9.33k
    if (SwpDebugResource)
3998
9.33k
      dbgs() << "canReserveResources:\n";
3999
9.33k
  });
4000
9.33k
  if (UseDFA)
4001
9.25k
    return DFAResources->canReserveResources(MID);
4002
80
4003
80
  unsigned InsnClass = MID->getSchedClass();
4004
80
  const MCSchedClassDesc *SCDesc = SM.getSchedClassDesc(InsnClass);
4005
80
  if (!SCDesc->isValid()) {
4006
0
    LLVM_DEBUG({
4007
0
      dbgs() << "No valid Schedule Class Desc for schedClass!\n";
4008
0
      dbgs() << "isPseduo:" << MID->isPseudo() << "\n";
4009
0
    });
4010
0
    return true;
4011
0
  }
4012
80
4013
80
  const MCWriteProcResEntry *I = STI->getWriteProcResBegin(SCDesc);
4014
80
  const MCWriteProcResEntry *E = STI->getWriteProcResEnd(SCDesc);
4015
371
  for (; I != E; 
++I291
) {
4016
326
    if (!I->Cycles)
4017
0
      continue;
4018
326
    const MCProcResourceDesc *ProcResource =
4019
326
        SM.getProcResource(I->ProcResourceIdx);
4020
326
    unsigned NumUnits = ProcResource->NumUnits;
4021
326
    LLVM_DEBUG({
4022
326
      if (SwpDebugResource)
4023
326
        dbgs() << format(" %16s(%2d): Count: %2d, NumUnits:%2d, Cycles:%2d\n",
4024
326
                         ProcResource->Name, I->ProcResourceIdx,
4025
326
                         ProcResourceCount[I->ProcResourceIdx], NumUnits,
4026
326
                         I->Cycles);
4027
326
    });
4028
326
    if (ProcResourceCount[I->ProcResourceIdx] >= NumUnits)
4029
35
      return false;
4030
326
  }
4031
80
  
LLVM_DEBUG45
(if (SwpDebugResource) dbgs() << "return true\n\n";);
4032
45
  return true;
4033
80
}
4034
4035
5.97k
void ResourceManager::reserveResources(const MCInstrDesc *MID) {
4036
5.97k
  LLVM_DEBUG({
4037
5.97k
    if (SwpDebugResource)
4038
5.97k
      dbgs() << "reserveResources:\n";
4039
5.97k
  });
4040
5.97k
  if (UseDFA)
4041
5.92k
    return DFAResources->reserveResources(MID);
4042
56
4043
56
  unsigned InsnClass = MID->getSchedClass();
4044
56
  const MCSchedClassDesc *SCDesc = SM.getSchedClassDesc(InsnClass);
4045
56
  if (!SCDesc->isValid()) {
4046
0
    LLVM_DEBUG({
4047
0
      dbgs() << "No valid Schedule Class Desc for schedClass!\n";
4048
0
      dbgs() << "isPseduo:" << MID->isPseudo() << "\n";
4049
0
    });
4050
0
    return;
4051
0
  }
4052
56
  for (const MCWriteProcResEntry &PRE :
4053
56
       make_range(STI->getWriteProcResBegin(SCDesc),
4054
311
                  STI->getWriteProcResEnd(SCDesc))) {
4055
311
    if (!PRE.Cycles)
4056
0
      continue;
4057
311
    ++ProcResourceCount[PRE.ProcResourceIdx];
4058
311
    LLVM_DEBUG({
4059
311
      if (SwpDebugResource) {
4060
311
        const MCProcResourceDesc *ProcResource =
4061
311
            SM.getProcResource(PRE.ProcResourceIdx);
4062
311
        dbgs() << format(" %16s(%2d): Count: %2d, NumUnits:%2d, Cycles:%2d\n",
4063
311
                         ProcResource->Name, PRE.ProcResourceIdx,
4064
311
                         ProcResourceCount[PRE.ProcResourceIdx],
4065
311
                         ProcResource->NumUnits, PRE.Cycles);
4066
311
      }
4067
311
    });
4068
311
  }
4069
56
  LLVM_DEBUG({
4070
56
    if (SwpDebugResource)
4071
56
      dbgs() << "reserveResources: done!\n\n";
4072
56
  });
4073
56
}
4074
4075
9.33k
bool ResourceManager::canReserveResources(const MachineInstr &MI) const {
4076
9.33k
  return canReserveResources(&MI.getDesc());
4077
9.33k
}
4078
4079
5.97k
void ResourceManager::reserveResources(const MachineInstr &MI) {
4080
5.97k
  return reserveResources(&MI.getDesc());
4081
5.97k
}
4082
4083
4.08k
void ResourceManager::clearResources() {
4084
4.08k
  if (UseDFA)
4085
4.06k
    return DFAResources->clearResources();
4086
24
  std::fill(ProcResourceCount.begin(), ProcResourceCount.end(), 0);
4087
24
}
4088