Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/include/llvm/CodeGen/MachinePipeliner.h
Line
Count
Source (jump to first uncovered line)
1
//===- MachinePipeliner.h - Machine Software Pipeliner Pass -------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// An implementation of the Swing Modulo Scheduling (SMS) software pipeliner.
10
//
11
// Software pipelining (SWP) is an instruction scheduling technique for loops
12
// that overlap loop iterations and exploits ILP via a compiler transformation.
13
//
14
// Swing Modulo Scheduling is an implementation of software pipelining
15
// that generates schedules that are near optimal in terms of initiation
16
// interval, register requirements, and stage count. See the papers:
17
//
18
// "Swing Modulo Scheduling: A Lifetime-Sensitive Approach", by J. Llosa,
19
// A. Gonzalez, E. Ayguade, and M. Valero. In PACT '96 Proceedings of the 1996
20
// Conference on Parallel Architectures and Compilation Techiniques.
21
//
22
// "Lifetime-Sensitive Modulo Scheduling in a Production Environment", by J.
23
// Llosa, E. Ayguade, A. Gonzalez, M. Valero, and J. Eckhardt. In IEEE
24
// Transactions on Computers, Vol. 50, No. 3, 2001.
25
//
26
// "An Implementation of Swing Modulo Scheduling With Extensions for
27
// Superblocks", by T. Lattner, Master's Thesis, University of Illinois at
28
// Urbana-Champaign, 2005.
29
//
30
//
31
// The SMS algorithm consists of three main steps after computing the minimal
32
// initiation interval (MII).
33
// 1) Analyze the dependence graph and compute information about each
34
//    instruction in the graph.
35
// 2) Order the nodes (instructions) by priority based upon the heuristics
36
//    described in the algorithm.
37
// 3) Attempt to schedule the nodes in the specified order using the MII.
38
//
39
//===----------------------------------------------------------------------===//
40
#ifndef LLVM_LIB_CODEGEN_MACHINEPIPELINER_H
41
#define LLVM_LIB_CODEGEN_MACHINEPIPELINER_H
42
43
#include "llvm/CodeGen/MachineDominators.h"
44
#include "llvm/CodeGen/RegisterClassInfo.h"
45
#include "llvm/CodeGen/ScheduleDAGInstrs.h"
46
#include "llvm/CodeGen/TargetInstrInfo.h"
47
48
namespace llvm {
49
50
class NodeSet;
51
class SMSchedule;
52
53
extern cl::opt<bool> SwpEnableCopyToPhi;
54
55
/// The main class in the implementation of the target independent
56
/// software pipeliner pass.
57
class MachinePipeliner : public MachineFunctionPass {
58
public:
59
  MachineFunction *MF = nullptr;
60
  const MachineLoopInfo *MLI = nullptr;
61
  const MachineDominatorTree *MDT = nullptr;
62
  const InstrItineraryData *InstrItins;
63
  const TargetInstrInfo *TII = nullptr;
64
  RegisterClassInfo RegClassInfo;
65
  bool disabledByPragma = false;
66
  unsigned II_setByPragma = 0;
67
68
#ifndef NDEBUG
69
  static int NumTries;
70
#endif
71
72
  /// Cache the target analysis information about the loop.
73
  struct LoopInfo {
74
    MachineBasicBlock *TBB = nullptr;
75
    MachineBasicBlock *FBB = nullptr;
76
    SmallVector<MachineOperand, 4> BrCond;
77
    MachineInstr *LoopInductionVar = nullptr;
78
    MachineInstr *LoopCompare = nullptr;
79
  };
80
  LoopInfo LI;
81
82
  static char ID;
83
84
2.52k
  MachinePipeliner() : MachineFunctionPass(ID) {
85
2.52k
    initializeMachinePipelinerPass(*PassRegistry::getPassRegistry());
86
2.52k
  }
87
88
  bool runOnMachineFunction(MachineFunction &MF) override;
89
90
2.49k
  void getAnalysisUsage(AnalysisUsage &AU) const override {
91
2.49k
    AU.addRequired<AAResultsWrapperPass>();
92
2.49k
    AU.addPreserved<AAResultsWrapperPass>();
93
2.49k
    AU.addRequired<MachineLoopInfo>();
94
2.49k
    AU.addRequired<MachineDominatorTree>();
95
2.49k
    AU.addRequired<LiveIntervals>();
96
2.49k
    MachineFunctionPass::getAnalysisUsage(AU);
97
2.49k
  }
98
99
private:
100
  void preprocessPhiNodes(MachineBasicBlock &B);
101
  bool canPipelineLoop(MachineLoop &L);
102
  bool scheduleLoop(MachineLoop &L);
103
  bool swingModuloScheduler(MachineLoop &L);
104
  void setPragmaPipelineOptions(MachineLoop &L);
105
};
106
107
/// This class builds the dependence graph for the instructions in a loop,
108
/// and attempts to schedule the instructions using the SMS algorithm.
109
class SwingSchedulerDAG : public ScheduleDAGInstrs {
110
  MachinePipeliner &Pass;
111
  /// The minimum initiation interval between iterations for this schedule.
112
  unsigned MII = 0;
113
  /// The maximum initiation interval between iterations for this schedule.
114
  unsigned MAX_II = 0;
115
  /// Set to true if a valid pipelined schedule is found for the loop.
116
  bool Scheduled = false;
117
  MachineLoop &Loop;
118
  LiveIntervals &LIS;
119
  const RegisterClassInfo &RegClassInfo;
120
  unsigned II_setByPragma = 0;
121
122
  /// A toplogical ordering of the SUnits, which is needed for changing
123
  /// dependences and iterating over the SUnits.
124
  ScheduleDAGTopologicalSort Topo;
125
126
  struct NodeInfo {
127
    int ASAP = 0;
128
    int ALAP = 0;
129
    int ZeroLatencyDepth = 0;
130
    int ZeroLatencyHeight = 0;
131
132
1.87k
    NodeInfo() = default;
133
  };
134
  /// Computed properties for each node in the graph.
135
  std::vector<NodeInfo> ScheduleInfo;
136
137
  enum OrderKind { BottomUp = 0, TopDown = 1 };
138
  /// Computed node ordering for scheduling.
139
  SetVector<SUnit *> NodeOrder;
140
141
  using NodeSetType = SmallVector<NodeSet, 8>;
142
  using ValueMapTy = DenseMap<unsigned, unsigned>;
143
  using MBBVectorTy = SmallVectorImpl<MachineBasicBlock *>;
144
  using InstrMapTy = DenseMap<MachineInstr *, MachineInstr *>;
145
146
  /// Instructions to change when emitting the final schedule.
147
  DenseMap<SUnit *, std::pair<unsigned, int64_t>> InstrChanges;
148
149
  /// We may create a new instruction, so remember it because it
150
  /// must be deleted when the pass is finished.
151
  SmallPtrSet<MachineInstr *, 4> NewMIs;
152
153
  /// Ordered list of DAG postprocessing steps.
154
  std::vector<std::unique_ptr<ScheduleDAGMutation>> Mutations;
155
156
  /// Helper class to implement Johnson's circuit finding algorithm.
157
  class Circuits {
158
    std::vector<SUnit> &SUnits;
159
    SetVector<SUnit *> Stack;
160
    BitVector Blocked;
161
    SmallVector<SmallPtrSet<SUnit *, 4>, 10> B;
162
    SmallVector<SmallVector<int, 4>, 16> AdjK;
163
    // Node to Index from ScheduleDAGTopologicalSort
164
    std::vector<int> *Node2Idx;
165
    unsigned NumPaths;
166
    static unsigned MaxPaths;
167
168
  public:
169
    Circuits(std::vector<SUnit> &SUs, ScheduleDAGTopologicalSort &Topo)
170
192
        : SUnits(SUs), Blocked(SUs.size()), B(SUs.size()), AdjK(SUs.size()) {
171
192
      Node2Idx = new std::vector<int>(SUs.size());
172
192
      unsigned Idx = 0;
173
192
      for (const auto &NodeNum : Topo)
174
2.03k
        Node2Idx->at(NodeNum) = Idx++;
175
192
    }
176
177
192
    ~Circuits() { delete Node2Idx; }
178
179
    /// Reset the data structures used in the circuit algorithm.
180
2.03k
    void reset() {
181
2.03k
      Stack.clear();
182
2.03k
      Blocked.reset();
183
2.03k
      B.assign(SUnits.size(), SmallPtrSet<SUnit *, 4>());
184
2.03k
      NumPaths = 0;
185
2.03k
    }
186
187
    void createAdjacencyStructure(SwingSchedulerDAG *DAG);
188
    bool circuit(int V, int S, NodeSetType &NodeSets, bool HasBackedge = false);
189
    void unblock(int U);
190
  };
191
192
  struct CopyToPhiMutation : public ScheduleDAGMutation {
193
    void apply(ScheduleDAGInstrs *DAG) override;
194
  };
195
196
public:
197
  SwingSchedulerDAG(MachinePipeliner &P, MachineLoop &L, LiveIntervals &lis,
198
                    const RegisterClassInfo &rci, unsigned II)
199
      : ScheduleDAGInstrs(*P.MF, P.MLI, false), Pass(P), Loop(L), LIS(lis),
200
192
        RegClassInfo(rci), II_setByPragma(II), Topo(SUnits, &ExitSU) {
201
192
    P.MF->getSubtarget().getSMSMutations(Mutations);
202
192
    if (SwpEnableCopyToPhi)
203
192
      Mutations.push_back(llvm::make_unique<CopyToPhiMutation>());
204
192
  }
205
206
  void schedule() override;
207
  void finishBlock() override;
208
209
  /// Return true if the loop kernel has been scheduled.
210
192
  bool hasNewSchedule() { return Scheduled; }
211
212
  /// Return the earliest time an instruction may be scheduled.
213
7.62k
  int getASAP(SUnit *Node) { return ScheduleInfo[Node->NodeNum].ASAP; }
214
215
  /// Return the latest time an instruction my be scheduled.
216
5.48k
  int getALAP(SUnit *Node) { return ScheduleInfo[Node->NodeNum].ALAP; }
217
218
  /// The mobility function, which the number of slots in which
219
  /// an instruction may be scheduled.
220
3.26k
  int getMOV(SUnit *Node) { return getALAP(Node) - getASAP(Node); }
221
222
  /// The depth, in the dependence graph, for a node.
223
11.5k
  unsigned getDepth(SUnit *Node) { return Node->getDepth(); }
224
225
  /// The maximum unweighted length of a path from an arbitrary node to the
226
  /// given node in which each edge has latency 0
227
4.56k
  int getZeroLatencyDepth(SUnit *Node) {
228
4.56k
    return ScheduleInfo[Node->NodeNum].ZeroLatencyDepth;
229
4.56k
  }
230
231
  /// The height, in the dependence graph, for a node.
232
3.50k
  unsigned getHeight(SUnit *Node) { return Node->getHeight(); }
233
234
  /// The maximum unweighted length of a path from the given node to an
235
  /// arbitrary node in which each edge has latency 0
236
2.29k
  int getZeroLatencyHeight(SUnit *Node) {
237
2.29k
    return ScheduleInfo[Node->NodeNum].ZeroLatencyHeight;
238
2.29k
  }
239
240
  /// Return true if the dependence is a back-edge in the data dependence graph.
241
  /// Since the DAG doesn't contain cycles, we represent a cycle in the graph
242
  /// using an anti dependence from a Phi to an instruction.
243
93.3k
  bool isBackedge(SUnit *Source, const SDep &Dep) {
244
93.3k
    if (Dep.getKind() != SDep::Anti)
245
85.8k
      return false;
246
7.49k
    return Source->getInstr()->isPHI() || 
Dep.getSUnit()->getInstr()->isPHI()7.13k
;
247
7.49k
  }
248
249
  bool isLoopCarriedDep(SUnit *Source, const SDep &Dep, bool isSucc = true);
250
251
  /// The distance function, which indicates that operation V of iteration I
252
  /// depends on operations U of iteration I-distance.
253
8.85k
  unsigned getDistance(SUnit *U, SUnit *V, const SDep &Dep) {
254
8.85k
    // Instructions that feed a Phi have a distance of 1. Computing larger
255
8.85k
    // values for arrays requires data dependence information.
256
8.85k
    if (V->getInstr()->isPHI() && 
Dep.getKind() == SDep::Anti485
)
257
413
      return 1;
258
8.43k
    return 0;
259
8.43k
  }
260
261
  void applyInstrChange(MachineInstr *MI, SMSchedule &Schedule);
262
263
  void fixupRegisterOverlaps(std::deque<SUnit *> &Instrs);
264
265
  /// Return the new base register that was stored away for the changed
266
  /// instruction.
267
415
  unsigned getInstrBaseReg(SUnit *SU) {
268
415
    DenseMap<SUnit *, std::pair<unsigned, int64_t>>::iterator It =
269
415
        InstrChanges.find(SU);
270
415
    if (It != InstrChanges.end())
271
53
      return It->second.first;
272
362
    return 0;
273
362
  }
274
275
0
  void addMutation(std::unique_ptr<ScheduleDAGMutation> Mutation) {
276
0
    Mutations.push_back(std::move(Mutation));
277
0
  }
278
279
0
  static bool classof(const ScheduleDAGInstrs *DAG) { return true; }
280
281
private:
282
  void addLoopCarriedDependences(AliasAnalysis *AA);
283
  void updatePhiDependences();
284
  void changeDependences();
285
  unsigned calculateResMII();
286
  unsigned calculateRecMII(NodeSetType &RecNodeSets);
287
  void findCircuits(NodeSetType &NodeSets);
288
  void fuseRecs(NodeSetType &NodeSets);
289
  void removeDuplicateNodes(NodeSetType &NodeSets);
290
  void computeNodeFunctions(NodeSetType &NodeSets);
291
  void registerPressureFilter(NodeSetType &NodeSets);
292
  void colocateNodeSets(NodeSetType &NodeSets);
293
  void checkNodeSets(NodeSetType &NodeSets);
294
  void groupRemainingNodes(NodeSetType &NodeSets);
295
  void addConnectedNodes(SUnit *SU, NodeSet &NewSet,
296
                         SetVector<SUnit *> &NodesAdded);
297
  void computeNodeOrder(NodeSetType &NodeSets);
298
  void checkValidNodeOrder(const NodeSetType &Circuits) const;
299
  bool schedulePipeline(SMSchedule &Schedule);
300
  void generatePipelinedLoop(SMSchedule &Schedule);
301
  void generateProlog(SMSchedule &Schedule, unsigned LastStage,
302
                      MachineBasicBlock *KernelBB, ValueMapTy *VRMap,
303
                      MBBVectorTy &PrologBBs);
304
  void generateEpilog(SMSchedule &Schedule, unsigned LastStage,
305
                      MachineBasicBlock *KernelBB, ValueMapTy *VRMap,
306
                      MBBVectorTy &EpilogBBs, MBBVectorTy &PrologBBs);
307
  void generateExistingPhis(MachineBasicBlock *NewBB, MachineBasicBlock *BB1,
308
                            MachineBasicBlock *BB2, MachineBasicBlock *KernelBB,
309
                            SMSchedule &Schedule, ValueMapTy *VRMap,
310
                            InstrMapTy &InstrMap, unsigned LastStageNum,
311
                            unsigned CurStageNum, bool IsLast);
312
  void generatePhis(MachineBasicBlock *NewBB, MachineBasicBlock *BB1,
313
                    MachineBasicBlock *BB2, MachineBasicBlock *KernelBB,
314
                    SMSchedule &Schedule, ValueMapTy *VRMap,
315
                    InstrMapTy &InstrMap, unsigned LastStageNum,
316
                    unsigned CurStageNum, bool IsLast);
317
  void removeDeadInstructions(MachineBasicBlock *KernelBB,
318
                              MBBVectorTy &EpilogBBs);
319
  void splitLifetimes(MachineBasicBlock *KernelBB, MBBVectorTy &EpilogBBs,
320
                      SMSchedule &Schedule);
321
  void addBranches(MachineBasicBlock &PreheaderBB, MBBVectorTy &PrologBBs,
322
                   MachineBasicBlock *KernelBB, MBBVectorTy &EpilogBBs,
323
                   SMSchedule &Schedule, ValueMapTy *VRMap);
324
  bool computeDelta(MachineInstr &MI, unsigned &Delta);
325
  void updateMemOperands(MachineInstr &NewMI, MachineInstr &OldMI,
326
                         unsigned Num);
327
  MachineInstr *cloneInstr(MachineInstr *OldMI, unsigned CurStageNum,
328
                           unsigned InstStageNum);
329
  MachineInstr *cloneAndChangeInstr(MachineInstr *OldMI, unsigned CurStageNum,
330
                                    unsigned InstStageNum,
331
                                    SMSchedule &Schedule);
332
  void updateInstruction(MachineInstr *NewMI, bool LastDef,
333
                         unsigned CurStageNum, unsigned InstrStageNum,
334
                         SMSchedule &Schedule, ValueMapTy *VRMap);
335
  MachineInstr *findDefInLoop(unsigned Reg);
336
  unsigned getPrevMapVal(unsigned StageNum, unsigned PhiStage, unsigned LoopVal,
337
                         unsigned LoopStage, ValueMapTy *VRMap,
338
                         MachineBasicBlock *BB);
339
  void rewritePhiValues(MachineBasicBlock *NewBB, unsigned StageNum,
340
                        SMSchedule &Schedule, ValueMapTy *VRMap,
341
                        InstrMapTy &InstrMap);
342
  void rewriteScheduledInstr(MachineBasicBlock *BB, SMSchedule &Schedule,
343
                             InstrMapTy &InstrMap, unsigned CurStageNum,
344
                             unsigned PhiNum, MachineInstr *Phi,
345
                             unsigned OldReg, unsigned NewReg,
346
                             unsigned PrevReg = 0);
347
  bool canUseLastOffsetValue(MachineInstr *MI, unsigned &BasePos,
348
                             unsigned &OffsetPos, unsigned &NewBase,
349
                             int64_t &NewOffset);
350
  void postprocessDAG();
351
  /// Set the Minimum Initiation Interval for this schedule attempt.
352
  void setMII(unsigned ResMII, unsigned RecMII);
353
  /// Set the Maximum Initiation Interval for this schedule attempt.
354
  void setMAX_II();
355
};
356
357
/// A NodeSet contains a set of SUnit DAG nodes with additional information
358
/// that assigns a priority to the set.
359
class NodeSet {
360
  SetVector<SUnit *> Nodes;
361
  bool HasRecurrence = false;
362
  unsigned RecMII = 0;
363
  int MaxMOV = 0;
364
  unsigned MaxDepth = 0;
365
  unsigned Colocate = 0;
366
  SUnit *ExceedPressure = nullptr;
367
  unsigned Latency = 0;
368
369
public:
370
  using iterator = SetVector<SUnit *>::const_iterator;
371
372
190
  NodeSet() = default;
373
623
  NodeSet(iterator S, iterator E) : Nodes(S, E), HasRecurrence(true) {
374
623
    Latency = 0;
375
2.74k
    for (unsigned i = 0, e = Nodes.size(); i < e; 
++i2.12k
)
376
2.12k
      for (const SDep &Succ : Nodes[i]->Succs)
377
3.56k
        if (Nodes.count(Succ.getSUnit()))
378
2.15k
          Latency += Succ.getLatency();
379
623
  }
380
381
1.34k
  bool insert(SUnit *SU) { return Nodes.insert(SU); }
382
383
90
  void insert(iterator S, iterator E) { Nodes.insert(S, E); }
384
385
789
  template <typename UnaryPredicate> bool remove_if(UnaryPredicate P) {
386
789
    return Nodes.remove_if(P);
387
789
  }
388
389
16.8k
  unsigned count(SUnit *SU) const { return Nodes.count(SU); }
390
391
0
  bool hasRecurrence() { return HasRecurrence; };
392
393
437
  unsigned size() const { return Nodes.size(); }
394
395
2.48k
  bool empty() const { return Nodes.empty(); }
396
397
3.14k
  SUnit *getNode(unsigned i) const { return Nodes[i]; };
398
399
644
  void setRecMII(unsigned mii) { RecMII = mii; };
400
401
56
  void setColocate(unsigned c) { Colocate = c; };
402
403
1
  void setExceedPressure(SUnit *SU) { ExceedPressure = SU; }
404
405
1.35k
  bool isExceedSU(SUnit *SU) { return ExceedPressure == SU; }
406
407
666
  int compareRecMII(NodeSet &RHS) { return RecMII - RHS.RecMII; }
408
409
24
  int getRecMII() { return RecMII; }
410
411
  /// Summarize node functions for the entire node set.
412
437
  void computeNodeSetInfo(SwingSchedulerDAG *SSD) {
413
1.34k
    for (SUnit *SU : *this) {
414
1.34k
      MaxMOV = std::max(MaxMOV, SSD->getMOV(SU));
415
1.34k
      MaxDepth = std::max(MaxDepth, SSD->getDepth(SU));
416
1.34k
    }
417
437
  }
418
419
623
  unsigned getLatency() { return Latency; }
420
421
3
  unsigned getMaxDepth() { return MaxDepth; }
422
423
200
  void clear() {
424
200
    Nodes.clear();
425
200
    RecMII = 0;
426
200
    HasRecurrence = false;
427
200
    MaxMOV = 0;
428
200
    MaxDepth = 0;
429
200
    Colocate = 0;
430
200
    ExceedPressure = nullptr;
431
200
  }
432
433
2.19k
  operator SetVector<SUnit *> &() { return Nodes; }
434
435
  /// Sort the node sets by importance. First, rank them by recurrence MII,
436
  /// then by mobility (least mobile done first), and finally by depth.
437
  /// Each node set may contain a colocate value which is used as the first
438
  /// tie breaker, if it's set.
439
408
  bool operator>(const NodeSet &RHS) const {
440
408
    if (RecMII == RHS.RecMII) {
441
243
      if (Colocate != 0 && 
RHS.Colocate != 037
&&
Colocate != RHS.Colocate31
)
442
8
        return Colocate < RHS.Colocate;
443
235
      if (MaxMOV == RHS.MaxMOV)
444
102
        return MaxDepth > RHS.MaxDepth;
445
133
      return MaxMOV < RHS.MaxMOV;
446
133
    }
447
165
    return RecMII > RHS.RecMII;
448
165
  }
449
450
0
  bool operator==(const NodeSet &RHS) const {
451
0
    return RecMII == RHS.RecMII && MaxMOV == RHS.MaxMOV &&
452
0
           MaxDepth == RHS.MaxDepth;
453
0
  }
454
455
0
  bool operator!=(const NodeSet &RHS) const { return !operator==(RHS); }
456
457
1.61k
  iterator begin() { return Nodes.begin(); }
458
1.61k
  iterator end() { return Nodes.end(); }
459
  void print(raw_ostream &os) const;
460
461
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
462
  LLVM_DUMP_METHOD void dump() const;
463
#endif
464
};
465
466
// 16 was selected based on the number of ProcResource kinds for all
467
// existing Subtargets, so that SmallVector don't need to resize too often.
468
static const int DefaultProcResSize = 16;
469
470
class ResourceManager {
471
private:
472
  const MCSubtargetInfo *STI;
473
  const MCSchedModel &SM;
474
  const bool UseDFA;
475
  std::unique_ptr<DFAPacketizer> DFAResources;
476
  /// Each processor resource is associated with a so-called processor resource
477
  /// mask. This vector allows to correlate processor resource IDs with
478
  /// processor resource masks. There is exactly one element per each processor
479
  /// resource declared by the scheduling model.
480
  llvm::SmallVector<uint64_t, DefaultProcResSize> ProcResourceMasks;
481
482
  llvm::SmallVector<uint64_t, DefaultProcResSize> ProcResourceCount;
483
484
public:
485
  ResourceManager(const TargetSubtargetInfo *ST)
486
      : STI(ST), SM(ST->getSchedModel()), UseDFA(ST->useDFAforSMS()),
487
        ProcResourceMasks(SM.getNumProcResourceKinds(), 0),
488
734
        ProcResourceCount(SM.getNumProcResourceKinds(), 0) {
489
734
    if (UseDFA)
490
717
      DFAResources.reset(ST->getInstrInfo()->CreateTargetScheduleState(*ST));
491
734
    initProcResourceVectors(SM, ProcResourceMasks);
492
734
  }
493
494
  void initProcResourceVectors(const MCSchedModel &SM,
495
                               SmallVectorImpl<uint64_t> &Masks);
496
  /// Check if the resources occupied by a MCInstrDesc are available in
497
  /// the current state.
498
  bool canReserveResources(const MCInstrDesc *MID) const;
499
500
  /// Reserve the resources occupied by a MCInstrDesc and change the current
501
  /// state to reflect that change.
502
  void reserveResources(const MCInstrDesc *MID);
503
504
  /// Check if the resources occupied by a machine instruction are available
505
  /// in the current state.
506
  bool canReserveResources(const MachineInstr &MI) const;
507
508
  /// Reserve the resources occupied by a machine instruction and change the
509
  /// current state to reflect that change.
510
  void reserveResources(const MachineInstr &MI);
511
512
  /// Reset the state
513
  void clearResources();
514
};
515
516
/// This class represents the scheduled code.  The main data structure is a
517
/// map from scheduled cycle to instructions.  During scheduling, the
518
/// data structure explicitly represents all stages/iterations.   When
519
/// the algorithm finshes, the schedule is collapsed into a single stage,
520
/// which represents instructions from different loop iterations.
521
///
522
/// The SMS algorithm allows negative values for cycles, so the first cycle
523
/// in the schedule is the smallest cycle value.
524
class SMSchedule {
525
private:
526
  /// Map from execution cycle to instructions.
527
  DenseMap<int, std::deque<SUnit *>> ScheduledInstrs;
528
529
  /// Map from instruction to execution cycle.
530
  std::map<SUnit *, int> InstrToCycle;
531
532
  /// Map for each register and the max difference between its uses and def.
533
  /// The first element in the pair is the max difference in stages. The
534
  /// second is true if the register defines a Phi value and loop value is
535
  /// scheduled before the Phi.
536
  std::map<unsigned, std::pair<unsigned, bool>> RegToStageDiff;
537
538
  /// Keep track of the first cycle value in the schedule.  It starts
539
  /// as zero, but the algorithm allows negative values.
540
  int FirstCycle = 0;
541
542
  /// Keep track of the last cycle value in the schedule.
543
  int LastCycle = 0;
544
545
  /// The initiation interval (II) for the schedule.
546
  int InitiationInterval = 0;
547
548
  /// Target machine information.
549
  const TargetSubtargetInfo &ST;
550
551
  /// Virtual register information.
552
  MachineRegisterInfo &MRI;
553
554
  ResourceManager ProcItinResources;
555
556
public:
557
  SMSchedule(MachineFunction *mf)
558
190
      : ST(mf->getSubtarget()), MRI(mf->getRegInfo()), ProcItinResources(&ST) {}
559
560
293
  void reset() {
561
293
    ScheduledInstrs.clear();
562
293
    InstrToCycle.clear();
563
293
    RegToStageDiff.clear();
564
293
    FirstCycle = 0;
565
293
    LastCycle = 0;
566
293
    InitiationInterval = 0;
567
293
  }
568
569
  /// Set the initiation interval for this schedule.
570
288
  void setInitiationInterval(int ii) { InitiationInterval = ii; }
571
572
  /// Return the first cycle in the completed schedule.  This
573
  /// can be a negative value.
574
4.08k
  int getFirstCycle() const { return FirstCycle; }
575
576
  /// Return the last cycle in the finalized schedule.
577
1.19k
  int getFinalCycle() const { return FirstCycle + InitiationInterval - 1; }
578
579
  /// Return the cycle of the earliest scheduled instruction in the dependence
580
  /// chain.
581
  int earliestCycleInChain(const SDep &Dep);
582
583
  /// Return the cycle of the latest scheduled instruction in the dependence
584
  /// chain.
585
  int latestCycleInChain(const SDep &Dep);
586
587
  void computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart,
588
                    int *MinEnd, int *MaxStart, int II, SwingSchedulerDAG *DAG);
589
  bool insert(SUnit *SU, int StartCycle, int EndCycle, int II);
590
591
  /// Iterators for the cycle to instruction map.
592
  using sched_iterator = DenseMap<int, std::deque<SUnit *>>::iterator;
593
  using const_sched_iterator =
594
      DenseMap<int, std::deque<SUnit *>>::const_iterator;
595
596
  /// Return true if the instruction is scheduled at the specified stage.
597
4.90k
  bool isScheduledAtStage(SUnit *SU, unsigned StageNum) {
598
4.90k
    return (stageScheduled(SU) == (int)StageNum);
599
4.90k
  }
600
601
  /// Return the stage for a scheduled instruction.  Return -1 if
602
  /// the instruction has not been scheduled.
603
29.8k
  int stageScheduled(SUnit *SU) const {
604
29.8k
    std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(SU);
605
29.8k
    if (it == InstrToCycle.end())
606
1.53k
      return -1;
607
28.2k
    return (it->second - FirstCycle) / InitiationInterval;
608
28.2k
  }
609
610
  /// Return the cycle for a scheduled instruction. This function normalizes
611
  /// the first cycle to be 0.
612
5.88k
  unsigned cycleScheduled(SUnit *SU) const {
613
5.88k
    std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(SU);
614
5.88k
    assert(it != InstrToCycle.end() && "Instruction hasn't been scheduled.");
615
5.88k
    return (it->second - FirstCycle) % InitiationInterval;
616
5.88k
  }
617
618
  /// Return the maximum stage count needed for this schedule.
619
9.31k
  unsigned getMaxStageCount() {
620
9.31k
    return (LastCycle - FirstCycle) / InitiationInterval;
621
9.31k
  }
622
623
  /// Return the max. number of stages/iterations that can occur between a
624
  /// register definition and its uses.
625
3.11k
  unsigned getStagesForReg(int Reg, unsigned CurStage) {
626
3.11k
    std::pair<unsigned, bool> Stages = RegToStageDiff[Reg];
627
3.11k
    if (CurStage > getMaxStageCount() && 
Stages.first == 01.85k
&&
Stages.second1.04k
)
628
8
      return 1;
629
3.10k
    return Stages.first;
630
3.10k
  }
631
632
  /// The number of stages for a Phi is a little different than other
633
  /// instructions. The minimum value computed in RegToStageDiff is 1
634
  /// because we assume the Phi is needed for at least 1 iteration.
635
  /// This is not the case if the loop value is scheduled prior to the
636
  /// Phi in the same stage.  This function returns the number of stages
637
  /// or iterations needed between the Phi definition and any uses.
638
431
  unsigned getStagesForPhi(int Reg) {
639
431
    std::pair<unsigned, bool> Stages = RegToStageDiff[Reg];
640
431
    if (Stages.second)
641
35
      return Stages.first;
642
396
    return Stages.first - 1;
643
396
  }
644
645
  /// Return the instructions that are scheduled at the specified cycle.
646
26.8k
  std::deque<SUnit *> &getInstructions(int cycle) {
647
26.8k
    return ScheduledInstrs[cycle];
648
26.8k
  }
649
650
  bool isValidSchedule(SwingSchedulerDAG *SSD);
651
  void finalizeSchedule(SwingSchedulerDAG *SSD);
652
  void orderDependence(SwingSchedulerDAG *SSD, SUnit *SU,
653
                       std::deque<SUnit *> &Insts);
654
  bool isLoopCarried(SwingSchedulerDAG *SSD, MachineInstr &Phi);
655
  bool isLoopCarriedDefOfUse(SwingSchedulerDAG *SSD, MachineInstr *Def,
656
                             MachineOperand &MO);
657
  void print(raw_ostream &os) const;
658
  void dump() const;
659
};
660
661
} // end namespace llvm
662
663
#endif // LLVM_LIB_CODEGEN_MACHINEPIPELINER_H