Coverage Report

Created: 2017-10-03 07:32

/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/lib/Target/AMDGPU/SIMachineScheduler.h
Line
Count
Source (jump to first uncovered line)
1
//===-- SIMachineScheduler.h - SI Scheduler Interface -----------*- C++ -*-===//
2
//
3
//                     The LLVM Compiler Infrastructure
4
//
5
// This file is distributed under the University of Illinois Open Source
6
// License. See LICENSE.TXT for details.
7
//
8
//===----------------------------------------------------------------------===//
9
//
10
/// \file
11
/// \brief SI Machine Scheduler interface
12
//
13
//===----------------------------------------------------------------------===//
14
15
#ifndef LLVM_LIB_TARGET_AMDGPU_SIMACHINESCHEDULER_H
16
#define LLVM_LIB_TARGET_AMDGPU_SIMACHINESCHEDULER_H
17
18
#include "SIInstrInfo.h"
19
#include "llvm/CodeGen/MachineBasicBlock.h"
20
#include "llvm/CodeGen/MachineScheduler.h"
21
#include "llvm/CodeGen/RegisterPressure.h"
22
#include "llvm/CodeGen/ScheduleDAG.h"
23
#include <cassert>
24
#include <cstdint>
25
#include <map>
26
#include <memory>
27
#include <set>
28
#include <vector>
29
30
namespace llvm {
31
32
enum SIScheduleCandReason {
33
  NoCand,
34
  RegUsage,
35
  Latency,
36
  Successor,
37
  Depth,
38
  NodeOrder
39
};
40
41
struct SISchedulerCandidate {
42
  // The reason for this candidate.
43
  SIScheduleCandReason Reason = NoCand;
44
45
  // Set of reasons that apply to multiple candidates.
46
  uint32_t RepeatReasonSet = 0;
47
48
53
  SISchedulerCandidate() = default;
49
50
0
  bool isRepeat(SIScheduleCandReason R) { return RepeatReasonSet & (1 << R); }
51
23
  void setRepeat(SIScheduleCandReason R) { RepeatReasonSet |= (1 << R); }
52
};
53
54
class SIScheduleDAGMI;
55
class SIScheduleBlockCreator;
56
57
enum SIScheduleBlockLinkKind {
58
  NoData,
59
  Data
60
};
61
62
class SIScheduleBlock {
63
  SIScheduleDAGMI *DAG;
64
  SIScheduleBlockCreator *BC;
65
66
  std::vector<SUnit*> SUnits;
67
  std::map<unsigned, unsigned> NodeNum2Index;
68
  std::vector<SUnit*> TopReadySUs;
69
  std::vector<SUnit*> ScheduledSUnits;
70
71
  /// The top of the unscheduled zone.
72
  IntervalPressure TopPressure;
73
  RegPressureTracker TopRPTracker;
74
75
  // Pressure: number of said class of registers needed to
76
  // store the live virtual and real registers.
77
  // We do care only of SGPR32 and VGPR32 and do track only virtual registers.
78
  // Pressure of additional registers required inside the block.
79
  std::vector<unsigned> InternalAdditionnalPressure;
80
  // Pressure of input and output registers
81
  std::vector<unsigned> LiveInPressure;
82
  std::vector<unsigned> LiveOutPressure;
83
  // Registers required by the block, and outputs.
84
  // We do track only virtual registers.
85
  // Note that some registers are not 32 bits,
86
  // and thus the pressure is not equal
87
  // to the number of live registers.
88
  std::set<unsigned> LiveInRegs;
89
  std::set<unsigned> LiveOutRegs;
90
91
  bool Scheduled = false;
92
  bool HighLatencyBlock = false;
93
94
  std::vector<unsigned> HasLowLatencyNonWaitedParent;
95
96
  // Unique ID, the index of the Block in the SIScheduleDAGMI Blocks table.
97
  unsigned ID;
98
99
  std::vector<SIScheduleBlock*> Preds;  // All blocks predecessors.
100
  // All blocks successors, and the kind of link
101
  std::vector<std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind>> Succs;
102
  unsigned NumHighLatencySuccessors = 0;
103
104
public:
105
  SIScheduleBlock(SIScheduleDAGMI *DAG, SIScheduleBlockCreator *BC,
106
                  unsigned ID):
107
5
    DAG(DAG), BC(BC), TopRPTracker(TopPressure), ID(ID) {}
108
109
5
  ~SIScheduleBlock() = default;
110
111
156
  unsigned getID() const { return ID; }
112
113
  /// Functions for Block construction.
114
  void addUnit(SUnit *SU);
115
116
  // When all SUs have been added.
117
  void finalizeUnits();
118
119
  // Add block pred, which has instruction predecessor of SU.
120
  void addPred(SIScheduleBlock *Pred);
121
  void addSucc(SIScheduleBlock *Succ, SIScheduleBlockLinkKind Kind);
122
123
34
  const std::vector<SIScheduleBlock*>& getPreds() const { return Preds; }
124
  ArrayRef<std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind>>
125
28
    getSuccs() const { return Succs; }
126
127
  unsigned Height;  // Maximum topdown path length to block without outputs
128
  unsigned Depth;   // Maximum bottomup path length to block without inputs
129
130
5
  unsigned getNumHighLatencySuccessors() const {
131
5
    return NumHighLatencySuccessors;
132
5
  }
133
134
13
  bool isHighLatencyBlock() { return HighLatencyBlock; }
135
136
  // This is approximative.
137
  // Ideally should take into accounts some instructions (rcp, etc)
138
  // are 4 times slower.
139
11
  int getCost() { return SUnits.size(); }
140
141
  // The block Predecessors and Successors must be all registered
142
  // before fastSchedule().
143
  // Fast schedule with no particular requirement.
144
  void fastSchedule();
145
146
15
  std::vector<SUnit*> getScheduledUnits() { return ScheduledSUnits; }
147
148
  // Complete schedule that will try to minimize reg pressure and
149
  // low latencies, and will fill liveins and liveouts.
150
  // Needs all MIs to be grouped between BeginBlock and EndBlock.
151
  // The MIs can be moved after the scheduling,
152
  // it is just used to allow correct track of live registers.
153
  void schedule(MachineBasicBlock::iterator BeginBlock,
154
                MachineBasicBlock::iterator EndBlock);
155
156
0
  bool isScheduled() { return Scheduled; }
157
158
  // Needs the block to be scheduled inside
159
  // TODO: find a way to compute it.
160
0
  std::vector<unsigned> &getInternalAdditionnalRegUsage() {
161
0
    return InternalAdditionnalPressure;
162
0
  }
163
164
20
  std::set<unsigned> &getInRegs() { return LiveInRegs; }
165
25
  std::set<unsigned> &getOutRegs() { return LiveOutRegs; }
166
167
  void printDebug(bool Full);
168
169
private:
170
  struct SISchedCandidate : SISchedulerCandidate {
171
    // The best SUnit candidate.
172
    SUnit *SU = nullptr;
173
174
    unsigned SGPRUsage;
175
    unsigned VGPRUsage;
176
    bool IsLowLatency;
177
    unsigned LowLatencyOffset;
178
    bool HasLowLatencyNonWaitedParent;
179
180
41
    SISchedCandidate() = default;
181
182
25
    bool isValid() const { return SU; }
183
184
    // Copy the status of another candidate without changing policy.
185
19
    void setBest(SISchedCandidate &Best) {
186
19
      assert(Best.Reason != NoCand && "uninitialized Sched candidate");
187
19
      SU = Best.SU;
188
19
      Reason = Best.Reason;
189
19
      SGPRUsage = Best.SGPRUsage;
190
19
      VGPRUsage = Best.VGPRUsage;
191
19
      IsLowLatency = Best.IsLowLatency;
192
19
      LowLatencyOffset = Best.LowLatencyOffset;
193
19
      HasLowLatencyNonWaitedParent = Best.HasLowLatencyNonWaitedParent;
194
19
    }
195
  };
196
197
  void undoSchedule();
198
199
  void undoReleaseSucc(SUnit *SU, SDep *SuccEdge);
200
  void releaseSucc(SUnit *SU, SDep *SuccEdge);
201
  // InOrOutBlock: restrict to links pointing inside the block (true),
202
  // or restrict to links pointing outside the block (false).
203
  void releaseSuccessors(SUnit *SU, bool InOrOutBlock);
204
205
  void nodeScheduled(SUnit *SU);
206
  void tryCandidateTopDown(SISchedCandidate &Cand, SISchedCandidate &TryCand);
207
  void tryCandidateBottomUp(SISchedCandidate &Cand, SISchedCandidate &TryCand);
208
  SUnit* pickNode();
209
  void traceCandidate(const SISchedCandidate &Cand);
210
  void initRegPressure(MachineBasicBlock::iterator BeginBlock,
211
                       MachineBasicBlock::iterator EndBlock);
212
};
213
214
struct SIScheduleBlocks {
215
  std::vector<SIScheduleBlock*> Blocks;
216
  std::vector<int> TopDownIndex2Block;
217
  std::vector<int> TopDownBlock2Index;
218
};
219
220
enum SISchedulerBlockCreatorVariant {
221
  LatenciesAlone,
222
  LatenciesGrouped,
223
  LatenciesAlonePlusConsecutive
224
};
225
226
class SIScheduleBlockCreator {
227
  SIScheduleDAGMI *DAG;
228
  // unique_ptr handles freeing memory for us.
229
  std::vector<std::unique_ptr<SIScheduleBlock>> BlockPtrs;
230
  std::map<SISchedulerBlockCreatorVariant,
231
           SIScheduleBlocks> Blocks;
232
  std::vector<SIScheduleBlock*> CurrentBlocks;
233
  std::vector<int> Node2CurrentBlock;
234
235
  // Topological sort
236
  // Maps topological index to the node number.
237
  std::vector<int> TopDownIndex2Block;
238
  std::vector<int> TopDownBlock2Index;
239
  std::vector<int> BottomUpIndex2Block;
240
241
  // 0 -> Color not given.
242
  // 1 to SUnits.size() -> Reserved group (you should only add elements to them).
243
  // Above -> Other groups.
244
  int NextReservedID;
245
  int NextNonReservedID;
246
  std::vector<int> CurrentColoring;
247
  std::vector<int> CurrentTopDownReservedDependencyColoring;
248
  std::vector<int> CurrentBottomUpReservedDependencyColoring;
249
250
public:
251
  SIScheduleBlockCreator(SIScheduleDAGMI *DAG);
252
  ~SIScheduleBlockCreator();
253
254
  SIScheduleBlocks
255
  getBlocks(SISchedulerBlockCreatorVariant BlockVariant);
256
257
  bool isSUInBlock(SUnit *SU, unsigned ID);
258
259
private:
260
  // Give a Reserved color to every high latency.
261
  void colorHighLatenciesAlone();
262
263
  // Create groups of high latencies with a Reserved color.
264
  void colorHighLatenciesGroups();
265
266
  // Compute coloring for topdown and bottom traversals with
267
  // different colors depending on dependencies on Reserved colors.
268
  void colorComputeReservedDependencies();
269
270
  // Give color to all non-colored SUs according to Reserved groups dependencies.
271
  void colorAccordingToReservedDependencies();
272
273
  // Divides Blocks having no bottom up or top down dependencies on Reserved groups.
274
  // The new colors are computed according to the dependencies on the other blocks
275
  // formed with colorAccordingToReservedDependencies.
276
  void colorEndsAccordingToDependencies();
277
278
  // Cut groups into groups with SUs in consecutive order (except for Reserved groups).
279
  void colorForceConsecutiveOrderInGroup();
280
281
  // Merge Constant loads that have all their users into another group to the group.
282
  // (TODO: else if all their users depend on the same group, put them there)
283
  void colorMergeConstantLoadsNextGroup();
284
285
  // Merge SUs that have all their users into another group to the group
286
  void colorMergeIfPossibleNextGroup();
287
288
  // Merge SUs that have all their users into another group to the group,
289
  // but only for Reserved groups.
290
  void colorMergeIfPossibleNextGroupOnlyForReserved();
291
292
  // Merge SUs that have all their users into another group to the group,
293
  // but only if the group is no more than a few SUs.
294
  void colorMergeIfPossibleSmallGroupsToNextGroup();
295
296
  // Divides Blocks with important size.
297
  // Idea of implementation: attribute new colors depending on topdown and
298
  // bottom up links to other blocks.
299
  void cutHugeBlocks();
300
301
  // Put in one group all instructions with no users in this scheduling region
302
  // (we'd want these groups be at the end).
303
  void regroupNoUserInstructions();
304
305
  // Give Reserved color to export instructions
306
  void colorExports();
307
308
  void createBlocksForVariant(SISchedulerBlockCreatorVariant BlockVariant);
309
310
  void topologicalSort();
311
312
  void scheduleInsideBlocks();
313
314
  void fillStats();
315
};
316
317
enum SISchedulerBlockSchedulerVariant {
318
  BlockLatencyRegUsage,
319
  BlockRegUsageLatency,
320
  BlockRegUsage
321
};
322
323
class SIScheduleBlockScheduler {
324
  SIScheduleDAGMI *DAG;
325
  SISchedulerBlockSchedulerVariant Variant;
326
  std::vector<SIScheduleBlock*> Blocks;
327
328
  std::vector<std::map<unsigned, unsigned>> LiveOutRegsNumUsages;
329
  std::set<unsigned> LiveRegs;
330
  // Num of schedulable unscheduled blocks reading the register.
331
  std::map<unsigned, unsigned> LiveRegsConsumers;
332
333
  std::vector<unsigned> LastPosHighLatencyParentScheduled;
334
  int LastPosWaitedHighLatency;
335
336
  std::vector<SIScheduleBlock*> BlocksScheduled;
337
  unsigned NumBlockScheduled;
338
  std::vector<SIScheduleBlock*> ReadyBlocks;
339
340
  unsigned VregCurrentUsage;
341
  unsigned SregCurrentUsage;
342
343
  // Currently is only approximation.
344
  unsigned maxVregUsage;
345
  unsigned maxSregUsage;
346
347
  std::vector<unsigned> BlockNumPredsLeft;
348
  std::vector<unsigned> BlockNumSuccsLeft;
349
350
public:
351
  SIScheduleBlockScheduler(SIScheduleDAGMI *DAG,
352
                           SISchedulerBlockSchedulerVariant Variant,
353
                           SIScheduleBlocks BlocksStruct);
354
2
  ~SIScheduleBlockScheduler() = default;
355
356
2
  std::vector<SIScheduleBlock*> getBlocks() { return BlocksScheduled; }
357
358
2
  unsigned getVGPRUsage() { return maxVregUsage; }
359
2
  unsigned getSGPRUsage() { return maxSregUsage; }
360
361
private:
362
  struct SIBlockSchedCandidate : SISchedulerCandidate {
363
    // The best Block candidate.
364
    SIScheduleBlock *Block = nullptr;
365
366
    bool IsHighLatency;
367
    int VGPRUsageDiff;
368
    unsigned NumSuccessors;
369
    unsigned NumHighLatencySuccessors;
370
    unsigned LastPosHighLatParentScheduled;
371
    unsigned Height;
372
373
12
    SIBlockSchedCandidate() = default;
374
375
5
    bool isValid() const { return Block; }
376
377
    // Copy the status of another candidate without changing policy.
378
5
    void setBest(SIBlockSchedCandidate &Best) {
379
5
      assert(Best.Reason != NoCand && "uninitialized Sched candidate");
380
5
      Block = Best.Block;
381
5
      Reason = Best.Reason;
382
5
      IsHighLatency = Best.IsHighLatency;
383
5
      VGPRUsageDiff = Best.VGPRUsageDiff;
384
5
      NumSuccessors = Best.NumSuccessors;
385
5
      NumHighLatencySuccessors = Best.NumHighLatencySuccessors;
386
5
      LastPosHighLatParentScheduled = Best.LastPosHighLatParentScheduled;
387
5
      Height = Best.Height;
388
5
    }
389
  };
390
391
  bool tryCandidateLatency(SIBlockSchedCandidate &Cand,
392
                           SIBlockSchedCandidate &TryCand);
393
  bool tryCandidateRegUsage(SIBlockSchedCandidate &Cand,
394
                            SIBlockSchedCandidate &TryCand);
395
  SIScheduleBlock *pickBlock();
396
397
  void addLiveRegs(std::set<unsigned> &Regs);
398
  void decreaseLiveRegs(SIScheduleBlock *Block, std::set<unsigned> &Regs);
399
  void releaseBlockSuccs(SIScheduleBlock *Parent);
400
  void blockScheduled(SIScheduleBlock *Block);
401
402
  // Check register pressure change
403
  // by scheduling a block with these LiveIn and LiveOut.
404
  std::vector<int> checkRegUsageImpact(std::set<unsigned> &InRegs,
405
                                       std::set<unsigned> &OutRegs);
406
407
  void schedule();
408
};
409
410
struct SIScheduleBlockResult {
411
  std::vector<unsigned> SUs;
412
  unsigned MaxSGPRUsage;
413
  unsigned MaxVGPRUsage;
414
};
415
416
class SIScheduler {
417
  SIScheduleDAGMI *DAG;
418
  SIScheduleBlockCreator BlockCreator;
419
420
public:
421
2
  SIScheduler(SIScheduleDAGMI *DAG) : DAG(DAG), BlockCreator(DAG) {}
422
423
2
  ~SIScheduler() = default;
424
425
  struct SIScheduleBlockResult
426
  scheduleVariant(SISchedulerBlockCreatorVariant BlockVariant,
427
                  SISchedulerBlockSchedulerVariant ScheduleVariant);
428
};
429
430
class SIScheduleDAGMI final : public ScheduleDAGMILive {
431
  const SIInstrInfo *SITII;
432
  const SIRegisterInfo *SITRI;
433
434
  std::vector<SUnit> SUnitsLinksBackup;
435
436
  // For moveLowLatencies. After all Scheduling variants are tested.
437
  std::vector<unsigned> ScheduledSUnits;
438
  std::vector<unsigned> ScheduledSUnitsInv;
439
440
  unsigned VGPRSetID;
441
  unsigned SGPRSetID;
442
443
public:
444
  SIScheduleDAGMI(MachineSchedContext *C);
445
446
  ~SIScheduleDAGMI() override;
447
448
  // Entry point for the schedule.
449
  void schedule() override;
450
451
  // To init Block's RPTracker.
452
15
  void initRPTracker(RegPressureTracker &RPTracker) {
453
15
    RPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin, false, false);
454
15
  }
455
456
16
  MachineBasicBlock *getBB() { return BB; }
457
2
  MachineBasicBlock::iterator getCurrentTop() { return CurrentTop; }
458
12
  MachineBasicBlock::iterator getCurrentBottom() { return CurrentBottom; }
459
13
  LiveIntervals *getLIS() { return LIS; }
460
21
  MachineRegisterInfo *getMRI() { return &MRI; }
461
5
  const TargetRegisterInfo *getTRI() { return TRI; }
462
0
  ScheduleDAGTopologicalSort *GetTopo() { return &Topo; }
463
0
  SUnit& getEntrySU() { return EntrySU; }
464
0
  SUnit& getExitSU() { return ExitSU; }
465
466
  void restoreSULinksLeft();
467
468
  template<typename _Iterator> void fillVgprSgprCost(_Iterator First,
469
                                                     _Iterator End,
470
                                                     unsigned &VgprUsage,
471
                                                     unsigned &SgprUsage);
472
473
2
  std::set<unsigned> getInRegs() {
474
2
    std::set<unsigned> InRegs;
475
11
    for (const auto &RegMaskPair : RPTracker.getPressure().LiveInRegs) {
476
11
      InRegs.insert(RegMaskPair.RegUnit);
477
11
    }
478
2
    return InRegs;
479
2
  }
480
481
2
  std::set<unsigned> getOutRegs() {
482
2
    std::set<unsigned> OutRegs;
483
3
    for (const auto &RegMaskPair : RPTracker.getPressure().LiveOutRegs) {
484
3
      OutRegs.insert(RegMaskPair.RegUnit);
485
3
    }
486
2
    return OutRegs;
487
2
  };
488
489
30
  unsigned getVGPRSetID() const { return VGPRSetID; }
490
25
  unsigned getSGPRSetID() const { return SGPRSetID; }
491
492
private:
493
  void topologicalSort();
494
  // After scheduling is done, improve low latency placements.
495
  void moveLowLatencies();
496
497
public:
498
  // Some stats for scheduling inside blocks.
499
  std::vector<unsigned> IsLowLatencySU;
500
  std::vector<unsigned> LowLatencyOffset;
501
  std::vector<unsigned> IsHighLatencySU;
502
  // Topological sort
503
  // Maps topological index to the node number.
504
  std::vector<int> TopDownIndex2SU;
505
  std::vector<int> BottomUpIndex2SU;
506
};
507
508
} // end namespace llvm
509
510
#endif // LLVM_LIB_TARGET_AMDGPU_SIMACHINESCHEDULER_H