Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Target/AMDGPU/SIMachineScheduler.h
Line
Count
Source (jump to first uncovered line)
1
//===-- SIMachineScheduler.h - SI Scheduler Interface -----------*- C++ -*-===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
/// \file
10
/// SI Machine Scheduler interface
11
//
12
//===----------------------------------------------------------------------===//
13
14
#ifndef LLVM_LIB_TARGET_AMDGPU_SIMACHINESCHEDULER_H
15
#define LLVM_LIB_TARGET_AMDGPU_SIMACHINESCHEDULER_H
16
17
#include "SIInstrInfo.h"
18
#include "llvm/CodeGen/MachineBasicBlock.h"
19
#include "llvm/CodeGen/MachineScheduler.h"
20
#include "llvm/CodeGen/RegisterPressure.h"
21
#include "llvm/CodeGen/ScheduleDAG.h"
22
#include <cassert>
23
#include <cstdint>
24
#include <map>
25
#include <memory>
26
#include <set>
27
#include <vector>
28
29
namespace llvm {
30
31
enum SIScheduleCandReason {
32
  NoCand,
33
  RegUsage,
34
  Latency,
35
  Successor,
36
  Depth,
37
  NodeOrder
38
};
39
40
struct SISchedulerCandidate {
41
  // The reason for this candidate.
42
  SIScheduleCandReason Reason = NoCand;
43
44
  // Set of reasons that apply to multiple candidates.
45
  uint32_t RepeatReasonSet = 0;
46
47
221
  SISchedulerCandidate() = default;
48
49
0
  bool isRepeat(SIScheduleCandReason R) { return RepeatReasonSet & (1 << R); }
50
120
  void setRepeat(SIScheduleCandReason R) { RepeatReasonSet |= (1 << R); }
51
};
52
53
class SIScheduleDAGMI;
54
class SIScheduleBlockCreator;
55
56
enum SIScheduleBlockLinkKind {
57
  NoData,
58
  Data
59
};
60
61
class SIScheduleBlock {
62
  SIScheduleDAGMI *DAG;
63
  SIScheduleBlockCreator *BC;
64
65
  std::vector<SUnit*> SUnits;
66
  std::map<unsigned, unsigned> NodeNum2Index;
67
  std::vector<SUnit*> TopReadySUs;
68
  std::vector<SUnit*> ScheduledSUnits;
69
70
  /// The top of the unscheduled zone.
71
  IntervalPressure TopPressure;
72
  RegPressureTracker TopRPTracker;
73
74
  // Pressure: number of said class of registers needed to
75
  // store the live virtual and real registers.
76
  // We do care only of SGPR32 and VGPR32 and do track only virtual registers.
77
  // Pressure of additional registers required inside the block.
78
  std::vector<unsigned> InternalAdditionnalPressure;
79
  // Pressure of input and output registers
80
  std::vector<unsigned> LiveInPressure;
81
  std::vector<unsigned> LiveOutPressure;
82
  // Registers required by the block, and outputs.
83
  // We do track only virtual registers.
84
  // Note that some registers are not 32 bits,
85
  // and thus the pressure is not equal
86
  // to the number of live registers.
87
  std::set<unsigned> LiveInRegs;
88
  std::set<unsigned> LiveOutRegs;
89
90
  bool Scheduled = false;
91
  bool HighLatencyBlock = false;
92
93
  std::vector<unsigned> HasLowLatencyNonWaitedParent;
94
95
  // Unique ID, the index of the Block in the SIScheduleDAGMI Blocks table.
96
  unsigned ID;
97
98
  std::vector<SIScheduleBlock*> Preds;  // All blocks predecessors.
99
  // All blocks successors, and the kind of link
100
  std::vector<std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind>> Succs;
101
  unsigned NumHighLatencySuccessors = 0;
102
103
public:
104
  SIScheduleBlock(SIScheduleDAGMI *DAG, SIScheduleBlockCreator *BC,
105
                  unsigned ID):
106
21
    DAG(DAG), BC(BC), TopRPTracker(TopPressure), ID(ID) {}
107
108
21
  ~SIScheduleBlock() = default;
109
110
581
  unsigned getID() const { return ID; }
111
112
  /// Functions for Block construction.
113
  void addUnit(SUnit *SU);
114
115
  // When all SUs have been added.
116
  void finalizeUnits();
117
118
  // Add block pred, which has instruction predecessor of SU.
119
  void addPred(SIScheduleBlock *Pred);
120
  void addSucc(SIScheduleBlock *Succ, SIScheduleBlockLinkKind Kind);
121
122
141
  const std::vector<SIScheduleBlock*>& getPreds() const { return Preds; }
123
  ArrayRef<std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind>>
124
117
    getSuccs() const { return Succs; }
125
126
  unsigned Height;  // Maximum topdown path length to block without outputs
127
  unsigned Depth;   // Maximum bottomup path length to block without inputs
128
129
21
  unsigned getNumHighLatencySuccessors() const {
130
21
    return NumHighLatencySuccessors;
131
21
  }
132
133
55
  bool isHighLatencyBlock() { return HighLatencyBlock; }
134
135
  // This is approximative.
136
  // Ideally should take into accounts some instructions (rcp, etc)
137
  // are 4 times slower.
138
46
  int getCost() { return SUnits.size(); }
139
140
  // The block Predecessors and Successors must be all registered
141
  // before fastSchedule().
142
  // Fast schedule with no particular requirement.
143
  void fastSchedule();
144
145
63
  std::vector<SUnit*> getScheduledUnits() { return ScheduledSUnits; }
146
147
  // Complete schedule that will try to minimize reg pressure and
148
  // low latencies, and will fill liveins and liveouts.
149
  // Needs all MIs to be grouped between BeginBlock and EndBlock.
150
  // The MIs can be moved after the scheduling,
151
  // it is just used to allow correct track of live registers.
152
  void schedule(MachineBasicBlock::iterator BeginBlock,
153
                MachineBasicBlock::iterator EndBlock);
154
155
0
  bool isScheduled() { return Scheduled; }
156
157
  // Needs the block to be scheduled inside
158
  // TODO: find a way to compute it.
159
0
  std::vector<unsigned> &getInternalAdditionnalRegUsage() {
160
0
    return InternalAdditionnalPressure;
161
0
  }
162
163
84
  std::set<unsigned> &getInRegs() { return LiveInRegs; }
164
112
  std::set<unsigned> &getOutRegs() { return LiveOutRegs; }
165
166
  void printDebug(bool Full);
167
168
private:
169
  struct SISchedCandidate : SISchedulerCandidate {
170
    // The best SUnit candidate.
171
    SUnit *SU = nullptr;
172
173
    unsigned SGPRUsage;
174
    unsigned VGPRUsage;
175
    bool IsLowLatency;
176
    unsigned LowLatencyOffset;
177
    bool HasLowLatencyNonWaitedParent;
178
179
170
    SISchedCandidate() = default;
180
181
107
    bool isValid() const { return SU; }
182
183
    // Copy the status of another candidate without changing policy.
184
76
    void setBest(SISchedCandidate &Best) {
185
76
      assert(Best.Reason != NoCand && "uninitialized Sched candidate");
186
76
      SU = Best.SU;
187
76
      Reason = Best.Reason;
188
76
      SGPRUsage = Best.SGPRUsage;
189
76
      VGPRUsage = Best.VGPRUsage;
190
76
      IsLowLatency = Best.IsLowLatency;
191
76
      LowLatencyOffset = Best.LowLatencyOffset;
192
76
      HasLowLatencyNonWaitedParent = Best.HasLowLatencyNonWaitedParent;
193
76
    }
194
  };
195
196
  void undoSchedule();
197
198
  void undoReleaseSucc(SUnit *SU, SDep *SuccEdge);
199
  void releaseSucc(SUnit *SU, SDep *SuccEdge);
200
  // InOrOutBlock: restrict to links pointing inside the block (true),
201
  // or restrict to links pointing outside the block (false).
202
  void releaseSuccessors(SUnit *SU, bool InOrOutBlock);
203
204
  void nodeScheduled(SUnit *SU);
205
  void tryCandidateTopDown(SISchedCandidate &Cand, SISchedCandidate &TryCand);
206
  void tryCandidateBottomUp(SISchedCandidate &Cand, SISchedCandidate &TryCand);
207
  SUnit* pickNode();
208
  void traceCandidate(const SISchedCandidate &Cand);
209
  void initRegPressure(MachineBasicBlock::iterator BeginBlock,
210
                       MachineBasicBlock::iterator EndBlock);
211
};
212
213
struct SIScheduleBlocks {
214
  std::vector<SIScheduleBlock*> Blocks;
215
  std::vector<int> TopDownIndex2Block;
216
  std::vector<int> TopDownBlock2Index;
217
};
218
219
enum SISchedulerBlockCreatorVariant {
220
  LatenciesAlone,
221
  LatenciesGrouped,
222
  LatenciesAlonePlusConsecutive
223
};
224
225
class SIScheduleBlockCreator {
226
  SIScheduleDAGMI *DAG;
227
  // unique_ptr handles freeing memory for us.
228
  std::vector<std::unique_ptr<SIScheduleBlock>> BlockPtrs;
229
  std::map<SISchedulerBlockCreatorVariant,
230
           SIScheduleBlocks> Blocks;
231
  std::vector<SIScheduleBlock*> CurrentBlocks;
232
  std::vector<int> Node2CurrentBlock;
233
234
  // Topological sort
235
  // Maps topological index to the node number.
236
  std::vector<int> TopDownIndex2Block;
237
  std::vector<int> TopDownBlock2Index;
238
  std::vector<int> BottomUpIndex2Block;
239
240
  // 0 -> Color not given.
241
  // 1 to SUnits.size() -> Reserved group (you should only add elements to them).
242
  // Above -> Other groups.
243
  int NextReservedID;
244
  int NextNonReservedID;
245
  std::vector<int> CurrentColoring;
246
  std::vector<int> CurrentTopDownReservedDependencyColoring;
247
  std::vector<int> CurrentBottomUpReservedDependencyColoring;
248
249
public:
250
  SIScheduleBlockCreator(SIScheduleDAGMI *DAG);
251
  ~SIScheduleBlockCreator();
252
253
  SIScheduleBlocks
254
  getBlocks(SISchedulerBlockCreatorVariant BlockVariant);
255
256
  bool isSUInBlock(SUnit *SU, unsigned ID);
257
258
private:
259
  // Give a Reserved color to every high latency.
260
  void colorHighLatenciesAlone();
261
262
  // Create groups of high latencies with a Reserved color.
263
  void colorHighLatenciesGroups();
264
265
  // Compute coloring for topdown and bottom traversals with
266
  // different colors depending on dependencies on Reserved colors.
267
  void colorComputeReservedDependencies();
268
269
  // Give color to all non-colored SUs according to Reserved groups dependencies.
270
  void colorAccordingToReservedDependencies();
271
272
  // Divides Blocks having no bottom up or top down dependencies on Reserved groups.
273
  // The new colors are computed according to the dependencies on the other blocks
274
  // formed with colorAccordingToReservedDependencies.
275
  void colorEndsAccordingToDependencies();
276
277
  // Cut groups into groups with SUs in consecutive order (except for Reserved groups).
278
  void colorForceConsecutiveOrderInGroup();
279
280
  // Merge Constant loads that have all their users into another group to the group.
281
  // (TODO: else if all their users depend on the same group, put them there)
282
  void colorMergeConstantLoadsNextGroup();
283
284
  // Merge SUs that have all their users into another group to the group
285
  void colorMergeIfPossibleNextGroup();
286
287
  // Merge SUs that have all their users into another group to the group,
288
  // but only for Reserved groups.
289
  void colorMergeIfPossibleNextGroupOnlyForReserved();
290
291
  // Merge SUs that have all their users into another group to the group,
292
  // but only if the group is no more than a few SUs.
293
  void colorMergeIfPossibleSmallGroupsToNextGroup();
294
295
  // Divides Blocks with important size.
296
  // Idea of implementation: attribute new colors depending on topdown and
297
  // bottom up links to other blocks.
298
  void cutHugeBlocks();
299
300
  // Put in one group all instructions with no users in this scheduling region
301
  // (we'd want these groups be at the end).
302
  void regroupNoUserInstructions();
303
304
  // Give Reserved color to export instructions
305
  void colorExports();
306
307
  void createBlocksForVariant(SISchedulerBlockCreatorVariant BlockVariant);
308
309
  void topologicalSort();
310
311
  void scheduleInsideBlocks();
312
313
  void fillStats();
314
};
315
316
enum SISchedulerBlockSchedulerVariant {
317
  BlockLatencyRegUsage,
318
  BlockRegUsageLatency,
319
  BlockRegUsage
320
};
321
322
class SIScheduleBlockScheduler {
323
  SIScheduleDAGMI *DAG;
324
  SISchedulerBlockSchedulerVariant Variant;
325
  std::vector<SIScheduleBlock*> Blocks;
326
327
  std::vector<std::map<unsigned, unsigned>> LiveOutRegsNumUsages;
328
  std::set<unsigned> LiveRegs;
329
  // Num of schedulable unscheduled blocks reading the register.
330
  std::map<unsigned, unsigned> LiveRegsConsumers;
331
332
  std::vector<unsigned> LastPosHighLatencyParentScheduled;
333
  int LastPosWaitedHighLatency;
334
335
  std::vector<SIScheduleBlock*> BlocksScheduled;
336
  unsigned NumBlockScheduled;
337
  std::vector<SIScheduleBlock*> ReadyBlocks;
338
339
  unsigned VregCurrentUsage;
340
  unsigned SregCurrentUsage;
341
342
  // Currently is only approximation.
343
  unsigned maxVregUsage;
344
  unsigned maxSregUsage;
345
346
  std::vector<unsigned> BlockNumPredsLeft;
347
  std::vector<unsigned> BlockNumSuccsLeft;
348
349
public:
350
  SIScheduleBlockScheduler(SIScheduleDAGMI *DAG,
351
                           SISchedulerBlockSchedulerVariant Variant,
352
                           SIScheduleBlocks BlocksStruct);
353
9
  ~SIScheduleBlockScheduler() = default;
354
355
9
  std::vector<SIScheduleBlock*> getBlocks() { return BlocksScheduled; }
356
357
9
  unsigned getVGPRUsage() { return maxVregUsage; }
358
9
  unsigned getSGPRUsage() { return maxSregUsage; }
359
360
private:
361
  struct SIBlockSchedCandidate : SISchedulerCandidate {
362
    // The best Block candidate.
363
    SIScheduleBlock *Block = nullptr;
364
365
    bool IsHighLatency;
366
    int VGPRUsageDiff;
367
    unsigned NumSuccessors;
368
    unsigned NumHighLatencySuccessors;
369
    unsigned LastPosHighLatParentScheduled;
370
    unsigned Height;
371
372
51
    SIBlockSchedCandidate() = default;
373
374
21
    bool isValid() const { return Block; }
375
376
    // Copy the status of another candidate without changing policy.
377
21
    void setBest(SIBlockSchedCandidate &Best) {
378
21
      assert(Best.Reason != NoCand && "uninitialized Sched candidate");
379
21
      Block = Best.Block;
380
21
      Reason = Best.Reason;
381
21
      IsHighLatency = Best.IsHighLatency;
382
21
      VGPRUsageDiff = Best.VGPRUsageDiff;
383
21
      NumSuccessors = Best.NumSuccessors;
384
21
      NumHighLatencySuccessors = Best.NumHighLatencySuccessors;
385
21
      LastPosHighLatParentScheduled = Best.LastPosHighLatParentScheduled;
386
21
      Height = Best.Height;
387
21
    }
388
  };
389
390
  bool tryCandidateLatency(SIBlockSchedCandidate &Cand,
391
                           SIBlockSchedCandidate &TryCand);
392
  bool tryCandidateRegUsage(SIBlockSchedCandidate &Cand,
393
                            SIBlockSchedCandidate &TryCand);
394
  SIScheduleBlock *pickBlock();
395
396
  void addLiveRegs(std::set<unsigned> &Regs);
397
  void decreaseLiveRegs(SIScheduleBlock *Block, std::set<unsigned> &Regs);
398
  void releaseBlockSuccs(SIScheduleBlock *Parent);
399
  void blockScheduled(SIScheduleBlock *Block);
400
401
  // Check register pressure change
402
  // by scheduling a block with these LiveIn and LiveOut.
403
  std::vector<int> checkRegUsageImpact(std::set<unsigned> &InRegs,
404
                                       std::set<unsigned> &OutRegs);
405
406
  void schedule();
407
};
408
409
struct SIScheduleBlockResult {
410
  std::vector<unsigned> SUs;
411
  unsigned MaxSGPRUsage;
412
  unsigned MaxVGPRUsage;
413
};
414
415
class SIScheduler {
416
  SIScheduleDAGMI *DAG;
417
  SIScheduleBlockCreator BlockCreator;
418
419
public:
420
9
  SIScheduler(SIScheduleDAGMI *DAG) : DAG(DAG), BlockCreator(DAG) {}
421
422
9
  ~SIScheduler() = default;
423
424
  struct SIScheduleBlockResult
425
  scheduleVariant(SISchedulerBlockCreatorVariant BlockVariant,
426
                  SISchedulerBlockSchedulerVariant ScheduleVariant);
427
};
428
429
class SIScheduleDAGMI final : public ScheduleDAGMILive {
430
  const SIInstrInfo *SITII;
431
  const SIRegisterInfo *SITRI;
432
433
  std::vector<SUnit> SUnitsLinksBackup;
434
435
  // For moveLowLatencies. After all Scheduling variants are tested.
436
  std::vector<unsigned> ScheduledSUnits;
437
  std::vector<unsigned> ScheduledSUnitsInv;
438
439
  unsigned VGPRSetID;
440
  unsigned SGPRSetID;
441
442
public:
443
  SIScheduleDAGMI(MachineSchedContext *C);
444
445
  ~SIScheduleDAGMI() override;
446
447
  // Entry point for the schedule.
448
  void schedule() override;
449
450
  // To init Block's RPTracker.
451
63
  void initRPTracker(RegPressureTracker &RPTracker) {
452
63
    RPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin, false, false);
453
63
  }
454
455
44
  MachineBasicBlock *getBB() { return BB; }
456
9
  MachineBasicBlock::iterator getCurrentTop() { return CurrentTop; }
457
52
  MachineBasicBlock::iterator getCurrentBottom() { return CurrentBottom; }
458
43
  LiveIntervals *getLIS() { return LIS; }
459
84
  MachineRegisterInfo *getMRI() { return &MRI; }
460
21
  const TargetRegisterInfo *getTRI() { return TRI; }
461
0
  ScheduleDAGTopologicalSort *GetTopo() { return &Topo; }
462
0
  SUnit& getEntrySU() { return EntrySU; }
463
0
  SUnit& getExitSU() { return ExitSU; }
464
465
  void restoreSULinksLeft();
466
467
  template<typename _Iterator> void fillVgprSgprCost(_Iterator First,
468
                                                     _Iterator End,
469
                                                     unsigned &VgprUsage,
470
                                                     unsigned &SgprUsage);
471
472
9
  std::set<unsigned> getInRegs() {
473
9
    std::set<unsigned> InRegs;
474
28
    for (const auto &RegMaskPair : RPTracker.getPressure().LiveInRegs) {
475
28
      InRegs.insert(RegMaskPair.RegUnit);
476
28
    }
477
9
    return InRegs;
478
9
  }
479
480
9
  std::set<unsigned> getOutRegs() {
481
9
    std::set<unsigned> OutRegs;
482
11
    for (const auto &RegMaskPair : RPTracker.getPressure().LiveOutRegs) {
483
11
      OutRegs.insert(RegMaskPair.RegUnit);
484
11
    }
485
9
    return OutRegs;
486
9
  };
487
488
128
  unsigned getVGPRSetID() const { return VGPRSetID; }
489
107
  unsigned getSGPRSetID() const { return SGPRSetID; }
490
491
private:
492
  void topologicalSort();
493
  // After scheduling is done, improve low latency placements.
494
  void moveLowLatencies();
495
496
public:
497
  // Some stats for scheduling inside blocks.
498
  std::vector<unsigned> IsLowLatencySU;
499
  std::vector<unsigned> LowLatencyOffset;
500
  std::vector<unsigned> IsHighLatencySU;
501
  // Topological sort
502
  // Maps topological index to the node number.
503
  std::vector<int> TopDownIndex2SU;
504
  std::vector<int> BottomUpIndex2SU;
505
};
506
507
} // end namespace llvm
508
509
#endif // LLVM_LIB_TARGET_AMDGPU_SIMACHINESCHEDULER_H