Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
Line
Count
Source (jump to first uncovered line)
1
//===-- GCNHazardRecognizers.cpp - GCN Hazard Recognizer Impls ------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This file implements hazard recognizers for scheduling on GCN processors.
10
//
11
//===----------------------------------------------------------------------===//
12
13
#include "GCNHazardRecognizer.h"
14
#include "AMDGPUSubtarget.h"
15
#include "SIDefines.h"
16
#include "SIInstrInfo.h"
17
#include "SIRegisterInfo.h"
18
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
19
#include "Utils/AMDGPUBaseInfo.h"
20
#include "llvm/ADT/iterator_range.h"
21
#include "llvm/CodeGen/MachineFunction.h"
22
#include "llvm/CodeGen/MachineInstr.h"
23
#include "llvm/CodeGen/MachineInstrBuilder.h"
24
#include "llvm/CodeGen/MachineOperand.h"
25
#include "llvm/CodeGen/ScheduleDAG.h"
26
#include "llvm/MC/MCInstrDesc.h"
27
#include "llvm/Support/ErrorHandling.h"
28
#include <algorithm>
29
#include <cassert>
30
#include <limits>
31
#include <set>
32
#include <vector>
33
34
using namespace llvm;
35
36
//===----------------------------------------------------------------------===//
37
// Hazard Recoginizer Implementation
38
//===----------------------------------------------------------------------===//
39
40
// Construct a hazard recognizer for \p MF.  The lookahead window is widened
// to 18 wait states when any AGPR is used (presumably to cover the longer
// MAI hazards checked below — confirm against checkMAIHazards); otherwise
// 5 states suffice.
GCNHazardRecognizer::GCNHazardRecognizer(const MachineFunction &MF) :
  IsHazardRecognizerMode(false),
  CurrCycleInstr(nullptr),
  MF(MF),
  ST(MF.getSubtarget<GCNSubtarget>()),
  TII(*ST.getInstrInfo()),
  TRI(TII.getRegisterInfo()),
  ClauseUses(TRI.getNumRegUnits()),
  ClauseDefs(TRI.getNumRegUnits()) {
  MaxLookAhead = MF.getRegInfo().isPhysRegUsed(AMDGPU::AGPR0) ? 18 : 5;
  TSchedModel.init(&ST);
}
52
53
270k
// Scheduler entry point: forward the SUnit's MachineInstr to the
// MachineInstr overload.
void GCNHazardRecognizer::EmitInstruction(SUnit *SU) {
  EmitInstruction(SU->getInstr());
}
56
57
705k
// Record \p MI as the instruction issued in the current cycle; it is
// consumed later by AdvanceCycle()/PreEmitNoopsCommon().
void GCNHazardRecognizer::EmitInstruction(MachineInstr *MI) {
  CurrCycleInstr = MI;
}
60
61
859k
static bool isDivFMas(unsigned Opcode) {
62
859k
  return Opcode == AMDGPU::V_DIV_FMAS_F32 || 
Opcode == AMDGPU::V_DIV_FMAS_F64859k
;
63
859k
}
64
65
857k
static bool isSGetReg(unsigned Opcode) {
66
857k
  return Opcode == AMDGPU::S_GETREG_B32;
67
857k
}
68
69
858k
static bool isSSetReg(unsigned Opcode) {
70
858k
  return Opcode == AMDGPU::S_SETREG_B32 || 
Opcode == AMDGPU::S_SETREG_IMM32_B32858k
;
71
858k
}
72
73
859k
static bool isRWLane(unsigned Opcode) {
74
859k
  return Opcode == AMDGPU::V_READLANE_B32 || 
Opcode == AMDGPU::V_WRITELANE_B32859k
;
75
859k
}
76
77
857k
static bool isRFE(unsigned Opcode) {
78
857k
  return Opcode == AMDGPU::S_RFE_B64;
79
857k
}
80
81
183k
static bool isSMovRel(unsigned Opcode) {
82
183k
  switch (Opcode) {
83
183k
  case AMDGPU::S_MOVRELS_B32:
84
4
  case AMDGPU::S_MOVRELS_B64:
85
4
  case AMDGPU::S_MOVRELD_B32:
86
4
  case AMDGPU::S_MOVRELD_B64:
87
4
    return true;
88
183k
  default:
89
183k
    return false;
90
183k
  }
91
183k
}
92
93
static bool isSendMsgTraceDataOrGDS(const SIInstrInfo &TII,
94
549k
                                    const MachineInstr &MI) {
95
549k
  if (TII.isAlwaysGDS(MI.getOpcode()))
96
204
    return true;
97
549k
98
549k
  switch (MI.getOpcode()) {
99
549k
  case AMDGPU::S_SENDMSG:
100
139
  case AMDGPU::S_SENDMSGHALT:
101
139
  case AMDGPU::S_TTRACEDATA:
102
139
    return true;
103
139
  // These DS opcodes don't support GDS.
104
139
  case AMDGPU::DS_NOP:
105
21
  case AMDGPU::DS_PERMUTE_B32:
106
21
  case AMDGPU::DS_BPERMUTE_B32:
107
21
    return false;
108
548k
  default:
109
548k
    if (TII.isDS(MI.getOpcode())) {
110
17.2k
      int GDS = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
111
17.2k
                                           AMDGPU::OpName::gds);
112
17.2k
      if (MI.getOperand(GDS).getImm())
113
120
        return true;
114
548k
    }
115
548k
    return false;
116
549k
  }
117
549k
}
118
119
28.1k
static bool isPermlane(const MachineInstr &MI) {
120
28.1k
  unsigned Opcode = MI.getOpcode();
121
28.1k
  return Opcode == AMDGPU::V_PERMLANE16_B32 ||
122
28.1k
         
Opcode == AMDGPU::V_PERMLANEX16_B3228.1k
;
123
28.1k
}
124
125
616
static unsigned getHWReg(const SIInstrInfo *TII, const MachineInstr &RegInstr) {
126
616
  const MachineOperand *RegOp = TII->getNamedOperand(RegInstr,
127
616
                                                     AMDGPU::OpName::simm16);
128
616
  return RegOp->getImm() & AMDGPU::Hwreg::ID_MASK_;
129
616
}
130
131
// Top-down scheduler query: classify \p SU as NoHazard or NoopHazard by
// running each targeted check*Hazards() routine that applies to the
// instruction's class.  Any positive wait-state count means a hazard.
ScheduleHazardRecognizer::HazardType
GCNHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
  MachineInstr *MI = SU->getInstr();
  // Bundles are handled per-member elsewhere.
  if (MI->isBundle())
   return NoHazard;

  if (SIInstrInfo::isSMRD(*MI) && checkSMRDHazards(MI) > 0)
    return NoopHazard;

  // FIXME: Should flat be considered vmem?
  if ((SIInstrInfo::isVMEM(*MI) ||
       SIInstrInfo::isFLAT(*MI))
      && checkVMEMHazards(MI) > 0)
    return NoopHazard;

  if (ST.hasNSAtoVMEMBug() && checkNSAtoVMEMHazard(MI) > 0)
    return NoopHazard;

  if (checkFPAtomicToDenormModeHazard(MI) > 0)
    return NoopHazard;

  // Subtargets without data-dependency hazards are done after the
  // memory-related checks above.
  if (ST.hasNoDataDepHazard())
    return NoHazard;

  if (SIInstrInfo::isVALU(*MI) && checkVALUHazards(MI) > 0)
    return NoopHazard;

  if (SIInstrInfo::isDPP(*MI) && checkDPPHazards(MI) > 0)
    return NoopHazard;

  if (isDivFMas(MI->getOpcode()) && checkDivFMasHazards(MI) > 0)
    return NoopHazard;

  if (isRWLane(MI->getOpcode()) && checkRWLaneHazards(MI) > 0)
    return NoopHazard;

  if (isSGetReg(MI->getOpcode()) && checkGetRegHazards(MI) > 0)
    return NoopHazard;

  if (isSSetReg(MI->getOpcode()) && checkSetRegHazards(MI) > 0)
    return NoopHazard;

  if (isRFE(MI->getOpcode()) && checkRFEHazards(MI) > 0)
    return NoopHazard;

  if (ST.hasReadM0MovRelInterpHazard() &&
      (TII.isVINTRP(*MI) || isSMovRel(MI->getOpcode())) &&
      checkReadM0Hazards(MI) > 0)
    return NoopHazard;

  if (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(TII, *MI) &&
      checkReadM0Hazards(MI) > 0)
    return NoopHazard;

  if (SIInstrInfo::isMAI(*MI) && checkMAIHazards(MI) > 0)
    return NoopHazard;

  if ((MI->mayLoad() || MI->mayStore()) && checkMAILdStHazards(MI) > 0)
    return NoopHazard;

  if (MI->isInlineAsm() && checkInlineAsmHazards(MI) > 0)
    return NoopHazard;

  if (checkAnyInstHazards(MI) > 0)
    return NoopHazard;

  return NoHazard;
}
199
200
26
// Insert an s_nop 0 immediately before \p MI inside its bundle.
static void insertNoopInBundle(MachineInstr *MI, const SIInstrInfo &TII) {
  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII.get(AMDGPU::S_NOP))
      .addImm(0);
}
204
205
1.74k
void GCNHazardRecognizer::processBundle() {
206
1.74k
  MachineBasicBlock::instr_iterator MI = std::next(CurrCycleInstr->getIterator());
207
1.74k
  MachineBasicBlock::instr_iterator E = CurrCycleInstr->getParent()->instr_end();
208
1.74k
  // Check bundled MachineInstr's for hazards.
209
6.69k
  for (; MI != E && 
MI->isInsideBundle()6.69k
;
++MI4.95k
) {
210
4.95k
    CurrCycleInstr = &*MI;
211
4.95k
    unsigned WaitStates = PreEmitNoopsCommon(CurrCycleInstr);
212
4.95k
213
4.95k
    if (IsHazardRecognizerMode)
214
2.60k
      fixHazards(CurrCycleInstr);
215
4.95k
216
4.97k
    for (unsigned i = 0; i < WaitStates; 
++i26
)
217
26
      insertNoopInBundle(CurrCycleInstr, TII);
218
4.95k
219
4.95k
    // It’s unnecessary to track more than MaxLookAhead instructions. Since we
220
4.95k
    // include the bundled MI directly after, only add a maximum of
221
4.95k
    // (MaxLookAhead - 1) noops to EmittedInstrs.
222
4.97k
    for (unsigned i = 0, e = std::min(WaitStates, MaxLookAhead - 1); i < e; 
++i26
)
223
26
      EmittedInstrs.push_front(nullptr);
224
4.95k
225
4.95k
    EmittedInstrs.push_front(CurrCycleInstr);
226
4.95k
    EmittedInstrs.resize(MaxLookAhead);
227
4.95k
  }
228
1.74k
  CurrCycleInstr = nullptr;
229
1.74k
}
230
231
270k
// Scheduler-driven query (not hazard-recognizer mode): number of noops
// needed before \p SU's instruction.
unsigned GCNHazardRecognizer::PreEmitNoops(SUnit *SU) {
  IsHazardRecognizerMode = false;
  return PreEmitNoopsCommon(SU->getInstr());
}
235
236
434k
// Standalone (hazard-recognizer mode) query: compute required noops for
// \p MI, also applying in-place hazard fixes, with CurrCycleInstr pinned to
// \p MI for the duration of the check.
unsigned GCNHazardRecognizer::PreEmitNoops(MachineInstr *MI) {
  IsHazardRecognizerMode = true;
  CurrCycleInstr = MI;
  unsigned W = PreEmitNoopsCommon(MI);
  fixHazards(MI);
  CurrCycleInstr = nullptr;
  return W;
}
244
245
710k
// Compute the number of wait states (noops) that must precede \p MI, as the
// maximum over every hazard check that applies to its instruction class.
// Mirrors the dispatch order of getHazardType().
unsigned GCNHazardRecognizer::PreEmitNoopsCommon(MachineInstr *MI) {
  if (MI->isBundle())
    return 0;

  int WaitStates = std::max(0, checkAnyInstHazards(MI));

  if (SIInstrInfo::isSMRD(*MI))
    return std::max(WaitStates, checkSMRDHazards(MI));

  if (SIInstrInfo::isVMEM(*MI) || SIInstrInfo::isFLAT(*MI))
    WaitStates = std::max(WaitStates, checkVMEMHazards(MI));

  if (ST.hasNSAtoVMEMBug())
    WaitStates = std::max(WaitStates, checkNSAtoVMEMHazard(MI));

  WaitStates = std::max(WaitStates, checkFPAtomicToDenormModeHazard(MI));

  // No further data-dependency checks needed on these subtargets.
  if (ST.hasNoDataDepHazard())
    return WaitStates;

  if (SIInstrInfo::isVALU(*MI))
    WaitStates = std::max(WaitStates, checkVALUHazards(MI));

  if (SIInstrInfo::isDPP(*MI))
    WaitStates = std::max(WaitStates, checkDPPHazards(MI));

  if (isDivFMas(MI->getOpcode()))
    WaitStates = std::max(WaitStates, checkDivFMasHazards(MI));

  if (isRWLane(MI->getOpcode()))
    WaitStates = std::max(WaitStates, checkRWLaneHazards(MI));

  if (MI->isInlineAsm())
    return std::max(WaitStates, checkInlineAsmHazards(MI));

  if (isSGetReg(MI->getOpcode()))
    return std::max(WaitStates, checkGetRegHazards(MI));

  if (isSSetReg(MI->getOpcode()))
    return std::max(WaitStates, checkSetRegHazards(MI));

  if (isRFE(MI->getOpcode()))
    return std::max(WaitStates, checkRFEHazards(MI));

  if (ST.hasReadM0MovRelInterpHazard() && (TII.isVINTRP(*MI) ||
                                           isSMovRel(MI->getOpcode())))
    return std::max(WaitStates, checkReadM0Hazards(MI));

  if (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(TII, *MI))
    return std::max(WaitStates, checkReadM0Hazards(MI));

  if (SIInstrInfo::isMAI(*MI))
    return std::max(WaitStates, checkMAIHazards(MI));

  if (MI->mayLoad() || MI->mayStore())
    return std::max(WaitStates, checkMAILdStHazards(MI));

  return WaitStates;
}
304
305
3.87k
void GCNHazardRecognizer::EmitNoop() {
306
3.87k
  EmittedInstrs.push_front(nullptr);
307
3.87k
}
308
309
1.57M
void GCNHazardRecognizer::AdvanceCycle() {
310
1.57M
  // When the scheduler detects a stall, it will call AdvanceCycle() without
311
1.57M
  // emitting any instructions.
312
1.57M
  if (!CurrCycleInstr)
313
843k
    return;
314
727k
315
727k
  // Do not track non-instructions which do not affect the wait states.
316
727k
  // If included, these instructions can lead to buffer overflow such that
317
727k
  // detectable hazards are missed.
318
727k
  if (CurrCycleInstr->isImplicitDef() || 
CurrCycleInstr->isDebugInstr()699k
||
319
727k
      
CurrCycleInstr->isKill()699k
)
320
29.1k
    return;
321
698k
322
698k
  if (CurrCycleInstr->isBundle()) {
323
1.74k
    processBundle();
324
1.74k
    return;
325
1.74k
  }
326
697k
327
697k
  unsigned NumWaitStates = TII.getNumWaitStates(*CurrCycleInstr);
328
697k
329
697k
  // Keep track of emitted instructions
330
697k
  EmittedInstrs.push_front(CurrCycleInstr);
331
697k
332
697k
  // Add a nullptr for each additional wait state after the first.  Make sure
333
697k
  // not to add more than getMaxLookAhead() items to the list, since we
334
697k
  // truncate the list to that size right after this loop.
335
697k
  for (unsigned i = 1, e = std::min(NumWaitStates, getMaxLookAhead());
336
697k
       i < e; 
++i794
) {
337
794
    EmittedInstrs.push_front(nullptr);
338
794
  }
339
697k
340
697k
  // getMaxLookahead() is the largest number of wait states we will ever need
341
697k
  // to insert, so there is no point in keeping track of more than that many
342
697k
  // wait states.
343
697k
  EmittedInstrs.resize(getMaxLookAhead());
344
697k
345
697k
  CurrCycleInstr = nullptr;
346
697k
}
347
348
0
void GCNHazardRecognizer::RecedeCycle() {
349
0
  llvm_unreachable("hazard recognizer does not support bottom-up scheduling.");
350
0
}
351
352
//===----------------------------------------------------------------------===//
353
// Helper Functions
354
//===----------------------------------------------------------------------===//
355
356
typedef function_ref<bool(MachineInstr *, int WaitStates)> IsExpiredFn;
357
358
// Returns a minimum wait states since \p I walking all predecessors.
359
// Only scans until \p IsExpired does not return true.
360
// Can only be run in a hazard recognizer mode.
361
static int getWaitStatesSince(GCNHazardRecognizer::IsHazardFn IsHazard,
362
                              MachineBasicBlock *MBB,
363
                              MachineBasicBlock::reverse_instr_iterator I,
364
                              int WaitStates,
365
                              IsExpiredFn IsExpired,
366
332k
                              DenseSet<const MachineBasicBlock *> &Visited) {
367
768k
  for (auto E = MBB->instr_rend(); I != E; 
++I435k
) {
368
737k
    // Don't add WaitStates for parent BUNDLE instructions.
369
737k
    if (I->isBundle())
370
523
      continue;
371
736k
372
736k
    if (IsHazard(&*I))
373
656
      return WaitStates;
374
736k
375
736k
    if (I->isInlineAsm() || 
I->isImplicitDef()732k
||
I->isDebugInstr()726k
)
376
9.46k
      continue;
377
726k
378
726k
    WaitStates += SIInstrInfo::getNumWaitStates(*I);
379
726k
380
726k
    if (IsExpired(&*I, WaitStates))
381
301k
      return std::numeric_limits<int>::max();
382
726k
  }
383
332k
384
332k
  int MinWaitStates = WaitStates;
385
30.6k
  bool Found = false;
386
30.6k
  for (MachineBasicBlock *Pred : MBB->predecessors()) {
387
5.26k
    if (!Visited.insert(Pred).second)
388
448
      continue;
389
4.82k
390
4.82k
    int W = getWaitStatesSince(IsHazard, Pred, Pred->instr_rbegin(),
391
4.82k
                               WaitStates, IsExpired, Visited);
392
4.82k
393
4.82k
    if (W == std::numeric_limits<int>::max())
394
4.76k
      continue;
395
57
396
57
    MinWaitStates = Found ? 
std::min(MinWaitStates, W)6
:
W51
;
397
57
    if (IsExpired(nullptr, MinWaitStates))
398
0
      return MinWaitStates;
399
57
400
57
    Found = true;
401
57
  }
402
30.6k
403
30.6k
  if (Found)
404
51
    return MinWaitStates;
405
30.5k
406
30.5k
  return std::numeric_limits<int>::max();
407
30.5k
}
408
409
static int getWaitStatesSince(GCNHazardRecognizer::IsHazardFn IsHazard,
410
                              MachineInstr *MI,
411
327k
                              IsExpiredFn IsExpired) {
412
327k
  DenseSet<const MachineBasicBlock *> Visited;
413
327k
  return getWaitStatesSince(IsHazard, MI->getParent(),
414
327k
                            std::next(MI->getReverseIterator()),
415
327k
                            0, IsExpired, Visited);
416
327k
}
417
418
900k
// Wait states elapsed since the most recent instruction matching
// \p IsHazard, or INT_MAX if none is found within \p Limit.  In hazard
// recognizer mode this walks the CFG; otherwise it scans the EmittedInstrs
// history (null entries count as one wait state each; inline asm does not).
int GCNHazardRecognizer::getWaitStatesSince(IsHazardFn IsHazard, int Limit) {
  if (IsHazardRecognizerMode) {
    auto IsExpiredFn = [Limit] (MachineInstr *, int WaitStates) {
      return WaitStates >= Limit;
    };
    return ::getWaitStatesSince(IsHazard, CurrCycleInstr, IsExpiredFn);
  }

  int WaitStates = 0;
  for (MachineInstr *MI : EmittedInstrs) {
    if (MI) {
      if (IsHazard(MI))
        return WaitStates;

      if (MI->isInlineAsm())
        continue;
    }
    ++WaitStates;

    if (WaitStates >= Limit)
      break;
  }
  return std::numeric_limits<int>::max();
}
442
443
int GCNHazardRecognizer::getWaitStatesSinceDef(unsigned Reg,
444
                                               IsHazardFn IsHazardDef,
445
564k
                                               int Limit) {
446
564k
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
447
564k
448
1.50M
  auto IsHazardFn = [IsHazardDef, TRI, Reg] (MachineInstr *MI) {
449
1.50M
    return IsHazardDef(MI) && 
MI->modifiesRegister(Reg, TRI)412k
;
450
1.50M
  };
451
564k
452
564k
  return getWaitStatesSince(IsHazardFn, Limit);
453
564k
}
454
455
int GCNHazardRecognizer::getWaitStatesSinceSetReg(IsHazardFn IsHazard,
456
584
                                                  int Limit) {
457
1.06k
  auto IsHazardFn = [IsHazard] (MachineInstr *MI) {
458
1.06k
    return isSSetReg(MI->getOpcode()) && 
IsHazard(MI)36
;
459
1.06k
  };
460
584
461
584
  return getWaitStatesSince(IsHazardFn, Limit);
462
584
}
463
464
//===----------------------------------------------------------------------===//
465
// No-op Hazard Detection
466
//===----------------------------------------------------------------------===//
467
468
static void addRegUnits(const SIRegisterInfo &TRI,
469
88.5k
                        BitVector &BV, unsigned Reg) {
470
333k
  for (MCRegUnitIterator RUI(Reg, &TRI); RUI.isValid(); 
++RUI245k
)
471
245k
    BV.set(*RUI);
472
88.5k
}
473
474
static void addRegsToSet(const SIRegisterInfo &TRI,
475
                         iterator_range<MachineInstr::const_mop_iterator> Ops,
476
57.8k
                         BitVector &Set) {
477
172k
  for (const MachineOperand &Op : Ops) {
478
172k
    if (Op.isReg())
479
88.5k
      addRegUnits(TRI, Set, Op.getReg());
480
172k
  }
481
57.8k
}
482
483
28.9k
void GCNHazardRecognizer::addClauseInst(const MachineInstr &MI) {
484
28.9k
  // XXX: Do we need to worry about implicit operands
485
28.9k
  addRegsToSet(TRI, MI.defs(), ClauseDefs);
486
28.9k
  addRegsToSet(TRI, MI.uses(), ClauseUses);
487
28.9k
}
488
489
188k
// Detect when \p MEM would break a "soft clause" and needs a separating
// instruction.  Returns 1 if a break is needed, 0 otherwise.
int GCNHazardRecognizer::checkSoftClauseHazards(MachineInstr *MEM) {
  // SMEM soft clauses are only present on VI+, and only matter if xnack is
  // enabled.
  if (!ST.isXNACKEnabled())
    return 0;

  bool IsSMRD = TII.isSMRD(*MEM);

  resetClause();

  // A soft-clause is any group of consecutive SMEM instructions.  The
  // instructions in this group may return out of order and/or may be
  // replayed (i.e. the same instruction issued more than once).
  //
  // In order to handle these situations correctly we need to make sure that
  // when a clause has more than one instruction, no instruction in the clause
  // writes to a register that is read by another instruction in the clause
  // (including itself). If we encounter this situation, we need to break the
  // clause by inserting a non SMEM instruction.
  for (MachineInstr *MI : EmittedInstrs) {
    // When we hit a non-SMEM instruction then we have passed the start of the
    // clause and we can stop.
    if (!MI)
      break;

    if (IsSMRD != SIInstrInfo::isSMRD(*MI))
      break;

    addClauseInst(*MI);
  }

  if (ClauseDefs.none())
    return 0;

  // We need to make sure not to put loads and stores in the same clause if
  // they use the same address. For now, just start a new clause whenever we
  // see a store.
  if (MEM->mayStore())
    return 1;

  addClauseInst(*MEM);

  // If the set of defs and uses intersect then we cannot add this instruction
  // to the clause, so we have a hazard.
  return ClauseDefs.anyCommon(ClauseUses) ? 1 : 0;
}
536
537
86.8k
// Wait states required before the SMRD instruction \p SMRD: soft-clause
// breaks plus (on SI only) SGPR-read-after-VALU/SALU-write hazards.
int GCNHazardRecognizer::checkSMRDHazards(MachineInstr *SMRD) {
  int WaitStatesNeeded = checkSoftClauseHazards(SMRD);

  // This SMRD hazard only affects SI.
  if (!ST.hasSMRDReadVALUDefHazard())
    return WaitStatesNeeded;

  // A read of an SGPR by SMRD instruction requires 4 wait states when the
  // SGPR was written by a VALU instruction.
  int SmrdSgprWaitStates = 4;
  auto IsHazardDefFn = [this] (MachineInstr *MI) { return TII.isVALU(*MI); };
  auto IsBufferHazardDefFn = [this] (MachineInstr *MI) { return TII.isSALU(*MI); };

  bool IsBufferSMRD = TII.isBufferSMRD(*SMRD);

  for (const MachineOperand &Use : SMRD->uses()) {
    if (!Use.isReg())
      continue;
    int WaitStatesNeededForUse =
        SmrdSgprWaitStates - getWaitStatesSinceDef(Use.getReg(), IsHazardDefFn,
                                                   SmrdSgprWaitStates);
    WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);

    // This fixes what appears to be undocumented hardware behavior in SI where
    // s_mov writing a descriptor and s_buffer_load_dword reading the descriptor
    // needs some number of nops in between. We don't know how many we need, but
    // let's use 4. This wasn't discovered before probably because the only
    // case when this happens is when we expand a 64-bit pointer into a full
    // descriptor and use s_buffer_load_dword instead of s_load_dword, which was
    // probably never encountered in the closed-source land.
    if (IsBufferSMRD) {
      int WaitStatesNeededForUse =
        SmrdSgprWaitStates - getWaitStatesSinceDef(Use.getReg(),
                                                   IsBufferHazardDefFn,
                                                   SmrdSgprWaitStates);
      WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
    }
  }

  return WaitStatesNeeded;
}
580
581
151k
// Wait states required before the VMEM instruction \p VMEM: soft-clause
// breaks plus SGPR-operand-written-by-VALU hazards (5 wait states).
int GCNHazardRecognizer::checkVMEMHazards(MachineInstr* VMEM) {
  if (!ST.hasVMEMReadSGPRVALUDefHazard())
    return 0;

  int WaitStatesNeeded = checkSoftClauseHazards(VMEM);

  // A read of an SGPR by a VMEM instruction requires 5 wait states when the
  // SGPR was written by a VALU Instruction.
  const int VmemSgprWaitStates = 5;
  auto IsHazardDefFn = [this] (MachineInstr *MI) { return TII.isVALU(*MI); };
  for (const MachineOperand &Use : VMEM->uses()) {
    // Only scalar register uses are subject to this hazard.
    if (!Use.isReg() || TRI.isVGPR(MF.getRegInfo(), Use.getReg()))
      continue;

    int WaitStatesNeededForUse =
        VmemSgprWaitStates - getWaitStatesSinceDef(Use.getReg(), IsHazardDefFn,
                                                   VmemSgprWaitStates);
    WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
  }
  return WaitStatesNeeded;
}
602
603
1.27k
// Wait states required before the DPP instruction \p DPP: VGPR reads after
// any recent write (2 states) and EXEC writes by VALU (5 states).
int GCNHazardRecognizer::checkDPPHazards(MachineInstr *DPP) {
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  const SIInstrInfo *TII = ST.getInstrInfo();

  // Check for DPP VGPR read after VALU VGPR write and EXEC write.
  int DppVgprWaitStates = 2;
  int DppExecWaitStates = 5;
  int WaitStatesNeeded = 0;
  auto IsHazardDefFn = [TII] (MachineInstr *MI) { return TII->isVALU(*MI); };

  for (const MachineOperand &Use : DPP->uses()) {
    if (!Use.isReg() || !TRI->isVGPR(MF.getRegInfo(), Use.getReg()))
      continue;
    int WaitStatesNeededForUse =
        DppVgprWaitStates - getWaitStatesSinceDef(Use.getReg(),
                              [](MachineInstr *) { return true; },
                              DppVgprWaitStates);
    WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
  }

  WaitStatesNeeded = std::max(
      WaitStatesNeeded,
      DppExecWaitStates - getWaitStatesSinceDef(AMDGPU::EXEC, IsHazardDefFn,
                                                DppExecWaitStates));

  return WaitStatesNeeded;
}
630
631
407
// Wait states required before a v_div_fmas: 4 after a VALU write to VCC.
int GCNHazardRecognizer::checkDivFMasHazards(MachineInstr *DivFMas) {
  const SIInstrInfo *TII = ST.getInstrInfo();

  // v_div_fmas requires 4 wait states after a write to vcc from a VALU
  // instruction.
  const int DivFMasWaitStates = 4;
  auto IsHazardDefFn = [TII] (MachineInstr *MI) { return TII->isVALU(*MI); };
  int WaitStatesNeeded = getWaitStatesSinceDef(AMDGPU::VCC, IsHazardDefFn,
                                               DivFMasWaitStates);

  return DivFMasWaitStates - WaitStatesNeeded;
}
643
644
325
// Wait states required before an s_getreg that reads the same hardware
// register a recent s_setreg wrote (2 states).
int GCNHazardRecognizer::checkGetRegHazards(MachineInstr *GetRegInstr) {
  const SIInstrInfo *TII = ST.getInstrInfo();
  unsigned GetRegHWReg = getHWReg(TII, *GetRegInstr);

  const int GetRegWaitStates = 2;
  auto IsHazardFn = [TII, GetRegHWReg] (MachineInstr *MI) {
    return GetRegHWReg == getHWReg(TII, *MI);
  };
  int WaitStatesNeeded = getWaitStatesSinceSetReg(IsHazardFn, GetRegWaitStates);

  return GetRegWaitStates - WaitStatesNeeded;
}
656
657
255
// Wait states required between two s_setreg writes to the same hardware
// register (subtarget-dependent count).
int GCNHazardRecognizer::checkSetRegHazards(MachineInstr *SetRegInstr) {
  const SIInstrInfo *TII = ST.getInstrInfo();
  unsigned HWReg = getHWReg(TII, *SetRegInstr);

  const int SetRegWaitStates = ST.getSetRegWaitStates();
  auto IsHazardFn = [TII, HWReg] (MachineInstr *MI) {
    return HWReg == getHWReg(TII, *MI);
  };
  int WaitStatesNeeded = getWaitStatesSinceSetReg(IsHazardFn, SetRegWaitStates);
  return SetRegWaitStates - WaitStatesNeeded;
}
668
669
318k
int GCNHazardRecognizer::createsVALUHazard(const MachineInstr &MI) {
670
318k
  if (!MI.mayStore())
671
292k
    return -1;
672
26.5k
673
26.5k
  const SIInstrInfo *TII = ST.getInstrInfo();
674
26.5k
  unsigned Opcode = MI.getOpcode();
675
26.5k
  const MCInstrDesc &Desc = MI.getDesc();
676
26.5k
677
26.5k
  int VDataIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdata);
678
26.5k
  int VDataRCID = -1;
679
26.5k
  if (VDataIdx != -1)
680
8.54k
    VDataRCID = Desc.OpInfo[VDataIdx].RegClass;
681
26.5k
682
26.5k
  if (TII->isMUBUF(MI) || 
TII->isMTBUF(MI)20.9k
) {
683
5.59k
    // There is no hazard if the instruction does not use vector regs
684
5.59k
    // (like wbinvl1)
685
5.59k
    if (VDataIdx == -1)
686
118
      return -1;
687
5.47k
    // For MUBUF/MTBUF instructions this hazard only exists if the
688
5.47k
    // instruction is not using a register in the soffset field.
689
5.47k
    const MachineOperand *SOffset =
690
5.47k
        TII->getNamedOperand(MI, AMDGPU::OpName::soffset);
691
5.47k
    // If we have no soffset operand, then assume this field has been
692
5.47k
    // hardcoded to zero.
693
5.47k
    if (AMDGPU::getRegBitWidth(VDataRCID) > 64 &&
694
5.47k
        
(2.25k
!SOffset2.25k
||
!SOffset->isReg()2.25k
))
695
2.24k
      return VDataIdx;
696
24.1k
  }
697
24.1k
698
24.1k
  // MIMG instructions create a hazard if they don't use a 256-bit T# and
699
24.1k
  // the store size is greater than 8 bytes and they have more than two bits
700
24.1k
  // of their dmask set.
701
24.1k
  // All our MIMG definitions use a 256-bit T#, so we can skip checking for them.
702
24.1k
  if (TII->isMIMG(MI)) {
703
0
    int SRsrcIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::srsrc);
704
0
    assert(SRsrcIdx != -1 &&
705
0
           AMDGPU::getRegBitWidth(Desc.OpInfo[SRsrcIdx].RegClass) == 256);
706
0
    (void)SRsrcIdx;
707
0
  }
708
24.1k
709
24.1k
  if (TII->isFLAT(MI)) {
710
3.06k
    int DataIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdata);
711
3.06k
    if (AMDGPU::getRegBitWidth(Desc.OpInfo[DataIdx].RegClass) > 64)
712
1.93k
      return DataIdx;
713
22.2k
  }
714
22.2k
715
22.2k
  return -1;
716
22.2k
}
717
718
int GCNHazardRecognizer::checkVALUHazardsHelper(const MachineOperand &Def,
719
365k
            const MachineRegisterInfo &MRI) {
720
365k
  // Helper to check for the hazard where VMEM instructions that store more than
721
365k
  // 8 bytes can have there store data over written by the next instruction.
722
365k
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
723
365k
724
365k
  const int VALUWaitStates = 1;
725
365k
  int WaitStatesNeeded = 0;
726
365k
727
365k
  if (!TRI->isVGPR(MRI, Def.getReg()))
728
39.0k
    return WaitStatesNeeded;
729
326k
  unsigned Reg = Def.getReg();
730
326k
  auto IsHazardFn = [this, Reg, TRI] (MachineInstr *MI) {
731
318k
    int DataIdx = createsVALUHazard(*MI);
732
318k
    return DataIdx >= 0 &&
733
318k
    
TRI->regsOverlap(MI->getOperand(DataIdx).getReg(), Reg)4.17k
;
734
318k
  };
735
326k
  int WaitStatesNeededForDef =
736
326k
    VALUWaitStates - getWaitStatesSince(IsHazardFn, VALUWaitStates);
737
326k
  WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForDef);
738
326k
739
326k
  return WaitStatesNeeded;
740
326k
}
741
742
407k
// Wait states required before the VALU instruction \p VALU, taking the
// maximum of the 12-dword-store hazard over all of its defs.
int GCNHazardRecognizer::checkVALUHazards(MachineInstr *VALU) {
  // This checks for the hazard where VMEM instructions that store more than
  // 8 bytes can have their store data overwritten by the next instruction.
  if (!ST.has12DWordStoreHazard())
    return 0;

  const MachineRegisterInfo &MRI = MF.getRegInfo();
  int WaitStatesNeeded = 0;

  for (const MachineOperand &Def : VALU->defs())
    WaitStatesNeeded = std::max(WaitStatesNeeded,
                                checkVALUHazardsHelper(Def, MRI));

  return WaitStatesNeeded;
}
757
758
3.29k
int GCNHazardRecognizer::checkInlineAsmHazards(MachineInstr *IA) {
  // Inline asm can contain just about anything, so this routine leverages the
  // other check*Hazard helpers on the asm's declared register defs. It makes
  // no attempt to cover every possible inline-asm hazard (good luck) — it is
  // a collection of what has been problematic thus far.

  // The helper models the 12-dword-store hazard; see checkVALUHazards().
  if (!ST.has12DWordStoreHazard())
    return 0;

  const MachineRegisterInfo &MRI = MF.getRegInfo();
  int Worst = 0;

  const unsigned E = IA->getNumOperands();
  for (unsigned I = InlineAsm::MIOp_FirstOperand; I != E; ++I) {
    const MachineOperand &Op = IA->getOperand(I);
    // Only register defs of the asm can trigger the store-data hazard.
    if (!Op.isReg() || !Op.isDef())
      continue;
    Worst = std::max(Worst, checkVALUHazardsHelper(Op, MRI));
  }

  return Worst;
}
783
784
187
int GCNHazardRecognizer::checkRWLaneHazards(MachineInstr *RWLane) {
  // v_readlane/v_writelane: wait states are required when the lane-select
  // SGPR (src1) was recently written by a VALU instruction.
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  const MachineRegisterInfo &MRI = MF.getRegInfo();

  const MachineOperand *LaneSelectOp =
      TII->getNamedOperand(*RWLane, AMDGPU::OpName::src1);

  // Only an SGPR lane select can create the hazard.
  if (!LaneSelectOp->isReg())
    return 0;
  if (!TRI->isSGPRReg(MRI, LaneSelectOp->getReg()))
    return 0;

  auto IsVALUWriteFn = [TII] (MachineInstr *MI) {
    return TII->isVALU(*MI);
  };

  const int RWLaneWaitStates = 4;
  unsigned LaneSelectReg = LaneSelectOp->getReg();
  return RWLaneWaitStates - getWaitStatesSinceDef(LaneSelectReg,
                                                  IsVALUWriteFn,
                                                  RWLaneWaitStates);
}
805
806
8
int GCNHazardRecognizer::checkRFEHazards(MachineInstr *RFE) {
  // s_rfe needs a wait state after an s_setreg that targets the TRAPSTS
  // hardware register, on subtargets that exhibit the hazard.
  if (!ST.hasRFEHazards())
    return 0;

  const SIInstrInfo *TII = ST.getInstrInfo();
  const int RFEWaitStates = 1;

  auto WritesTrapStsFn = [TII] (MachineInstr *MI) {
    return getHWReg(TII, *MI) == AMDGPU::Hwreg::ID_TRAPSTS;
  };

  int WaitStatesSince = getWaitStatesSinceSetReg(WritesTrapStsFn,
                                                 RFEWaitStates);
  return RFEWaitStates - WaitStatesSince;
}
820
821
957k
int GCNHazardRecognizer::checkAnyInstHazards(MachineInstr *MI) {
  // Hazard checked for every instruction: any instruction reading an SGPR
  // within one wait state of an s_mov_fed_b32 write to it needs a delay.
  // \returns the number of wait states still required (0 if none).
  if (MI->isDebugInstr())
    return 0;

  // Bail out before doing any further work on subtargets without the hazard.
  // (Previously TRI was fetched before this check — wasted on the common path.)
  if (!ST.hasSMovFedHazard())
    return 0;

  const SIRegisterInfo *TRI = ST.getRegisterInfo();

  // Check for any instruction reading an SGPR after a write from
  // s_mov_fed_b32.
  int MovFedWaitStates = 1;
  int WaitStatesNeeded = 0;

  // The predicate captures nothing, so hoist it out of the use loop instead
  // of re-declaring it per iteration.
  auto IsHazardFn = [] (MachineInstr *MI) {
    return MI->getOpcode() == AMDGPU::S_MOV_FED_B32;
  };

  for (const MachineOperand &Use : MI->uses()) {
    // VGPR reads are unaffected; only scalar-register reads matter here.
    if (!Use.isReg() || TRI->isVGPR(MF.getRegInfo(), Use.getReg()))
      continue;
    int WaitStatesNeededForUse =
        MovFedWaitStates - getWaitStatesSinceDef(Use.getReg(), IsHazardFn,
                                                 MovFedWaitStates);
    WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
  }

  return WaitStatesNeeded;
}
848
849
517
int GCNHazardRecognizer::checkReadM0Hazards(MachineInstr *MI) {
  // Instructions that read M0 need one wait state after an SALU write to M0.
  const SIInstrInfo *TII = ST.getInstrInfo();
  const int SMovRelWaitStates = 1;

  auto IsSALUFn = [TII] (MachineInstr *MI) {
    return TII->isSALU(*MI);
  };

  int WaitStatesSince =
      getWaitStatesSinceDef(AMDGPU::M0, IsSALUFn, SMovRelWaitStates);
  return SMovRelWaitStates - WaitStatesSince;
}
858
859
437k
// Run the hazard fixups that mitigate hazards by inserting instructions
// (rather than by reporting wait states). Each fix* routine checks its own
// subtarget predicate and returns without side effects when its hazard
// cannot occur. NOTE(review): the routines are applied in this fixed order;
// presumably later fixes must see the instructions earlier fixes insert —
// confirm before reordering.
void GCNHazardRecognizer::fixHazards(MachineInstr *MI) {
  fixVMEMtoScalarWriteHazards(MI);
  fixVcmpxPermlaneHazards(MI);
  fixSMEMtoVectorWriteHazards(MI);
  fixVcmpxExecWARHazard(MI);
  fixLdsBranchVmemWARHazard(MI);
}
866
867
437k
// Fixup for the vcmpx-permlane hazard: a V_PERMLANE* preceded by a VOPC
// instruction, with no intervening "real" VALU, needs one inserted before it.
// Returns true if an instruction was inserted.
bool GCNHazardRecognizer::fixVcmpxPermlaneHazards(MachineInstr *MI) {
  if (!ST.hasVcmpxPermlaneHazard() || !isPermlane(*MI))
    return false;

  const SIInstrInfo *TII = ST.getInstrInfo();
  // Hazard source: any preceding VOPC instruction.
  auto IsHazardFn = [TII] (MachineInstr *MI) {
    return TII->isVOPC(*MI);
  };

  // The hazard expires once a VALU other than a V_NOP executes in between
  // (V_NOP does not count — see the comment below about SQ discarding it).
  auto IsExpiredFn = [] (MachineInstr *MI, int) {
    if (!MI)
      return false;
    unsigned Opc = MI->getOpcode();
    return SIInstrInfo::isVALU(*MI) &&
           Opc != AMDGPU::V_NOP_e32 &&
           Opc != AMDGPU::V_NOP_e64 &&
           Opc != AMDGPU::V_NOP_sdwa;
  };

  // max() means the search expired without finding a hazard — nothing to do.
  if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) ==
      std::numeric_limits<int>::max())
    return false;

  // V_NOP will be discarded by SQ.
  // Use V_MOV_B32 v?, v?. Register must be alive so use src0 of V_PERMLANE*
  // which is always a VGPR and available.
  auto *Src0 = TII->getNamedOperand(*MI, AMDGPU::OpName::src0);
  unsigned Reg = Src0->getReg();
  bool IsUndef = Src0->isUndef();
  // Insert a self-move of src0, propagating undef/kill state so liveness
  // stays consistent.
  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
          TII->get(AMDGPU::V_MOV_B32_e32))
    .addReg(Reg, RegState::Define | (IsUndef ? RegState::Dead : 0))
    .addReg(Reg, IsUndef ? RegState::Undef : RegState::Kill);

  return true;
}
903
904
437k
// Fixup for the VMEM-to-scalar-write hazard: an SALU/SMRD instruction whose
// def register is still being read by an in-flight VMEM/DS/FLAT access needs
// mitigation. Inserts a V_NOP before MI and returns true when required.
bool GCNHazardRecognizer::fixVMEMtoScalarWriteHazards(MachineInstr *MI) {
  if (!ST.hasVMEMtoScalarWriteHazard())
    return false;

  // Only scalar writers (SALU or SMRD) can trip the hazard.
  if (!SIInstrInfo::isSALU(*MI) && !SIInstrInfo::isSMRD(*MI))
    return false;

  if (MI->getNumDefs() == 0)
    return false;

  const SIRegisterInfo *TRI = ST.getRegisterInfo();

  // Hazard source: a preceding memory instruction that still *uses* one of
  // the registers MI is about to define (write-after-read).
  auto IsHazardFn = [TRI, MI] (MachineInstr *I) {
    if (!SIInstrInfo::isVMEM(*I) && !SIInstrInfo::isDS(*I) &&
        !SIInstrInfo::isFLAT(*I))
      return false;

    for (const MachineOperand &Def : MI->defs()) {
      MachineOperand *Op = I->findRegisterUseOperand(Def.getReg(), false, TRI);
      if (!Op)
        continue;
      return true;
    }
    return false;
  };

  // The hazard expires at any VALU, or at an s_waitcnt 0 (all counters
  // drained, so the memory access has completed).
  auto IsExpiredFn = [] (MachineInstr *MI, int) {
    return MI && (SIInstrInfo::isVALU(*MI) ||
                  (MI->getOpcode() == AMDGPU::S_WAITCNT &&
                   !MI->getOperand(0).getImm()));
  };

  if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) ==
      std::numeric_limits<int>::max())
    return false;

  const SIInstrInfo *TII = ST.getInstrInfo();
  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII->get(AMDGPU::V_NOP_e32));
  return true;
}
944
945
437k
// Fixup for the SMEM-to-vector-write hazard: a VALU writing an SGPR (its
// sdst, or vdst for v_readlane/v_readfirstlane, or an implicit SGPR def)
// that an earlier SMRD still reads needs mitigation. Inserts
// "s_mov_b32 null, 0" before MI and returns true when required.
bool GCNHazardRecognizer::fixSMEMtoVectorWriteHazards(MachineInstr *MI) {
  if (!ST.hasSMEMtoVectorWriteHazard())
    return false;

  if (!SIInstrInfo::isVALU(*MI))
    return false;

  // Pick the operand that holds MI's scalar destination.
  unsigned SDSTName;
  switch (MI->getOpcode()) {
  case AMDGPU::V_READLANE_B32:
  case AMDGPU::V_READFIRSTLANE_B32:
    // These write their scalar result through vdst.
    SDSTName = AMDGPU::OpName::vdst;
    break;
  default:
    SDSTName = AMDGPU::OpName::sdst;
    break;
  }

  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  const AMDGPU::IsaVersion IV = AMDGPU::getIsaVersion(ST.getCPU());
  const MachineOperand *SDST = TII->getNamedOperand(*MI, SDSTName);
  if (!SDST) {
    // No named scalar dst; fall back to the first implicit SGPR def.
    for (const auto &MO : MI->implicit_operands()) {
      if (MO.isDef() && TRI->isSGPRClass(TRI->getPhysRegClass(MO.getReg()))) {
        SDST = &MO;
        break;
      }
    }
  }

  // No scalar destination at all => no hazard possible.
  if (!SDST)
    return false;

  const unsigned SDSTReg = SDST->getReg();
  // Hazard source: a preceding SMRD that reads the register MI will write.
  auto IsHazardFn = [SDSTReg, TRI] (MachineInstr *I) {
    return SIInstrInfo::isSMRD(*I) && I->readsRegister(SDSTReg, TRI);
  };

  auto IsExpiredFn = [TII, IV] (MachineInstr *MI, int) {
    if (MI) {
      if (TII->isSALU(*MI)) {
        switch (MI->getOpcode()) {
        case AMDGPU::S_SETVSKIP:
        case AMDGPU::S_VERSION:
        case AMDGPU::S_WAITCNT_VSCNT:
        case AMDGPU::S_WAITCNT_VMCNT:
        case AMDGPU::S_WAITCNT_EXPCNT:
          // These instructions do not mitigate the hazard.
          return false;
        case AMDGPU::S_WAITCNT_LGKMCNT:
          // Reducing lgkmcnt count to 0 always mitigates the hazard.
          return (MI->getOperand(1).getImm() == 0) &&
                 (MI->getOperand(0).getReg() == AMDGPU::SGPR_NULL);
        case AMDGPU::S_WAITCNT: {
          // A legacy s_waitcnt mitigates iff its decoded lgkmcnt is 0.
          const int64_t Imm = MI->getOperand(0).getImm();
          AMDGPU::Waitcnt Decoded = AMDGPU::decodeWaitcnt(IV, Imm);
          return (Decoded.LgkmCnt == 0);
        }
        default:
          // SOPP instructions cannot mitigate the hazard.
          if (TII->isSOPP(*MI))
            return false;
          // At this point the SALU can be assumed to mitigate the hazard
          // because either:
          // (a) it is independent of the at risk SMEM (breaking chain),
          // or
          // (b) it is dependent on the SMEM, in which case an appropriate
          //     s_waitcnt lgkmcnt _must_ exist between it and the at risk
          //     SMEM instruction.
          return true;
        }
      }
    }
    return false;
  };

  if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) ==
      std::numeric_limits<int>::max())
    return false;

  // Mitigate with a harmless SALU write: s_mov_b32 null, 0.
  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
          TII->get(AMDGPU::S_MOV_B32), AMDGPU::SGPR_NULL)
      .addImm(0);
  return true;
}
1031
1032
437k
// Fixup for the vcmpx EXEC WAR hazard: a VALU that writes EXEC, following a
// non-VALU instruction that reads EXEC, needs mitigation. Inserts
// "s_waitcnt_depctr 0xfffe" before MI and returns true when required.
bool GCNHazardRecognizer::fixVcmpxExecWARHazard(MachineInstr *MI) {
  if (!ST.hasVcmpxExecWARHazard() || !SIInstrInfo::isVALU(*MI))
    return false;

  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  // Only VALUs that actually modify EXEC can trip the hazard.
  if (!MI->modifiesRegister(AMDGPU::EXEC, TRI))
    return false;

  // Hazard source: a preceding non-VALU reader of EXEC.
  auto IsHazardFn = [TRI] (MachineInstr *I) {
    if (SIInstrInfo::isVALU(*I))
      return false;
    return I->readsRegister(AMDGPU::EXEC, TRI);
  };

  const SIInstrInfo *TII = ST.getInstrInfo();
  auto IsExpiredFn = [TII, TRI] (MachineInstr *MI, int) {
    if (!MI)
      return false;
    if (SIInstrInfo::isVALU(*MI)) {
      // A VALU writing any SGPR (named sdst or implicit def) mitigates.
      if (TII->getNamedOperand(*MI, AMDGPU::OpName::sdst))
        return true;
      for (auto MO : MI->implicit_operands())
        if (MO.isDef() && TRI->isSGPRClass(TRI->getPhysRegClass(MO.getReg())))
          return true;
    }
    // An s_waitcnt_depctr with all relevant bits set also mitigates.
    if (MI->getOpcode() == AMDGPU::S_WAITCNT_DEPCTR &&
        (MI->getOperand(0).getImm() & 0xfffe) == 0xfffe)
      return true;
    return false;
  };

  if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) ==
      std::numeric_limits<int>::max())
    return false;

  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
          TII->get(AMDGPU::S_WAITCNT_DEPCTR))
    .addImm(0xfffe);
  return true;
}
1072
1073
437k
// Fixup for the LDS-branch-VMEM WAR hazard: a DS access and a VMEM (or
// segment-specific FLAT) access separated by a branch, in either order, need
// an "s_waitcnt_vscnt null, 0" between them. Returns true if one was
// inserted before MI.
bool GCNHazardRecognizer::fixLdsBranchVmemWARHazard(MachineInstr *MI) {
  if (!ST.hasLdsBranchVmemWARHazard())
    return false;

  // Classify an instruction: 1 = DS, 2 = VMEM/segment-FLAT, 0 = neither.
  // The hazard needs one access of each class on the two sides of a branch.
  auto IsHazardInst = [] (const MachineInstr *MI) {
    if (SIInstrInfo::isDS(*MI))
      return 1;
    if (SIInstrInfo::isVMEM(*MI) || SIInstrInfo::isSegmentSpecificFLAT(*MI))
      return 2;
    return 0;
  };

  auto InstType = IsHazardInst(MI);
  if (!InstType)
    return false;

  // The outer search expires at any DS/VMEM access, or at an existing
  // "s_waitcnt_vscnt null, 0" (which already mitigates the hazard).
  auto IsExpiredFn = [&IsHazardInst] (MachineInstr *I, int) {
    return I && (IsHazardInst(I) ||
                 (I->getOpcode() == AMDGPU::S_WAITCNT_VSCNT &&
                  I->getOperand(0).getReg() == AMDGPU::SGPR_NULL &&
                  !I->getOperand(1).getImm()));
  };

  // Outer hazard: a preceding branch behind which an access of the *other*
  // class is still in flight (checked by the nested search below).
  auto IsHazardFn = [InstType, &IsHazardInst] (MachineInstr *I) {
    if (!I->isBranch())
      return false;

    // Nested hazard: an access of the opposite class before the branch.
    auto IsHazardFn = [InstType, IsHazardInst] (MachineInstr *I) {
      auto InstType2 = IsHazardInst(I);
      return InstType2 && InstType != InstType2;
    };

    // Nested expiry: an access of the *same* class (no WAR possible), or an
    // existing "s_waitcnt_vscnt null, 0".
    auto IsExpiredFn = [InstType, &IsHazardInst] (MachineInstr *I, int) {
      if (!I)
        return false;

      auto InstType2 = IsHazardInst(I);
      if (InstType == InstType2)
        return true;

      return I->getOpcode() == AMDGPU::S_WAITCNT_VSCNT &&
             I->getOperand(0).getReg() == AMDGPU::SGPR_NULL &&
             !I->getOperand(1).getImm();
    };

    return ::getWaitStatesSince(IsHazardFn, I, IsExpiredFn) !=
           std::numeric_limits<int>::max();
  };

  if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) ==
      std::numeric_limits<int>::max())
    return false;

  const SIInstrInfo *TII = ST.getInstrInfo();
  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
          TII->get(AMDGPU::S_WAITCNT_VSCNT))
    .addReg(AMDGPU::SGPR_NULL, RegState::Undef)
    .addImm(0);

  return true;
}
1134
1135
63.0k
int GCNHazardRecognizer::checkNSAtoVMEMHazard(MachineInstr *MI) {
  // Hazard: a buffer (MUBUF/MTBUF) access with certain offset bits set,
  // issued right after a large NSA-encoded MIMG instruction, needs a wait
  // state on subtargets with the NSA-to-VMEM bug.
  const int NSAtoVMEMWaitStates = 1;

  if (!ST.hasNSAtoVMEMBug())
    return 0;

  if (!SIInstrInfo::isMUBUF(*MI) && !SIInstrInfo::isMTBUF(*MI))
    return 0;

  const SIInstrInfo *TII = ST.getInstrInfo();
  const auto *Offset = TII->getNamedOperand(*MI, AMDGPU::OpName::offset);
  // Only offsets with bit 1 or bit 2 set trigger the hazard.
  if (!Offset)
    return 0;
  if ((Offset->getImm() & 6) == 0)
    return 0;

  auto IsNSAMIMGFn = [TII] (MachineInstr *I) {
    if (!SIInstrInfo::isMIMG(*I))
      return false;
    const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(I->getOpcode());
    return Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA &&
           TII->getInstSizeInBytes(*I) >= 16;
  };

  return NSAtoVMEMWaitStates - getWaitStatesSince(IsNSAMIMGFn, 1);
}
1159
1160
925k
int GCNHazardRecognizer::checkFPAtomicToDenormModeHazard(MachineInstr *MI) {
  // s_denorm_mode needs up to 3 wait states after an FP atomic VMEM/FLAT
  // access. \returns the number of wait states still required (0 if none).
  int FPAtomicToDenormModeWaitStates = 3;

  if (MI->getOpcode() != AMDGPU::S_DENORM_MODE)
    return 0;

  // Hazard source: a preceding FP atomic memory instruction.
  auto IsHazardFn = [] (MachineInstr *I) {
    if (!SIInstrInfo::isVMEM(*I) && !SIInstrInfo::isFLAT(*I))
      return false;
    return SIInstrInfo::isFPAtomic(*I);
  };

  auto IsExpiredFn = [] (MachineInstr *MI, int WaitStates) {
    // After 3 wait states the hazard is over regardless of the instruction.
    if (WaitStates >= 3)
      return true;
    // Fix: guard against a null instruction before dereferencing — the
    // search framework passes nullptr at region boundaries (every other
    // IsExpiredFn in this file handles that case). Previously *MI was
    // dereferenced unconditionally here.
    if (!MI)
      return false;
    if (SIInstrInfo::isVALU(*MI))
      return true;

    switch (MI->getOpcode()) {
    case AMDGPU::S_WAITCNT:
    case AMDGPU::S_WAITCNT_VSCNT:
    case AMDGPU::S_WAITCNT_VMCNT:
    case AMDGPU::S_WAITCNT_EXPCNT:
    case AMDGPU::S_WAITCNT_LGKMCNT:
    case AMDGPU::S_WAITCNT_IDLE:
      // Any waitcnt variant ends the hazard window.
      return true;
    default:
      break;
    }

    return false;
  };

  return FPAtomicToDenormModeWaitStates -
         ::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn);
}
1195
1196
10.7k
// Compute the wait states required before an MAI instruction (an MFMA, or
// v_accvgpr_read/v_accvgpr_write). The constants below encode the required
// distances between producer and consumer for each producer latency class
// (2/8/16 cycles as reported by the scheduling model).
int GCNHazardRecognizer::checkMAIHazards(MachineInstr *MI) {
  assert(SIInstrInfo::isMAI(*MI));

  int WaitStatesNeeded = 0;
  unsigned Opc = MI->getOpcode();

  auto IsVALUFn = [] (MachineInstr *MI) {
    return SIInstrInfo::isVALU(*MI);
  };

  if (Opc != AMDGPU::V_ACCVGPR_READ_B32) { // MFMA or v_accvgpr_write
    // Rule 1: distance from a VALU write of EXEC, and from legacy VALU
    // writes of any VGPR this instruction reads.
    const int LegacyVALUWritesVGPRWaitStates = 2;
    const int VALUWritesExecWaitStates = 4;
    const int MaxWaitStates = 4;

    int WaitStatesNeededForUse = VALUWritesExecWaitStates -
      getWaitStatesSinceDef(AMDGPU::EXEC, IsVALUFn, MaxWaitStates);
    WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);

    if (WaitStatesNeeded < MaxWaitStates) {
      for (const MachineOperand &Use : MI->explicit_uses()) {
        const int MaxWaitStates = 2; // Intentionally shadows the outer limit.

        if (!Use.isReg() || !TRI.isVGPR(MF.getRegInfo(), Use.getReg()))
          continue;

        int WaitStatesNeededForUse = LegacyVALUWritesVGPRWaitStates -
          getWaitStatesSinceDef(Use.getReg(), IsVALUFn, MaxWaitStates);
        WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);

        // This inner rule cannot require more than its own limit; stop early.
        if (WaitStatesNeeded == MaxWaitStates)
          break;
      }
    }
  }

  // True MFMA only: excludes the accvgpr read/write pseudo moves.
  auto IsMFMAFn = [] (MachineInstr *MI) {
    return SIInstrInfo::isMAI(*MI) &&
           MI->getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32 &&
           MI->getOpcode() != AMDGPU::V_ACCVGPR_READ_B32;
  };

  // Rule 2: for every AGPR this instruction reads (or, for v_accvgpr_write,
  // its AGPR def), check distance from a prior overlapping MFMA write and
  // from a prior overlapping v_accvgpr_write.
  for (const MachineOperand &Op : MI->explicit_operands()) {
    if (!Op.isReg() || !TRI.isAGPR(MF.getRegInfo(), Op.getReg()))
      continue;

    if (Op.isDef() && Opc != AMDGPU::V_ACCVGPR_WRITE_B32)
      continue;

    const int MFMAWritesAGPROverlappedSrcABWaitStates = 4;
    const int MFMAWritesAGPROverlappedSrcCWaitStates = 2;
    const int MFMA4x4WritesAGPRAccVgprReadWaitStates = 4;
    const int MFMA16x16WritesAGPRAccVgprReadWaitStates = 10;
    const int MFMA32x32WritesAGPRAccVgprReadWaitStates = 18;
    const int MFMA4x4WritesAGPRAccVgprWriteWaitStates = 1;
    const int MFMA16x16WritesAGPRAccVgprWriteWaitStates = 7;
    const int MFMA32x32WritesAGPRAccVgprWriteWaitStates = 15;
    const int MaxWaitStates = 18;
    unsigned Reg = Op.getReg();
    unsigned HazardDefLatency = 0;

    // Matches an MFMA whose dst overlaps (but is not identical to) Reg,
    // recording the worst producer latency seen as a side effect.
    auto IsOverlappedMFMAFn = [Reg, &IsMFMAFn, &HazardDefLatency, this]
                              (MachineInstr *MI) {
      if (!IsMFMAFn(MI))
        return false;
      unsigned DstReg = MI->getOperand(0).getReg();
      if (DstReg == Reg)
        return false;
      HazardDefLatency = std::max(HazardDefLatency,
                                  TSchedModel.computeInstrLatency(MI));
      return TRI.regsOverlap(DstReg, Reg);
    };

    int WaitStatesSinceDef = getWaitStatesSinceDef(Reg, IsOverlappedMFMAFn,
                                                   MaxWaitStates);
    int NeedWaitStates = MFMAWritesAGPROverlappedSrcABWaitStates;
    int SrcCIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
    int OpNo = MI->getOperandNo(&Op);
    if (OpNo == SrcCIdx) {
      // src2 (the accumulator input) tolerates a shorter distance.
      NeedWaitStates = MFMAWritesAGPROverlappedSrcCWaitStates;
    } else if (Opc == AMDGPU::V_ACCVGPR_READ_B32) {
      // Distance scales with the producing MFMA's latency class.
      switch (HazardDefLatency) {
      case 2:  NeedWaitStates = MFMA4x4WritesAGPRAccVgprReadWaitStates;
               break;
      case 8:  NeedWaitStates = MFMA16x16WritesAGPRAccVgprReadWaitStates;
               break;
      case 16: LLVM_FALLTHROUGH;
      default: NeedWaitStates = MFMA32x32WritesAGPRAccVgprReadWaitStates;
               break;
      }
    } else if (Opc == AMDGPU::V_ACCVGPR_WRITE_B32) {
      switch (HazardDefLatency) {
      case 2:  NeedWaitStates = MFMA4x4WritesAGPRAccVgprWriteWaitStates;
               break;
      case 8:  NeedWaitStates = MFMA16x16WritesAGPRAccVgprWriteWaitStates;
               break;
      case 16: LLVM_FALLTHROUGH;
      default: NeedWaitStates = MFMA32x32WritesAGPRAccVgprWriteWaitStates;
               break;
      }
    }

    int WaitStatesNeededForUse = NeedWaitStates - WaitStatesSinceDef;
    WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);

    if (WaitStatesNeeded == MaxWaitStates)
      return WaitStatesNeeded; // Early exit.

    // Matches a v_accvgpr_write whose dst overlaps Reg.
    auto IsAccVgprWriteFn = [Reg, this] (MachineInstr *MI) {
      if (MI->getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32)
        return false;
      unsigned DstReg = MI->getOperand(0).getReg();
      return TRI.regsOverlap(Reg, DstReg);
    };

    const int AccVGPRWriteMFMAReadSrcCWaitStates = 1;
    const int AccVGPRWriteMFMAReadSrcABWaitStates = 3;
    const int AccVGPRWriteAccVgprReadWaitStates = 3;
    NeedWaitStates = AccVGPRWriteMFMAReadSrcABWaitStates;
    if (OpNo == SrcCIdx)
      NeedWaitStates = AccVGPRWriteMFMAReadSrcCWaitStates;
    else if (Opc == AMDGPU::V_ACCVGPR_READ_B32)
      NeedWaitStates = AccVGPRWriteAccVgprReadWaitStates;

    WaitStatesNeededForUse = NeedWaitStates -
      getWaitStatesSinceDef(Reg, IsAccVgprWriteFn, MaxWaitStates);
    WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);

    if (WaitStatesNeeded == MaxWaitStates)
      return WaitStatesNeeded; // Early exit.
  }

  // Rule 3: v_accvgpr_write after an MFMA that reads an overlapping src2.
  if (Opc == AMDGPU::V_ACCVGPR_WRITE_B32) {
    const int MFMA4x4ReadSrcCAccVgprWriteWaitStates = 0;
    const int MFMA16x16ReadSrcCAccVgprWriteWaitStates = 5;
    const int MFMA32x32ReadSrcCAccVgprWriteWaitStates = 13;
    const int MaxWaitStates = 13;
    unsigned DstReg = MI->getOperand(0).getReg();
    unsigned HazardDefLatency = 0;

    auto IsSrcCMFMAFn = [DstReg, &IsMFMAFn, &HazardDefLatency, this]
                         (MachineInstr *MI) {
      if (!IsMFMAFn(MI))
        return false;
      unsigned Reg = TII.getNamedOperand(*MI, AMDGPU::OpName::src2)->getReg();
      HazardDefLatency = std::max(HazardDefLatency,
                                  TSchedModel.computeInstrLatency(MI));
      return TRI.regsOverlap(Reg, DstReg);
    };

    int WaitStatesSince = getWaitStatesSince(IsSrcCMFMAFn, MaxWaitStates);
    int NeedWaitStates;
    switch (HazardDefLatency) {
    case 2:  NeedWaitStates = MFMA4x4ReadSrcCAccVgprWriteWaitStates;
             break;
    case 8:  NeedWaitStates = MFMA16x16ReadSrcCAccVgprWriteWaitStates;
             break;
    case 16: LLVM_FALLTHROUGH;
    default: NeedWaitStates = MFMA32x32ReadSrcCAccVgprWriteWaitStates;
             break;
    }

    int WaitStatesNeededForUse = NeedWaitStates - WaitStatesSince;
    WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
  }

  return WaitStatesNeeded;
}
1364
1365
234k
// Compute the wait states required before this instruction for VGPR uses
// recently produced by v_accvgpr_read on MAI subtargets. (Per its name this
// is intended for loads/stores — the caller is outside this view; confirm.)
int GCNHazardRecognizer::checkMAILdStHazards(MachineInstr *MI) {
  if (!ST.hasMAIInsts())
    return 0;

  int WaitStatesNeeded = 0;

  auto IsAccVgprReadFn = [] (MachineInstr *MI) {
    return MI->getOpcode() == AMDGPU::V_ACCVGPR_READ_B32;
  };

  for (const MachineOperand &Op : MI->explicit_uses()) {
    if (!Op.isReg() || !TRI.isVGPR(MF.getRegInfo(), Op.getReg()))
      continue;

    unsigned Reg = Op.getReg();

    const int AccVgprReadLdStWaitStates = 2;
    const int VALUWriteAccVgprReadLdStDepVALUWaitStates = 1;
    const int MaxWaitStates = 2;

    // Case 1: Reg was defined directly by a recent v_accvgpr_read.
    int WaitStatesNeededForUse = AccVgprReadLdStWaitStates -
      getWaitStatesSinceDef(Reg, IsAccVgprReadFn, MaxWaitStates);
    WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);

    if (WaitStatesNeeded == MaxWaitStates)
      return WaitStatesNeeded; // Early exit.

    // Case 2: a recent v_accvgpr_read whose own input def of Reg came from
    // a (non-MAI) VALU within 2 states — a transitive dependency.
    auto IsVALUAccVgprReadCheckFn = [Reg, this] (MachineInstr *MI) {
      if (MI->getOpcode() != AMDGPU::V_ACCVGPR_READ_B32)
        return false;
      auto IsVALUFn = [] (MachineInstr *MI) {
        return SIInstrInfo::isVALU(*MI) && !SIInstrInfo::isMAI(*MI);
      };
      return getWaitStatesSinceDef(Reg, IsVALUFn, 2 /*MaxWaitStates*/) <
             std::numeric_limits<int>::max();
    };

    WaitStatesNeededForUse = VALUWriteAccVgprReadLdStDepVALUWaitStates -
      getWaitStatesSince(IsVALUAccVgprReadCheckFn, MaxWaitStates);
    WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
  }

  return WaitStatesNeeded;
}