Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.cpp
Line
Count
Source (jump to first uncovered line)
1
//=-- SystemZHazardRecognizer.cpp - SystemZ Hazard Recognizer ---*- C++ -*-===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This file defines a hazard recognizer for the SystemZ scheduler.
10
//
11
// This class is used by the SystemZ scheduling strategy to maintain
12
// the state during scheduling, and provide cost functions for
13
// scheduling candidates. This includes:
14
//
15
// * Decoder grouping. A decoder group can maximally hold 3 uops, and
16
// instructions that always begin a new group should be scheduled when
17
// the current decoder group is empty.
18
// * Processor resources usage. It is beneficial to balance the use of
19
// resources.
20
//
21
// A goal is to consider all instructions, also those outside of any
22
// scheduling region. Such instructions are "advanced" past and include
23
// single instructions before a scheduling region, branches etc.
24
//
25
// A block that has only one predecessor continues scheduling with the state
26
// of it (which may be updated by emitting branches).
27
//
28
// ===---------------------------------------------------------------------===//
29
30
#include "SystemZHazardRecognizer.h"
31
#include "llvm/ADT/Statistic.h"
32
33
using namespace llvm;
34
35
#define DEBUG_TYPE "machine-scheduler"
36
37
// This is the limit of processor resource usage at which the
38
// scheduler should try to look for other instructions (not using the
39
// critical resource).
40
static cl::opt<int> ProcResCostLim("procres-cost-lim", cl::Hidden,
41
                                   cl::desc("The OOO window for processor "
42
                                            "resources during scheduling."),
43
                                   cl::init(8));
44
45
unsigned SystemZHazardRecognizer::
46
24.5k
getNumDecoderSlots(SUnit *SU) const {
47
24.5k
  const MCSchedClassDesc *SC = getSchedClass(SU);
48
24.5k
  if (!SC->isValid())
49
561
    return 0; // IMPLICIT_DEF / KILL -- will not make impact in output.
50
23.9k
51
23.9k
  assert((SC->NumMicroOps != 2 || (SC->BeginGroup && !SC->EndGroup)) &&
52
23.9k
         "Only cracked instruction can have 2 uops.");
53
23.9k
  assert((SC->NumMicroOps < 3 || (SC->BeginGroup && SC->EndGroup)) &&
54
23.9k
         "Expanded instructions always group alone.");
55
23.9k
  assert((SC->NumMicroOps < 3 || (SC->NumMicroOps % 3 == 0)) &&
56
23.9k
         "Expanded instructions fill the group(s).");
57
23.9k
58
23.9k
  return SC->NumMicroOps;
59
23.9k
}
60
61
245
// Return the current cycle index: the current group size, plus 3 when
// GrpCount is odd. If SU is given and does not fit into the current group,
// the index is moved to the start of the following group (3, or wrapping
// around to 0).
unsigned SystemZHazardRecognizer::getCurrCycleIdx(SUnit *SU) const {
  const unsigned CycleIdx =
      (GrpCount % 2 != 0) ? CurrGroupSize + 3 : CurrGroupSize;

  // Without an SU, or with one that fits, the index is used unchanged.
  if (SU == nullptr || fitsIntoCurrentGroup(SU))
    return CycleIdx;

  // SU will start a new group: skip the rest of the partly filled group.
  switch (CycleIdx) {
  case 1:
  case 2:
    return 3;
  case 4:
  case 5:
    return 0;
  default:
    return CycleIdx;
  }
}
75
76
// Report a hazard exactly when m does not fit into the current decoder
// group. The Stalls argument is not used.
ScheduleHazardRecognizer::HazardType
SystemZHazardRecognizer::getHazardType(SUnit *m, int Stalls) {
  if (fitsIntoCurrentGroup(m))
    return NoHazard;
  return Hazard;
}
80
81
4.43k
void SystemZHazardRecognizer::Reset() {
82
4.43k
  CurrGroupSize = 0;
83
4.43k
  CurrGroupHas4RegOps = false;
84
4.43k
  clearProcResCounters();
85
4.43k
  GrpCount = 0;
86
4.43k
  LastFPdOpCycleIdx = UINT_MAX;
87
4.43k
  LastEmittedMI = nullptr;
88
4.43k
  LLVM_DEBUG(CurGroupDbg = "";);
89
4.43k
}
90
91
bool
92
24.9k
SystemZHazardRecognizer::fitsIntoCurrentGroup(SUnit *SU) const {
93
24.9k
  const MCSchedClassDesc *SC = getSchedClass(SU);
94
24.9k
  if (!SC->isValid())
95
561
    return true;
96
24.3k
97
24.3k
  // A cracked instruction only fits into schedule if the current
98
24.3k
  // group is empty.
99
24.3k
  if (SC->BeginGroup)
100
1.02k
    return (CurrGroupSize == 0);
101
23.3k
102
23.3k
  // An instruction with 4 register operands will not fit in last slot.
103
23.3k
  assert ((CurrGroupSize < 2 || !CurrGroupHas4RegOps) &&
104
23.3k
          "Current decoder group is already full!");
105
23.3k
  if (CurrGroupSize == 2 && 
has4RegOps(SU->getInstr())5.96k
)
106
80
    return false;
107
23.2k
108
23.2k
  // Since a full group is handled immediately in EmitInstruction(),
109
23.2k
  // SU should fit into current group. NumSlots should be 1 or 0,
110
23.2k
  // since it is not a cracked or expanded instruction.
111
23.2k
  assert ((getNumDecoderSlots(SU) <= 1) && (CurrGroupSize < 3) &&
112
23.2k
          "Expected normal instruction to fit in non-full group!");
113
23.2k
114
23.2k
  return true;
115
23.2k
}
116
117
34.4k
bool SystemZHazardRecognizer::has4RegOps(const MachineInstr *MI) const {
118
34.4k
  const MachineFunction &MF = *MI->getParent()->getParent();
119
34.4k
  const TargetRegisterInfo *TRI = &TII->getRegisterInfo();
120
34.4k
  const MCInstrDesc &MID = MI->getDesc();
121
34.4k
  unsigned Count = 0;
122
152k
  for (unsigned OpIdx = 0; OpIdx < MID.getNumOperands(); 
OpIdx++117k
) {
123
117k
    const TargetRegisterClass *RC = TII->getRegClass(MID, OpIdx, TRI, MF);
124
117k
    if (RC == nullptr)
125
29.2k
      continue;
126
88.3k
    if (OpIdx >= MID.getNumDefs() &&
127
88.3k
        
MID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -160.6k
)
128
4.36k
      continue;
129
83.9k
    Count++;
130
83.9k
  }
131
34.4k
  return Count >= 4;
132
34.4k
}
133
134
7.57k
void SystemZHazardRecognizer::nextGroup() {
135
7.57k
  if (CurrGroupSize == 0)
136
63
    return;
137
7.51k
138
7.51k
  LLVM_DEBUG(dumpCurrGroup("Completed decode group"));
139
7.51k
  LLVM_DEBUG(CurGroupDbg = "";);
140
7.51k
141
7.51k
  int NumGroups = ((CurrGroupSize > 3) ? 
(CurrGroupSize / 3)6
:
17.50k
);
142
7.51k
  assert((CurrGroupSize <= 3 || CurrGroupSize % 3 == 0) &&
143
7.51k
         "Current decoder group bad.");
144
7.51k
145
7.51k
  // Reset counter for next group.
146
7.51k
  CurrGroupSize = 0;
147
7.51k
  CurrGroupHas4RegOps = false;
148
7.51k
149
7.51k
  GrpCount += ((unsigned) NumGroups);
150
7.51k
151
7.51k
  // Decrease counters for execution units.
152
66.2k
  for (unsigned i = 0; i < SchedModel->getNumProcResourceKinds(); 
++i58.7k
)
153
58.7k
    ProcResourceCounters[i] = ((ProcResourceCounters[i] > NumGroups)
154
58.7k
                                   ? 
(ProcResourceCounters[i] - NumGroups)10.1k
155
58.7k
                                   : 
048.5k
);
156
7.51k
157
7.51k
  // Clear CriticalResourceIdx if it is now below the threshold.
158
7.51k
  if (CriticalResourceIdx != UINT_MAX &&
159
7.51k
      (ProcResourceCounters[CriticalResourceIdx] <=
160
4.15k
       ProcResCostLim))
161
126
    CriticalResourceIdx = UINT_MAX;
162
7.51k
163
7.51k
  LLVM_DEBUG(dumpState(););
164
7.51k
}
165
166
#ifndef NDEBUG // Debug output
167
// Print a one-line description of SU to OS: node number, opcode name, the
// processor resources it writes (with cycle counts above 1), and flags
// describing its decoder grouping behavior.
void SystemZHazardRecognizer::dumpSU(SUnit *SU, raw_ostream &OS) const {
  OS << "SU(" << SU->NodeNum << "):";
  OS << TII->getName(SU->getInstr()->getOpcode());

  const MCSchedClassDesc *SC = getSchedClass(SU);
  if (!SC->isValid())
    return;

  TargetSchedModel::ProcResIter PI = SchedModel->getWriteProcResBegin(SC);
  const TargetSchedModel::ProcResIter PE = SchedModel->getWriteProcResEnd(SC);
  for (; PI != PE; ++PI) {
    const MCProcResourceDesc &PRD =
        *SchedModel->getProcResource(PI->ProcResourceIdx);

    // Shorten the resource name, e.g. Z13_FXaUnit -> FXa: drop everything up
    // to and including the first '_', then cut at "Unit".
    std::string UnitName(PRD.Name);
    UnitName.erase(0, UnitName.find("_") + 1);
    const size_t UnitPos = UnitName.find("Unit");
    if (UnitPos != std::string::npos)
      UnitName.erase(UnitPos);
    if (UnitName == "LS") // LSUnit -> LSU
      UnitName = "LSU";
    OS << "/" << UnitName;

    if (PI->Cycles > 1)
      OS << "(" << PI->Cycles << "cyc)";
  }

  if (SC->NumMicroOps > 1)
    OS << "/" << SC->NumMicroOps << "uops";

  // Decoder grouping flags.
  const bool Begins = SC->BeginGroup;
  const bool Ends = SC->EndGroup;
  if (Begins && Ends)
    OS << "/GroupsAlone";
  else if (Begins)
    OS << "/BeginsGroup";
  else if (Ends)
    OS << "/EndsGroup";

  if (SU->isUnbuffered)
    OS << "/Unbuffered";
  if (has4RegOps(SU->getInstr()))
    OS << "/4RegOps";
}
207
208
void SystemZHazardRecognizer::dumpCurrGroup(std::string Msg) const {
209
  dbgs() << "++ " << Msg;
210
  dbgs() << ": ";
211
212
  if (CurGroupDbg.empty())
213
    dbgs() << " <empty>\n";
214
  else {
215
    dbgs() << "{ " << CurGroupDbg << " }";
216
    dbgs() << " (" << CurrGroupSize << " decoder slot"
217
           << (CurrGroupSize > 1 ? "s":"")
218
           << (CurrGroupHas4RegOps ? ", 4RegOps" : "")
219
           << ")\n";
220
  }
221
}
222
223
void SystemZHazardRecognizer::dumpProcResourceCounters() const {
224
  bool any = false;
225
226
  for (unsigned i = 0; i < SchedModel->getNumProcResourceKinds(); ++i)
227
    if (ProcResourceCounters[i] > 0) {
228
      any = true;
229
      break;
230
    }
231
232
  if (!any)
233
    return;
234
235
  dbgs() << "++ | Resource counters: ";
236
  for (unsigned i = 0; i < SchedModel->getNumProcResourceKinds(); ++i)
237
    if (ProcResourceCounters[i] > 0)
238
      dbgs() << SchedModel->getProcResource(i)->Name
239
             << ":" << ProcResourceCounters[i] << " ";
240
  dbgs() << "\n";
241
242
  if (CriticalResourceIdx != UINT_MAX)
243
    dbgs() << "++ | Critical resource: "
244
           << SchedModel->getProcResource(CriticalResourceIdx)->Name
245
           << "\n";
246
}
247
248
void SystemZHazardRecognizer::dumpState() const {
249
  dumpCurrGroup("| Current decoder group");
250
  dbgs() << "++ | Current cycle index: "
251
         << getCurrCycleIdx() << "\n";
252
  dumpProcResourceCounters();
253
  if (LastFPdOpCycleIdx != UINT_MAX)
254
    dbgs() << "++ | Last FPd cycle index: " << LastFPdOpCycleIdx << "\n";
255
}
256
257
#endif //NDEBUG
258
259
4.43k
void SystemZHazardRecognizer::clearProcResCounters() {
260
4.43k
  ProcResourceCounters.assign(SchedModel->getNumProcResourceKinds(), 0);
261
4.43k
  CriticalResourceIdx = UINT_MAX;
262
4.43k
}
263
264
3.01k
// Return true if MI is a branch, a return, or a SystemZ::CondTrap.
static inline bool isBranchRetTrap(MachineInstr *MI) {
  if (MI->isBranch())
    return true;
  if (MI->isReturn())
    return true;
  return MI->getOpcode() == SystemZ::CondTrap;
}
268
269
// Update state with SU as the next scheduled unit.
270
void SystemZHazardRecognizer::
271
24.7k
EmitInstruction(SUnit *SU) {
272
24.7k
  const MCSchedClassDesc *SC = getSchedClass(SU);
273
24.7k
  LLVM_DEBUG(dbgs() << "++ HazardRecognizer emitting "; dumpSU(SU, dbgs());
274
24.7k
             dbgs() << "\n";);
275
24.7k
  LLVM_DEBUG(dumpCurrGroup("Decode group before emission"););
276
24.7k
277
24.7k
  // If scheduling an SU that must begin a new decoder group, move on
278
24.7k
  // to next group.
279
24.7k
  if (!fitsIntoCurrentGroup(SU))
280
484
    nextGroup();
281
24.7k
282
24.7k
  LLVM_DEBUG(raw_string_ostream cgd(CurGroupDbg);
283
24.7k
             if (CurGroupDbg.length()) cgd << ", "; dumpSU(SU, cgd););
284
24.7k
285
24.7k
  LastEmittedMI = SU->getInstr();
286
24.7k
287
24.7k
  // After returning from a call, we don't know much about the state.
288
24.7k
  if (SU->isCall) {
289
253
    LLVM_DEBUG(dbgs() << "++ Clearing state after call.\n";);
290
253
    Reset();
291
253
    LastEmittedMI = SU->getInstr();
292
253
    return;
293
253
  }
294
24.4k
295
24.4k
  // Increase counter for execution unit(s).
296
24.4k
  for (TargetSchedModel::ProcResIter
297
24.4k
         PI = SchedModel->getWriteProcResBegin(SC),
298
53.3k
         PE = SchedModel->getWriteProcResEnd(SC); PI != PE; 
++PI28.9k
) {
299
28.9k
    // Don't handle FPd together with the other resources.
300
28.9k
    if (SchedModel->getProcResource(PI->ProcResourceIdx)->BufferSize == 1)
301
193
      continue;
302
28.7k
    int &CurrCounter =
303
28.7k
      ProcResourceCounters[PI->ProcResourceIdx];
304
28.7k
    CurrCounter += PI->Cycles;
305
28.7k
    // Check if this is now the new critical resource.
306
28.7k
    if ((CurrCounter > ProcResCostLim) &&
307
28.7k
        
(9.10k
CriticalResourceIdx == UINT_MAX9.10k
||
308
9.10k
         
(8.81k
PI->ProcResourceIdx != CriticalResourceIdx8.81k
&&
309
8.81k
          CurrCounter >
310
609
          ProcResourceCounters[CriticalResourceIdx]))) {
311
293
      LLVM_DEBUG(
312
293
          dbgs() << "++ New critical resource: "
313
293
                 << SchedModel->getProcResource(PI->ProcResourceIdx)->Name
314
293
                 << "\n";);
315
293
      CriticalResourceIdx = PI->ProcResourceIdx;
316
293
    }
317
28.7k
  }
318
24.4k
319
24.4k
  // Make note of an instruction that uses a blocking resource (FPd).
320
24.4k
  if (SU->isUnbuffered) {
321
193
    LastFPdOpCycleIdx = getCurrCycleIdx(SU);
322
193
    LLVM_DEBUG(dbgs() << "++ Last FPd cycle index: " << LastFPdOpCycleIdx
323
193
                      << "\n";);
324
193
  }
325
24.4k
326
24.4k
  // Insert SU into current group by increasing number of slots used
327
24.4k
  // in current group.
328
24.4k
  CurrGroupSize += getNumDecoderSlots(SU);
329
24.4k
  CurrGroupHas4RegOps |= has4RegOps(SU->getInstr());
330
24.4k
  unsigned GroupLim = (CurrGroupHas4RegOps ? 
2742
:
323.7k
);
331
24.4k
  assert((CurrGroupSize <= GroupLim || CurrGroupSize == getNumDecoderSlots(SU))
332
24.4k
         && "SU does not fit into decoder group!");
333
24.4k
334
24.4k
  // Check if current group is now full/ended. If so, move on to next
335
24.4k
  // group to be ready to evaluate more candidates.
336
24.4k
  if (CurrGroupSize >= GroupLim || 
SC->EndGroup17.6k
)
337
6.94k
    nextGroup();
338
24.4k
}
339
340
12.7k
// Return a cost for scheduling SU next with respect to decoder grouping.
// A negative value means SU fits the current group state well, a positive
// value that decoder slots would be wasted, and 0 that it does not matter.
int SystemZHazardRecognizer::groupingCost(SUnit *SU) const {
  const MCSchedClassDesc *SchedClass = getSchedClass(SU);
  if (!SchedClass->isValid())
    return 0;

  // A group-beginning SU fits naturally into an empty group (negative
  // cost); otherwise it breaks the current group early and the cost is the
  // number of slots left unused.
  if (SchedClass->BeginGroup) {
    if (CurrGroupSize == 0)
      return -1;
    return 3 - CurrGroupSize;
  }

  // Likewise, a group-ending SU is good if it completes the group, and
  // otherwise costs the number of slots it leaves empty.
  if (SchedClass->EndGroup) {
    const unsigned SizeAfterSU = CurrGroupSize + getNumDecoderSlots(SU);
    if (SizeAfterSU >= 3)
      return -1;
    return (3 - SizeAfterSU);
  }

  // An instruction with 4 register operands will not fit in last slot.
  const bool OnlyLastSlotLeft = (CurrGroupSize == 2);
  if (OnlyLastSlotLeft && has4RegOps(SU->getInstr()))
    return 1;

  // Most instructions can be placed in any decoder slot.
  return 0;
}
370
371
83
// Return true if the FPd op SU is well placed relative to the previous FPd
// op: either there is no previous one, or the two cycle indices are exactly
// 3 apart.
bool SystemZHazardRecognizer::isFPdOpPreferred_distance(SUnit *SU) const {
  assert (SU->isUnbuffered);

  // If this is the first FPd op, it should be scheduled high.
  if (LastFPdOpCycleIdx == UINT_MAX)
    return true;

  // If this is not the first FPd op, it should go into the other side of
  // the processor to use the other FPd unit there. This should generally
  // happen if two FPd ops are placed with 2 other instructions between them
  // (modulo 6).
  const unsigned SUCycleIdx = getCurrCycleIdx(SU);
  const unsigned Distance = (LastFPdOpCycleIdx > SUCycleIdx)
                                ? (LastFPdOpCycleIdx - SUCycleIdx)
                                : (SUCycleIdx - LastFPdOpCycleIdx);
  return Distance == 3;
}
385
386
int SystemZHazardRecognizer::
387
12.7k
resourcesCost(SUnit *SU) {
388
12.7k
  int Cost = 0;
389
12.7k
390
12.7k
  const MCSchedClassDesc *SC = getSchedClass(SU);
391
12.7k
  if (!SC->isValid())
392
46
    return 0;
393
12.6k
394
12.6k
  // For a FPd op, either return min or max value as indicated by the
395
12.6k
  // distance to any prior FPd op.
396
12.6k
  if (SU->isUnbuffered)
397
83
    Cost = (isFPdOpPreferred_distance(SU) ? INT_MIN : INT_MAX);
398
12.6k
  // For other instructions, give a cost to the use of the critical resource.
399
12.6k
  else if (CriticalResourceIdx != UINT_MAX) {
400
8.82k
    for (TargetSchedModel::ProcResIter
401
8.82k
           PI = SchedModel->getWriteProcResBegin(SC),
402
18.6k
           PE = SchedModel->getWriteProcResEnd(SC); PI != PE; 
++PI9.79k
)
403
9.79k
      if (PI->ProcResourceIdx == CriticalResourceIdx)
404
7.72k
        Cost = PI->Cycles;
405
8.82k
  }
406
12.6k
407
12.6k
  return Cost;
408
12.6k
}
409
410
void SystemZHazardRecognizer::emitInstruction(MachineInstr *MI,
411
3.03k
                                              bool TakenBranch) {
412
3.03k
  // Make a temporary SUnit.
413
3.03k
  SUnit SU(MI, 0);
414
3.03k
415
3.03k
  // Set interesting flags.
416
3.03k
  SU.isCall = MI->isCall();
417
3.03k
418
3.03k
  const MCSchedClassDesc *SC = SchedModel->resolveSchedClass(MI);
419
3.03k
  for (const MCWriteProcResEntry &PRE :
420
3.03k
         make_range(SchedModel->getWriteProcResBegin(SC),
421
3.86k
                    SchedModel->getWriteProcResEnd(SC))) {
422
3.86k
    switch (SchedModel->getProcResource(PRE.ProcResourceIdx)->BufferSize) {
423
3.86k
    case 0:
424
0
      SU.hasReservedResource = true;
425
0
      break;
426
3.86k
    case 1:
427
40
      SU.isUnbuffered = true;
428
40
      break;
429
3.86k
    default:
430
3.82k
      break;
431
3.86k
    }
432
3.86k
  }
433
3.03k
434
3.03k
  unsigned GroupSizeBeforeEmit = CurrGroupSize;
435
3.03k
  EmitInstruction(&SU);
436
3.03k
437
3.03k
  if (!TakenBranch && 
isBranchRetTrap(MI)3.01k
) {
438
287
    // NT Branch on second slot ends group.
439
287
    if (GroupSizeBeforeEmit == 1)
440
137
      nextGroup();
441
287
  }
442
3.03k
443
3.03k
  if (TakenBranch && 
CurrGroupSize > 023
)
444
17
    nextGroup();
445
3.03k
446
3.03k
  assert ((!MI->isTerminator() || isBranchRetTrap(MI)) &&
447
3.03k
          "Scheduler: unhandled terminator!");
448
3.03k
}
449
450
void SystemZHazardRecognizer::
451
311
copyState(SystemZHazardRecognizer *Incoming) {
452
311
  // Current decoder group
453
311
  CurrGroupSize = Incoming->CurrGroupSize;
454
311
  LLVM_DEBUG(CurGroupDbg = Incoming->CurGroupDbg;);
455
311
456
311
  // Processor resources
457
311
  ProcResourceCounters = Incoming->ProcResourceCounters;
458
311
  CriticalResourceIdx = Incoming->CriticalResourceIdx;
459
311
460
311
  // FPd
461
311
  LastFPdOpCycleIdx = Incoming->LastFPdOpCycleIdx;
462
311
  GrpCount = Incoming->GrpCount;
463
311
}