Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Target/ARM/MLxExpansionPass.cpp
Line
Count
Source (jump to first uncovered line)
1
//===-- MLxExpansionPass.cpp - Expand MLx instrs to avoid hazards ---------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// Expand VFP / NEON floating point MLA / MLS instructions (each to a pair of
10
// multiply and add / sub instructions) when special VMLx hazards are detected.
11
//
12
//===----------------------------------------------------------------------===//
13
14
#include "ARM.h"
15
#include "ARMBaseInstrInfo.h"
16
#include "ARMSubtarget.h"
17
#include "llvm/ADT/SmallPtrSet.h"
18
#include "llvm/ADT/Statistic.h"
19
#include "llvm/CodeGen/MachineFunctionPass.h"
20
#include "llvm/CodeGen/MachineInstr.h"
21
#include "llvm/CodeGen/MachineInstrBuilder.h"
22
#include "llvm/CodeGen/MachineRegisterInfo.h"
23
#include "llvm/CodeGen/TargetRegisterInfo.h"
24
#include "llvm/Support/CommandLine.h"
25
#include "llvm/Support/Debug.h"
26
#include "llvm/Support/raw_ostream.h"
27
using namespace llvm;
28
29
#define DEBUG_TYPE "mlx-expansion"
30
31
static cl::opt<bool>
32
ForceExapnd("expand-all-fp-mlx", cl::init(false), cl::Hidden);
33
static cl::opt<unsigned>
34
ExpandLimit("expand-limit", cl::init(~0U), cl::Hidden);
35
36
STATISTIC(NumExpand, "Number of fp MLA / MLS instructions expanded");
37
38
namespace {
39
  struct MLxExpansion : public MachineFunctionPass {
40
    static char ID;
41
4.91k
    MLxExpansion() : MachineFunctionPass(ID) {}
42
43
    bool runOnMachineFunction(MachineFunction &Fn) override;
44
45
30.1k
    StringRef getPassName() const override {
46
30.1k
      return "ARM MLA / MLS expansion pass";
47
30.1k
    }
48
49
  private:
50
    const ARMBaseInstrInfo *TII;
51
    const TargetRegisterInfo *TRI;
52
    MachineRegisterInfo *MRI;
53
54
    bool isLikeA9;
55
    bool isSwift;
56
    unsigned MIIdx;
57
    MachineInstr* LastMIs[4];
58
    SmallPtrSet<MachineInstr*, 4> IgnoreStall;
59
60
    void clearStack();
61
    void pushStack(MachineInstr *MI);
62
    MachineInstr *getAccDefMI(MachineInstr *MI) const;
63
    unsigned getDefReg(MachineInstr *MI) const;
64
    bool hasLoopHazard(MachineInstr *MI) const;
65
    bool hasRAWHazard(unsigned Reg, MachineInstr *MI) const;
66
    bool FindMLxHazard(MachineInstr *MI);
67
    void ExpandFPMLxInstruction(MachineBasicBlock &MBB, MachineInstr *MI,
68
                                unsigned MulOpc, unsigned AddSubOpc,
69
                                bool NegAcc, bool HasLane);
70
    bool ExpandFPMLxInstructions(MachineBasicBlock &MBB);
71
  };
72
  char MLxExpansion::ID = 0;
73
}
74
75
487
void MLxExpansion::clearStack() {
76
487
  std::fill(LastMIs, LastMIs + 4, nullptr);
77
487
  MIIdx = 0;
78
487
}
79
80
731
void MLxExpansion::pushStack(MachineInstr *MI) {
81
731
  LastMIs[MIIdx] = MI;
82
731
  if (++MIIdx == 4)
83
130
    MIIdx = 0;
84
731
}
85
86
16
MachineInstr *MLxExpansion::getAccDefMI(MachineInstr *MI) const {
87
16
  // Look past COPY and INSERT_SUBREG instructions to find the
88
16
  // real definition MI. This is important for _sfp instructions.
89
16
  unsigned Reg = MI->getOperand(1).getReg();
90
16
  if (TargetRegisterInfo::isPhysicalRegister(Reg))
91
0
    return nullptr;
92
16
93
16
  MachineBasicBlock *MBB = MI->getParent();
94
16
  MachineInstr *DefMI = MRI->getVRegDef(Reg);
95
16
  while (true) {
96
16
    if (DefMI->getParent() != MBB)
97
0
      break;
98
16
    if (DefMI->isCopyLike()) {
99
7
      Reg = DefMI->getOperand(1).getReg();
100
7
      if (TargetRegisterInfo::isVirtualRegister(Reg)) {
101
0
        DefMI = MRI->getVRegDef(Reg);
102
0
        continue;
103
0
      }
104
9
    } else if (DefMI->isInsertSubreg()) {
105
0
      Reg = DefMI->getOperand(2).getReg();
106
0
      if (TargetRegisterInfo::isVirtualRegister(Reg)) {
107
0
        DefMI = MRI->getVRegDef(Reg);
108
0
        continue;
109
0
      }
110
16
    }
111
16
    break;
112
16
  }
113
16
  return DefMI;
114
16
}
115
116
7
unsigned MLxExpansion::getDefReg(MachineInstr *MI) const {
117
7
  unsigned Reg = MI->getOperand(0).getReg();
118
7
  if (TargetRegisterInfo::isPhysicalRegister(Reg) ||
119
7
      !MRI->hasOneNonDBGUse(Reg))
120
0
    return Reg;
121
7
122
7
  MachineBasicBlock *MBB = MI->getParent();
123
7
  MachineInstr *UseMI = &*MRI->use_instr_nodbg_begin(Reg);
124
7
  if (UseMI->getParent() != MBB)
125
0
    return Reg;
126
7
127
7
  while (UseMI->isCopy() || UseMI->isInsertSubreg()) {
128
0
    Reg = UseMI->getOperand(0).getReg();
129
0
    if (TargetRegisterInfo::isPhysicalRegister(Reg) ||
130
0
        !MRI->hasOneNonDBGUse(Reg))
131
0
      return Reg;
132
0
    UseMI = &*MRI->use_instr_nodbg_begin(Reg);
133
0
    if (UseMI->getParent() != MBB)
134
0
      return Reg;
135
0
  }
136
7
137
7
  return Reg;
138
7
}
139
140
/// hasLoopHazard - Check whether an MLx instruction is chained to itself across
141
/// a single-MBB loop.
142
0
bool MLxExpansion::hasLoopHazard(MachineInstr *MI) const {
143
0
  unsigned Reg = MI->getOperand(1).getReg();
144
0
  if (TargetRegisterInfo::isPhysicalRegister(Reg))
145
0
    return false;
146
0
147
0
  MachineBasicBlock *MBB = MI->getParent();
148
0
  MachineInstr *DefMI = MRI->getVRegDef(Reg);
149
0
  while (true) {
150
0
outer_continue:
151
0
    if (DefMI->getParent() != MBB)
152
0
      break;
153
0
154
0
    if (DefMI->isPHI()) {
155
0
      for (unsigned i = 1, e = DefMI->getNumOperands(); i < e; i += 2) {
156
0
        if (DefMI->getOperand(i + 1).getMBB() == MBB) {
157
0
          unsigned SrcReg = DefMI->getOperand(i).getReg();
158
0
          if (TargetRegisterInfo::isVirtualRegister(SrcReg)) {
159
0
            DefMI = MRI->getVRegDef(SrcReg);
160
0
            goto outer_continue;
161
0
          }
162
0
        }
163
0
      }
164
0
    } else if (DefMI->isCopyLike()) {
165
0
      Reg = DefMI->getOperand(1).getReg();
166
0
      if (TargetRegisterInfo::isVirtualRegister(Reg)) {
167
0
        DefMI = MRI->getVRegDef(Reg);
168
0
        continue;
169
0
      }
170
0
    } else if (DefMI->isInsertSubreg()) {
171
0
      Reg = DefMI->getOperand(2).getReg();
172
0
      if (TargetRegisterInfo::isVirtualRegister(Reg)) {
173
0
        DefMI = MRI->getVRegDef(Reg);
174
0
        continue;
175
0
      }
176
0
    }
177
0
178
0
    break;
179
0
  }
180
0
181
0
  return DefMI == MI;
182
0
}
183
184
7
bool MLxExpansion::hasRAWHazard(unsigned Reg, MachineInstr *MI) const {
185
7
  // FIXME: Detect integer instructions properly.
186
7
  const MCInstrDesc &MCID = MI->getDesc();
187
7
  unsigned Domain = MCID.TSFlags & ARMII::DomainMask;
188
7
  if (MI->mayStore())
189
1
    return false;
190
6
  unsigned Opcode = MCID.getOpcode();
191
6
  if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
                               ^3
192
4
    return false;
193
2
  if ((Domain & ARMII::DomainVFP) || (Domain & ARMII::DomainNEON))
                                     ^0
194
2
    return MI->readsRegister(Reg, TRI);
195
0
  return false;
196
0
}
197
198
static bool isFpMulInstruction(unsigned Opcode) {
199
  switch (Opcode) {
200
  case ARM::VMULS:
201
  case ARM::VMULfd:
202
  case ARM::VMULfq:
203
  case ARM::VMULD:
204
  case ARM::VMULslfd:
205
  case ARM::VMULslfq:
206
    return true;
207
  default:
208
    return false;
209
  }
210
}
211
212
16
bool MLxExpansion::FindMLxHazard(MachineInstr *MI) {
213
16
  if (NumExpand >= ExpandLimit)
214
0
    return false;
215
16
216
16
  if (ForceExapnd)
217
0
    return true;
218
16
219
16
  MachineInstr *DefMI = getAccDefMI(MI);
220
16
  if (TII->isFpMLxInstruction(DefMI->getOpcode())) {
221
2
    // r0 = vmla
222
2
    // r3 = vmla r0, r1, r2
223
2
    // takes 16 - 17 cycles
224
2
    //
225
2
    // r0 = vmla
226
2
    // r4 = vmul r1, r2
227
2
    // r3 = vadd r0, r4
228
2
    // takes about 14 - 15 cycles even with vmul stalling for 4 cycles.
229
2
    IgnoreStall.insert(DefMI);
230
2
    return true;
231
2
  }
232
14
233
14
  // On Swift, we mostly care about hazards from multiplication instructions
234
14
  // writing the accumulator and the pipelining of loop iterations by out-of-
235
14
  // order execution.
236
14
  if (isSwift)
237
0
    return isFpMulInstruction(DefMI->getOpcode()) || hasLoopHazard(MI);
238
14
239
14
  if (IgnoreStall.count(MI))
240
2
    return false;
241
12
242
12
  // If a VMLA.F is followed by an VADD.F or VMUL.F with no RAW hazard, the
243
12
  // VADD.F or VMUL.F will stall 4 cycles before issue. The 4 cycle stall
244
12
  // preserves the in-order retirement of the instructions.
245
12
  // Look at the next few instructions, if *most* of them can cause hazards,
246
12
  // then the scheduler can't *fix* this, we'd better break up the VMLA.
247
12
  unsigned Limit1 = isLikeA9 ? 1 : 4;
                                   ^0
248
12
  unsigned Limit2 = isLikeA9 ? 1 : 4;
                                   ^0
249
60
  for (unsigned i = 1; i <= 4; ++i) {
                               ^48
250
48
    int Idx = ((int)MIIdx - i + 4) % 4;
251
48
    MachineInstr *NextMI = LastMIs[Idx];
252
48
    if (!NextMI)
253
34
      continue;
254
14
255
14
    if (TII->canCauseFpMLxStall(NextMI->getOpcode())) {
256
0
      if (i <= Limit1)
257
0
        return true;
258
14
    }
259
14
260
14
    // Look for VMLx RAW hazard.
261
14
    if (i <= Limit2 && hasRAWHazard(getDefReg(MI), NextMI))
                       ^7
262
0
      return true;
263
14
  }
264
12
265
12
  return false;
266
12
}
267
268
/// ExpandFPMLxInstruction - Expand an MLA / MLS instruction into a pair
269
/// of MUL + ADD / SUB instructions.
270
void
271
MLxExpansion::ExpandFPMLxInstruction(MachineBasicBlock &MBB, MachineInstr *MI,
272
                                     unsigned MulOpc, unsigned AddSubOpc,
273
2
                                     bool NegAcc, bool HasLane) {
274
2
  unsigned DstReg = MI->getOperand(0).getReg();
275
2
  bool DstDead = MI->getOperand(0).isDead();
276
2
  unsigned AccReg = MI->getOperand(1).getReg();
277
2
  unsigned Src1Reg = MI->getOperand(2).getReg();
278
2
  unsigned Src2Reg = MI->getOperand(3).getReg();
279
2
  bool Src1Kill = MI->getOperand(2).isKill();
280
2
  bool Src2Kill = MI->getOperand(3).isKill();
281
2
  unsigned LaneImm = HasLane ? MI->getOperand(4).getImm() : 0;
                               ^0
282
2
  unsigned NextOp = HasLane ? 5 : 4;
                              ^0
283
2
  ARMCC::CondCodes Pred = (ARMCC::CondCodes)MI->getOperand(NextOp).getImm();
284
2
  unsigned PredReg = MI->getOperand(++NextOp).getReg();
285
2
286
2
  const MCInstrDesc &MCID1 = TII->get(MulOpc);
287
2
  const MCInstrDesc &MCID2 = TII->get(AddSubOpc);
288
2
  const MachineFunction &MF = *MI->getParent()->getParent();
289
2
  unsigned TmpReg = MRI->createVirtualRegister(
290
2
                      TII->getRegClass(MCID1, 0, TRI, MF));
291
2
292
2
  MachineInstrBuilder MIB = BuildMI(MBB, MI, MI->getDebugLoc(), MCID1, TmpReg)
293
2
    .addReg(Src1Reg, getKillRegState(Src1Kill))
294
2
    .addReg(Src2Reg, getKillRegState(Src2Kill));
295
2
  if (HasLane)
296
0
    MIB.addImm(LaneImm);
297
2
  MIB.addImm(Pred).addReg(PredReg);
298
2
299
2
  MIB = BuildMI(MBB, MI, MI->getDebugLoc(), MCID2)
300
2
    .addReg(DstReg, getDefRegState(true) | getDeadRegState(DstDead));
301
2
302
2
  if (NegAcc) {
303
0
    bool AccKill = MRI->hasOneNonDBGUse(AccReg);
304
0
    MIB.addReg(TmpReg, getKillRegState(true))
305
0
       .addReg(AccReg, getKillRegState(AccKill));
306
2
  } else {
307
2
    MIB.addReg(AccReg).addReg(TmpReg, getKillRegState(true));
308
2
  }
309
2
  MIB.addImm(Pred).addReg(PredReg);
310
2
311
2
  LLVM_DEBUG({
312
2
    dbgs() << "Expanding: " << *MI;
313
2
    dbgs() << "  to:\n";
314
2
    MachineBasicBlock::iterator MII = MI;
315
2
    MII = std::prev(MII);
316
2
    MachineInstr &MI2 = *MII;
317
2
    MII = std::prev(MII);
318
2
    MachineInstr &MI1 = *MII;
319
2
    dbgs() << "    " << MI1;
320
2
    dbgs() << "    " << MI2;
321
2
  });
322
2
323
2
  MI->eraseFromParent();
324
2
  ++NumExpand;
325
2
}
326
327
265
bool MLxExpansion::ExpandFPMLxInstructions(MachineBasicBlock &MBB) {
328
265
  bool Changed = false;
329
265
330
265
  clearStack();
331
265
  IgnoreStall.clear();
332
265
333
265
  unsigned Skip = 0;
334
265
  MachineBasicBlock::reverse_iterator MII = MBB.rbegin(), E = MBB.rend();
335
2.61k
  while (MII != E) {
336
2.34k
    MachineInstr *MI = &*MII++;
337
2.34k
338
2.34k
    if (MI->isPosition() || MI->isImplicitDef() || MI->isCopy())
                            ^2.34k                 ^2.31k
339
731
      continue;
340
1.61k
341
1.61k
    const MCInstrDesc &MCID = MI->getDesc();
342
1.61k
    if (MI->isBarrier()) {
343
222
      clearStack();
344
222
      Skip = 0;
345
222
      continue;
346
222
    }
347
1.39k
348
1.39k
    unsigned Domain = MCID.TSFlags & ARMII::DomainMask;
349
1.39k
    if (Domain == ARMII::DomainGeneral) {
350
832
      if (++Skip == 2)
351
170
        // Assume dual issues of non-VFP / NEON instructions.
352
170
        pushStack(nullptr);
353
832
    } else {
354
563
      Skip = 0;
355
563
356
563
      unsigned MulOpc, AddSubOpc;
357
563
      bool NegAcc, HasLane;
358
563
      if (!TII->isFpMLxInstruction(MCID.getOpcode(),
359
563
                                   MulOpc, AddSubOpc, NegAcc, HasLane) ||
360
563
          !FindMLxHazard(MI))
          ^16
361
561
        pushStack(MI);
362
2
      else {
363
2
        ExpandFPMLxInstruction(MBB, MI, MulOpc, AddSubOpc, NegAcc, HasLane);
364
2
        Changed = true;
365
2
      }
366
563
    }
367
1.39k
  }
368
265
369
265
  return Changed;
370
265
}
371
372
25.2k
bool MLxExpansion::runOnMachineFunction(MachineFunction &Fn) {
373
25.2k
  if (skipFunction(Fn.getFunction()))
374
11
    return false;
375
25.2k
376
25.2k
  TII = static_cast<const ARMBaseInstrInfo *>(Fn.getSubtarget().getInstrInfo());
377
25.2k
  TRI = Fn.getSubtarget().getRegisterInfo();
378
25.2k
  MRI = &Fn.getRegInfo();
379
25.2k
  const ARMSubtarget *STI = &Fn.getSubtarget<ARMSubtarget>();
380
25.2k
  if (!STI->expandMLx())
381
25.0k
    return false;
382
179
  isLikeA9 = STI->isLikeA9() || STI->isSwift();
                                ^25
383
179
  isSwift = STI->isSwift();
384
179
385
179
  bool Modified = false;
386
179
  for (MachineBasicBlock &MBB : Fn)
387
265
    Modified |= ExpandFPMLxInstructions(MBB);
388
179
389
179
  return Modified;
390
179
}
391
392
4.91k
FunctionPass *llvm::createMLxExpansionPass() {
393
4.91k
  return new MLxExpansion();
394
4.91k
}