Coverage Report

Created: 2017-10-03 07:32

/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/lib/Target/ARM/MLxExpansionPass.cpp
Line
Count
Source (jump to first uncovered line)
1
//===-- MLxExpansionPass.cpp - Expand MLx instrs to avoid hazards ---------===//
2
//
3
//                     The LLVM Compiler Infrastructure
4
//
5
// This file is distributed under the University of Illinois Open Source
6
// License. See LICENSE.TXT for details.
7
//
8
//===----------------------------------------------------------------------===//
9
//
10
// Expand VFP / NEON floating point MLA / MLS instructions (each to a pair of
11
// multiply and add / sub instructions) when special VMLx hazards are detected.
12
//
13
//===----------------------------------------------------------------------===//
14
15
#include "ARM.h"
16
#include "ARMBaseInstrInfo.h"
17
#include "ARMSubtarget.h"
18
#include "llvm/ADT/SmallPtrSet.h"
19
#include "llvm/ADT/Statistic.h"
20
#include "llvm/CodeGen/MachineFunctionPass.h"
21
#include "llvm/CodeGen/MachineInstr.h"
22
#include "llvm/CodeGen/MachineInstrBuilder.h"
23
#include "llvm/CodeGen/MachineRegisterInfo.h"
24
#include "llvm/Support/CommandLine.h"
25
#include "llvm/Support/Debug.h"
26
#include "llvm/Support/raw_ostream.h"
27
#include "llvm/Target/TargetRegisterInfo.h"
28
using namespace llvm;
29
30
#define DEBUG_TYPE "mlx-expansion"
31
32
static cl::opt<bool>
33
ForceExapnd("expand-all-fp-mlx", cl::init(false), cl::Hidden);
34
static cl::opt<unsigned>
35
ExpandLimit("expand-limit", cl::init(~0U), cl::Hidden);
36
37
STATISTIC(NumExpand, "Number of fp MLA / MLS instructions expanded");
38
39
namespace {
40
  struct MLxExpansion : public MachineFunctionPass {
41
    static char ID;
42
4.12k
    MLxExpansion() : MachineFunctionPass(ID) {}
43
44
    bool runOnMachineFunction(MachineFunction &Fn) override;
45
46
4.12k
    StringRef getPassName() const override {
47
4.12k
      return "ARM MLA / MLS expansion pass";
48
4.12k
    }
49
50
  private:
51
    const ARMBaseInstrInfo *TII;
52
    const TargetRegisterInfo *TRI;
53
    MachineRegisterInfo *MRI;
54
55
    bool isLikeA9;
56
    bool isSwift;
57
    unsigned MIIdx;
58
    MachineInstr* LastMIs[4];
59
    SmallPtrSet<MachineInstr*, 4> IgnoreStall;
60
61
    void clearStack();
62
    void pushStack(MachineInstr *MI);
63
    MachineInstr *getAccDefMI(MachineInstr *MI) const;
64
    unsigned getDefReg(MachineInstr *MI) const;
65
    bool hasLoopHazard(MachineInstr *MI) const;
66
    bool hasRAWHazard(unsigned Reg, MachineInstr *MI) const;
67
    bool FindMLxHazard(MachineInstr *MI);
68
    void ExpandFPMLxInstruction(MachineBasicBlock &MBB, MachineInstr *MI,
69
                                unsigned MulOpc, unsigned AddSubOpc,
70
                                bool NegAcc, bool HasLane);
71
    bool ExpandFPMLxInstructions(MachineBasicBlock &MBB);
72
  };
73
  char MLxExpansion::ID = 0;
74
}
75
76
434
void MLxExpansion::clearStack() {
77
434
  std::fill(LastMIs, LastMIs + 4, nullptr);
78
434
  MIIdx = 0;
79
434
}
80
81
782
/// Store \p MI (callers also pass nullptr to record an empty slot) at the
/// current write position of LastMIs and advance that position, wrapping back
/// to slot 0 after the fourth slot.
void MLxExpansion::pushStack(MachineInstr *MI) {
  LastMIs[MIIdx++] = MI;
  if (MIIdx == 4)
    MIIdx = 0;
}
86
87
16
/// Find the instruction that defines the accumulator (operand 1) of \p MI.
///
/// Look past COPY and INSERT_SUBREG instructions to find the real definition
/// MI. This is important for _sfp instructions. The walk only follows
/// definitions located in the same basic block as \p MI and stops as soon as
/// the forwarded source is not a virtual register.
///
/// \returns the last definition reached, or nullptr when the accumulator is a
/// physical register or a virtual register on the chain has no definition.
MachineInstr *MLxExpansion::getAccDefMI(MachineInstr *MI) const {
  unsigned Reg = MI->getOperand(1).getReg();
  if (TargetRegisterInfo::isPhysicalRegister(Reg))
    return nullptr;

  MachineBasicBlock *MBB = MI->getParent();
  MachineInstr *DefMI = MRI->getVRegDef(Reg);
  // getVRegDef() may return null, so test the pointer before every use.
  while (DefMI && DefMI->getParent() == MBB) {
    // Operand index of the value being forwarded by this instruction.
    unsigned SrcIdx;
    if (DefMI->isCopyLike())
      SrcIdx = 1;
    else if (DefMI->isInsertSubreg())
      SrcIdx = 2;
    else
      break;

    Reg = DefMI->getOperand(SrcIdx).getReg();
    if (!TargetRegisterInfo::isVirtualRegister(Reg))
      break;
    DefMI = MRI->getVRegDef(Reg);
  }
  return DefMI;
}
116
117
8
/// Return the register through which the result of \p MI is consumed.
///
/// Starting at operand 0 of \p MI, the walk steps to the destination of the
/// sole non-debug user for as long as that user is a COPY or INSERT_SUBREG in
/// the same basic block. It stops, returning the register reached so far,
/// when that register is physical, does not have exactly one non-debug use,
/// or is used in another block.
unsigned MLxExpansion::getDefReg(MachineInstr *MI) const {
  const MachineBasicBlock *HomeBB = MI->getParent();
  const MachineInstr *Cur = MI;
  unsigned Reg;
  for (;;) {
    Reg = Cur->getOperand(0).getReg();
    if (TargetRegisterInfo::isPhysicalRegister(Reg))
      break;
    if (!MRI->hasOneNonDBGUse(Reg))
      break;

    const MachineInstr *User = &*MRI->use_instr_nodbg_begin(Reg);
    if (User->getParent() != HomeBB)
      break;
    if (!User->isCopy() && !User->isInsertSubreg())
      break;

    // The single user merely forwards the value; keep following it.
    Cur = User;
  }
  return Reg;
}
140
141
/// hasLoopHazard - Check whether an MLx instruction is chained to itself across
142
/// a single-MBB loop.
143
0
bool MLxExpansion::hasLoopHazard(MachineInstr *MI) const {
144
0
  unsigned Reg = MI->getOperand(1).getReg();
145
0
  if (TargetRegisterInfo::isPhysicalRegister(Reg))
146
0
    return false;
147
0
148
0
  MachineBasicBlock *MBB = MI->getParent();
149
0
  MachineInstr *DefMI = MRI->getVRegDef(Reg);
150
0
  while (
true0
) {
151
0
outer_continue:
152
0
    if (DefMI->getParent() != MBB)
153
0
      break;
154
0
155
0
    
if (0
DefMI->isPHI()0
) {
156
0
      for (unsigned i = 1, e = DefMI->getNumOperands(); 
i < e0
;
i += 20
) {
157
0
        if (
DefMI->getOperand(i + 1).getMBB() == MBB0
) {
158
0
          unsigned SrcReg = DefMI->getOperand(i).getReg();
159
0
          if (
TargetRegisterInfo::isVirtualRegister(SrcReg)0
) {
160
0
            DefMI = MRI->getVRegDef(SrcReg);
161
0
            goto outer_continue;
162
0
          }
163
0
        }
164
0
      }
165
0
    } else 
if (0
DefMI->isCopyLike()0
) {
166
0
      Reg = DefMI->getOperand(1).getReg();
167
0
      if (
TargetRegisterInfo::isVirtualRegister(Reg)0
) {
168
0
        DefMI = MRI->getVRegDef(Reg);
169
0
        continue;
170
0
      }
171
0
    } else 
if (0
DefMI->isInsertSubreg()0
) {
172
0
      Reg = DefMI->getOperand(2).getReg();
173
0
      if (
TargetRegisterInfo::isVirtualRegister(Reg)0
) {
174
0
        DefMI = MRI->getVRegDef(Reg);
175
0
        continue;
176
0
      }
177
0
    }
178
0
179
0
    break;
180
0
  }
181
0
182
0
  return DefMI == MI;
183
0
}
184
185
8
bool MLxExpansion::hasRAWHazard(unsigned Reg, MachineInstr *MI) const {
186
8
  // FIXME: Detect integer instructions properly.
187
8
  const MCInstrDesc &MCID = MI->getDesc();
188
8
  unsigned Domain = MCID.TSFlags & ARMII::DomainMask;
189
8
  if (MI->mayStore())
190
2
    return false;
191
6
  unsigned Opcode = MCID.getOpcode();
192
6
  if (
Opcode == ARM::VMOVRS || 6
Opcode == ARM::VMOVRRD4
)
193
4
    return false;
194
2
  
if (2
(Domain & ARMII::DomainVFP) || 2
(Domain & ARMII::DomainNEON)0
)
195
2
    return MI->readsRegister(Reg, TRI);
196
0
  return false;
197
0
}
198
199
0
static bool isFpMulInstruction(unsigned Opcode) {
200
0
  switch (Opcode) {
201
0
  case ARM::VMULS:
202
0
  case ARM::VMULfd:
203
0
  case ARM::VMULfq:
204
0
  case ARM::VMULD:
205
0
  case ARM::VMULslfd:
206
0
  case ARM::VMULslfq:
207
0
    return true;
208
0
  default:
209
0
    return false;
210
0
  }
211
0
}
212
213
16
/// Decide whether the fp MLx instruction \p MI should be expanded.
///
/// Instructions are visited bottom-up, so the entries of LastMIs are the
/// instructions that follow \p MI in program order.
///
/// \returns true if a hazard is assumed, i.e. \p MI should be split into a
/// multiply and an add / sub.
bool MLxExpansion::FindMLxHazard(MachineInstr *MI) {
  if (NumExpand >= ExpandLimit)
    return false;

  if (ForceExapnd)
    return true;

  // getAccDefMI() returns nullptr when the accumulator is a physical register;
  // in that case nothing is known about its definition, so the checks that
  // inspect the defining instruction are skipped.
  MachineInstr *DefMI = getAccDefMI(MI);
  if (DefMI && TII->isFpMLxInstruction(DefMI->getOpcode())) {
    // r0 = vmla
    // r3 = vmla r0, r1, r2
    // takes 16 - 17 cycles
    //
    // r0 = vmla
    // r4 = vmul r1, r2
    // r3 = vadd r0, r4
    // takes about 14 - 15 cycles even with vmul stalling for 4 cycles.
    IgnoreStall.insert(DefMI);
    return true;
  }

  // On Swift, we mostly care about hazards from multiplication instructions
  // writing the accumulator and the pipelining of loop iterations by out-of-
  // order execution.
  if (isSwift)
    return (DefMI && isFpMulInstruction(DefMI->getOpcode())) ||
           hasLoopHazard(MI);

  if (IgnoreStall.count(MI))
    return false;

  // If a VMLA.F is followed by an VADD.F or VMUL.F with no RAW hazard, the
  // VADD.F or VMUL.F will stall 4 cycles before issue. The 4 cycle stall
  // preserves the in-order retirement of the instructions.
  // Look at the next few instructions, if *most* of them can cause hazards,
  // then the scheduler can't *fix* this, we'd better break up the VMLA.
  unsigned Limit1 = isLikeA9 ? 1 : 4;
  unsigned Limit2 = isLikeA9 ? 1 : 4;
  for (unsigned i = 1; i <= 4; ++i) {
    // Slot written i pushes ago. MIIdx < 4 and i <= 4, so the sum never
    // wraps below zero.
    unsigned Idx = (MIIdx + 4 - i) % 4;
    MachineInstr *NextMI = LastMIs[Idx];
    if (!NextMI)
      continue;

    if (TII->canCauseFpMLxStall(NextMI->getOpcode())) {
      if (i <= Limit1)
        return true;
    }

    // Look for VMLx RAW hazard.
    if (i <= Limit2 && hasRAWHazard(getDefReg(MI), NextMI))
      return true;
  }

  return false;
}
268
269
/// ExpandFPMLxInstructions - Expand a MLA / MLS instruction into a pair
270
/// of MUL + ADD / SUB instructions.
271
void
272
MLxExpansion::ExpandFPMLxInstruction(MachineBasicBlock &MBB, MachineInstr *MI,
273
                                     unsigned MulOpc, unsigned AddSubOpc,
274
2
                                     bool NegAcc, bool HasLane) {
275
2
  unsigned DstReg = MI->getOperand(0).getReg();
276
2
  bool DstDead = MI->getOperand(0).isDead();
277
2
  unsigned AccReg = MI->getOperand(1).getReg();
278
2
  unsigned Src1Reg = MI->getOperand(2).getReg();
279
2
  unsigned Src2Reg = MI->getOperand(3).getReg();
280
2
  bool Src1Kill = MI->getOperand(2).isKill();
281
2
  bool Src2Kill = MI->getOperand(3).isKill();
282
2
  unsigned LaneImm = HasLane ? 
MI->getOperand(4).getImm()0
:
02
;
283
2
  unsigned NextOp = HasLane ? 
50
:
42
;
284
2
  ARMCC::CondCodes Pred = (ARMCC::CondCodes)MI->getOperand(NextOp).getImm();
285
2
  unsigned PredReg = MI->getOperand(++NextOp).getReg();
286
2
287
2
  const MCInstrDesc &MCID1 = TII->get(MulOpc);
288
2
  const MCInstrDesc &MCID2 = TII->get(AddSubOpc);
289
2
  const MachineFunction &MF = *MI->getParent()->getParent();
290
2
  unsigned TmpReg = MRI->createVirtualRegister(
291
2
                      TII->getRegClass(MCID1, 0, TRI, MF));
292
2
293
2
  MachineInstrBuilder MIB = BuildMI(MBB, MI, MI->getDebugLoc(), MCID1, TmpReg)
294
2
    .addReg(Src1Reg, getKillRegState(Src1Kill))
295
2
    .addReg(Src2Reg, getKillRegState(Src2Kill));
296
2
  if (HasLane)
297
0
    MIB.addImm(LaneImm);
298
2
  MIB.addImm(Pred).addReg(PredReg);
299
2
300
2
  MIB = BuildMI(MBB, MI, MI->getDebugLoc(), MCID2)
301
2
    .addReg(DstReg, getDefRegState(true) | getDeadRegState(DstDead));
302
2
303
2
  if (
NegAcc2
) {
304
0
    bool AccKill = MRI->hasOneNonDBGUse(AccReg);
305
0
    MIB.addReg(TmpReg, getKillRegState(true))
306
0
       .addReg(AccReg, getKillRegState(AccKill));
307
2
  } else {
308
2
    MIB.addReg(AccReg).addReg(TmpReg, getKillRegState(true));
309
2
  }
310
2
  MIB.addImm(Pred).addReg(PredReg);
311
2
312
2
  DEBUG({
313
2
      dbgs() << "Expanding: " << *MI;
314
2
      dbgs() << "  to:\n";
315
2
      MachineBasicBlock::iterator MII = MI;
316
2
      MII = std::prev(MII);
317
2
      MachineInstr &MI2 = *MII;
318
2
      MII = std::prev(MII);
319
2
      MachineInstr &MI1 = *MII;
320
2
      dbgs() << "    " << MI1;
321
2
      dbgs() << "    " << MI2;
322
2
   });
323
2
324
2
  MI->eraseFromParent();
325
2
  ++NumExpand;
326
2
}
327
328
239
/// Walk \p MBB from its last instruction to its first and expand every fp MLx
/// instruction for which FindMLxHazard() reports a hazard.
///
/// Instructions that are not expanded are recorded with pushStack() so that
/// later (i.e. earlier in program order) MLx candidates can be checked against
/// them. Position markers, IMPLICIT_DEFs and COPYs are ignored; a barrier
/// empties the recorded window.
///
/// \returns true if at least one instruction was expanded.
bool MLxExpansion::ExpandFPMLxInstructions(MachineBasicBlock &MBB) {
  clearStack();
  IgnoreStall.clear();

  bool MadeChange = false;
  // Length of the current run of general-domain instructions.
  unsigned GeneralRun = 0;
  for (MachineBasicBlock::reverse_iterator It = MBB.rbegin(),
                                           End = MBB.rend();
       It != End;) {
    // The iterator is advanced before MI is processed; MI may be erased by
    // ExpandFPMLxInstruction() below.
    MachineInstr *MI = &*It++;

    if (MI->isPosition() || MI->isImplicitDef() || MI->isCopy())
      continue;

    if (MI->isBarrier()) {
      clearStack();
      GeneralRun = 0;
      continue;
    }

    const MCInstrDesc &Desc = MI->getDesc();
    if ((Desc.TSFlags & ARMII::DomainMask) == ARMII::DomainGeneral) {
      // Assume dual issues of non-VFP / NEON instructions.
      // The counter is not reset here, so only the second instruction of a
      // run records an empty slot.
      if (++GeneralRun == 2)
        pushStack(nullptr);
      continue;
    }

    GeneralRun = 0;

    unsigned MulOpc, AddSubOpc;
    bool NegAcc, HasLane;
    const bool IsMLx = TII->isFpMLxInstruction(Desc.getOpcode(), MulOpc,
                                               AddSubOpc, NegAcc, HasLane);
    if (IsMLx && FindMLxHazard(MI)) {
      ExpandFPMLxInstruction(MBB, MI, MulOpc, AddSubOpc, NegAcc, HasLane);
      MadeChange = true;
    } else {
      pushStack(MI);
    }
  }

  return MadeChange;
}
372
373
15.8k
/// Pass entry point: cache the target hooks and subtarget flags for \p Fn and
/// run the expansion over every basic block.
///
/// Nothing is done when skipFunction() asks for the function to be skipped or
/// when the subtarget's expandMLx() is false.
///
/// \returns true if any instruction in \p Fn was expanded.
bool MLxExpansion::runOnMachineFunction(MachineFunction &Fn) {
  if (skipFunction(*Fn.getFunction()))
    return false;

  TII = static_cast<const ARMBaseInstrInfo *>(Fn.getSubtarget().getInstrInfo());
  TRI = Fn.getSubtarget().getRegisterInfo();
  MRI = &Fn.getRegInfo();

  const ARMSubtarget &Subtarget = Fn.getSubtarget<ARMSubtarget>();
  if (!Subtarget.expandMLx())
    return false;

  // Swift is handled with the same limits as A9-like cores.
  isSwift = Subtarget.isSwift();
  isLikeA9 = Subtarget.isLikeA9() || isSwift;

  bool AnyExpanded = false;
  for (MachineBasicBlock &Block : Fn) {
    if (ExpandFPMLxInstructions(Block))
      AnyExpanded = true;
  }

  return AnyExpanded;
}
392
393
4.12k
/// Factory for the ARM MLA / MLS expansion pass; returns a newly allocated
/// pass object.
FunctionPass *llvm::createMLxExpansionPass() {
  FunctionPass *Pass = new MLxExpansion();
  return Pass;
}