Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp
Line
Count
Source (jump to first uncovered line)
1
//===--------------- PPCVSXFMAMutate.cpp - VSX FMA Mutation ---------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This pass mutates the form of VSX FMA instructions to avoid unnecessary
10
// copies.
11
//
12
//===----------------------------------------------------------------------===//
13
14
#include "MCTargetDesc/PPCPredicates.h"
15
#include "PPC.h"
16
#include "PPCInstrBuilder.h"
17
#include "PPCInstrInfo.h"
18
#include "PPCMachineFunctionInfo.h"
19
#include "PPCTargetMachine.h"
20
#include "llvm/ADT/STLExtras.h"
21
#include "llvm/ADT/Statistic.h"
22
#include "llvm/CodeGen/LiveIntervals.h"
23
#include "llvm/CodeGen/MachineDominators.h"
24
#include "llvm/CodeGen/MachineFrameInfo.h"
25
#include "llvm/CodeGen/MachineFunctionPass.h"
26
#include "llvm/CodeGen/MachineInstrBuilder.h"
27
#include "llvm/CodeGen/MachineMemOperand.h"
28
#include "llvm/CodeGen/MachineRegisterInfo.h"
29
#include "llvm/CodeGen/PseudoSourceValue.h"
30
#include "llvm/CodeGen/ScheduleDAG.h"
31
#include "llvm/CodeGen/SlotIndexes.h"
32
#include "llvm/MC/MCAsmInfo.h"
33
#include "llvm/Support/CommandLine.h"
34
#include "llvm/Support/Debug.h"
35
#include "llvm/Support/ErrorHandling.h"
36
#include "llvm/Support/TargetRegistry.h"
37
#include "llvm/Support/raw_ostream.h"
38
39
using namespace llvm;
40
41
// Temporarily disable FMA mutation by default, since it doesn't handle
42
// cross-basic-block intervals well.
43
// See: http://lists.llvm.org/pipermail/llvm-dev/2016-February/095669.html
44
//      http://reviews.llvm.org/D17087
45
// Command-line flag that switches the FMA mutation off. It is initialised to
// true, so the pass does no rewriting unless the flag is explicitly cleared.
static cl::opt<bool>
    DisableVSXFMAMutate("disable-ppc-vsx-fma-mutation",
                        cl::desc("Disable VSX FMA instruction mutation"),
                        cl::init(true), cl::Hidden);
49
50
#define DEBUG_TYPE "ppc-vsx-fma-mutate"

namespace llvm {
namespace PPC {
  // Forward declaration only; the definition is not in the visible part of
  // this file. The pass below treats a result of -1 as "no alternate form"
  // and otherwise installs the result as the instruction's new opcode.
  int getAltVSXFMAOpcode(uint16_t Opcode);
} // end namespace PPC
} // end namespace llvm
55
56
namespace {
57
  // PPCVSXFMAMutate pass - For copies between VSX registers and non-VSX registers
58
  // (Altivec and scalar floating-point registers), we need to transform the
59
  // copies into subregister copies with other restrictions.
60
  struct PPCVSXFMAMutate : public MachineFunctionPass {
61
    static char ID;
62
1.63k
    PPCVSXFMAMutate() : MachineFunctionPass(ID) {
63
1.63k
      initializePPCVSXFMAMutatePass(*PassRegistry::getPassRegistry());
64
1.63k
    }
65
66
    LiveIntervals *LIS;
67
    const PPCInstrInfo *TII;
68
69
protected:
70
84
    bool processBlock(MachineBasicBlock &MBB) {
71
84
      bool Changed = false;
72
84
73
84
      MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
74
84
      const TargetRegisterInfo *TRI = &TII->getRegisterInfo();
75
84
      for (MachineBasicBlock::iterator I = MBB.begin(), IE = MBB.end();
76
778
           I != IE; 
++I694
) {
77
694
        MachineInstr &MI = *I;
78
694
79
694
        // The default (A-type) VSX FMA form kills the addend (it is taken from
80
694
        // the target register, which is then updated to reflect the result of
81
694
        // the FMA). If the instruction, however, kills one of the registers
82
694
        // used for the product, then we can use the M-form instruction (which
83
694
        // will take that value from the to-be-defined register).
84
694
85
694
        int AltOpc = PPC::getAltVSXFMAOpcode(MI.getOpcode());
86
694
        if (AltOpc == -1)
87
589
          continue;
88
105
89
105
        // This pass is run after register coalescing, and so we're looking for
90
105
        // a situation like this:
91
105
        //   ...
92
105
        //   %5 = COPY %9; VSLRC:%5,%9
93
105
        //   %5<def,tied1> = XSMADDADP %5<tied0>, %17, %16,
94
105
        //                         implicit %rm; VSLRC:%5,%17,%16
95
105
        //   ...
96
105
        //   %9<def,tied1> = XSMADDADP %9<tied0>, %17, %19,
97
105
        //                         implicit %rm; VSLRC:%9,%17,%19
98
105
        //   ...
99
105
        // Where we can eliminate the copy by changing from the A-type to the
100
105
        // M-type instruction. Specifically, for this example, this means:
101
105
        //   %5<def,tied1> = XSMADDADP %5<tied0>, %17, %16,
102
105
        //                         implicit %rm; VSLRC:%5,%17,%16
103
105
        // is replaced by:
104
105
        //   %16<def,tied1> = XSMADDMDP %16<tied0>, %18, %9,
105
105
        //                         implicit %rm; VSLRC:%16,%18,%9
106
105
        // and we remove: %5 = COPY %9; VSLRC:%5,%9
107
105
108
105
        SlotIndex FMAIdx = LIS->getInstructionIndex(MI);
109
105
110
105
        VNInfo *AddendValNo =
111
105
            LIS->getInterval(MI.getOperand(1).getReg()).Query(FMAIdx).valueIn();
112
105
113
105
        // This can be null if the register is undef.
114
105
        if (!AddendValNo)
115
0
          continue;
116
105
117
105
        MachineInstr *AddendMI = LIS->getInstructionFromIndex(AddendValNo->def);
118
105
119
105
        // The addend and this instruction must be in the same block.
120
105
121
105
        if (!AddendMI || AddendMI->getParent() != MI.getParent())
122
0
          continue;
123
105
124
105
        // The addend must be a full copy within the same register class.
125
105
126
105
        if (!AddendMI->isFullCopy())
127
25
          continue;
128
80
129
80
        unsigned AddendSrcReg = AddendMI->getOperand(1).getReg();
130
80
        if (TargetRegisterInfo::isVirtualRegister(AddendSrcReg)) {
131
5
          if (MRI.getRegClass(AddendMI->getOperand(0).getReg()) !=
132
5
              MRI.getRegClass(AddendSrcReg))
133
0
            continue;
134
75
        } else {
135
75
          // If AddendSrcReg is a physical register, make sure the destination
136
75
          // register class contains it.
137
75
          if (!MRI.getRegClass(AddendMI->getOperand(0).getReg())
138
75
                ->contains(AddendSrcReg))
139
0
            continue;
140
80
        }
141
80
142
80
        // In theory, there could be other uses of the addend copy before this
143
80
        // fma.  We could deal with this, but that would require additional
144
80
        // logic below and I suspect it will not occur in any relevant
145
80
        // situations.  Additionally, check whether the copy source is killed
146
80
        // prior to the fma.  In order to replace the addend here with the
147
80
        // source of the copy, it must still be live here.  We can't use
148
80
        // interval testing for a physical register, so as long as we're
149
80
        // walking the MIs we may as well test liveness here.
150
80
        //
151
80
        // FIXME: There is a case that occurs in practice, like this:
152
80
        //   %9 = COPY %f1; VSSRC:%9
153
80
        //   ...
154
80
        //   %6 = COPY %9; VSSRC:%6,%9
155
80
        //   %7 = COPY %9; VSSRC:%7,%9
156
80
        //   %9<def,tied1> = XSMADDASP %9<tied0>, %1, %4; VSSRC:
157
80
        //   %6<def,tied1> = XSMADDASP %6<tied0>, %1, %2; VSSRC:
158
80
        //   %7<def,tied1> = XSMADDASP %7<tied0>, %1, %3; VSSRC:
159
80
        // which prevents an otherwise-profitable transformation.
160
80
        bool OtherUsers = false, KillsAddendSrc = false;
161
80
        for (auto J = std::prev(I), JE = MachineBasicBlock::iterator(AddendMI);
162
256
             J != JE; 
--J176
) {
163
184
          if (J->readsVirtualRegister(AddendMI->getOperand(0).getReg())) {
164
4
            OtherUsers = true;
165
4
            break;
166
4
          }
167
180
          if (J->modifiesRegister(AddendSrcReg, TRI) ||
168
180
              
J->killsRegister(AddendSrcReg, TRI)176
) {
169
4
            KillsAddendSrc = true;
170
4
            break;
171
4
          }
172
180
        }
173
80
174
80
        if (OtherUsers || 
KillsAddendSrc76
)
175
8
          continue;
176
72
177
72
178
72
        // The transformation doesn't work well with things like:
179
72
        //    %5 = A-form-op %5, %11, %5;
180
72
        // unless %11 is also a kill, so skip when it is not,
181
72
        // and check operand 3 to see it is also a kill to handle the case:
182
72
        //   %5 = A-form-op %5, %5, %11;
183
72
        // where %5 and %11 are both kills. This case would be skipped
184
72
        // otherwise.
185
72
        unsigned OldFMAReg = MI.getOperand(0).getReg();
186
72
187
72
        // Find one of the product operands that is killed by this instruction.
188
72
        unsigned KilledProdOp = 0, OtherProdOp = 0;
189
72
        unsigned Reg2 = MI.getOperand(2).getReg();
190
72
        unsigned Reg3 = MI.getOperand(3).getReg();
191
72
        if (LIS->getInterval(Reg2).Query(FMAIdx).isKill()
192
72
            && 
Reg2 != OldFMAReg65
) {
193
65
          KilledProdOp = 2;
194
65
          OtherProdOp  = 3;
195
65
        } else 
if (7
LIS->getInterval(Reg3).Query(FMAIdx).isKill()7
196
7
            && 
Reg3 != OldFMAReg0
) {
197
0
          KilledProdOp = 3;
198
0
          OtherProdOp  = 2;
199
0
        }
200
72
201
72
        // If there are no usable killed product operands, then this
202
72
        // transformation is likely not profitable.
203
72
        if (!KilledProdOp)
204
7
          continue;
205
65
206
65
        // If the addend copy is used only by this MI, then the addend source
207
65
        // register is likely not live here. This could be fixed (based on the
208
65
        // legality checks above, the live range for the addend source register
209
65
        // could be extended), but it seems likely that such a trivial copy can
210
65
        // be coalesced away later, and thus is not worth the effort.
211
65
        if (TargetRegisterInfo::isVirtualRegister(AddendSrcReg) &&
212
65
            
!LIS->getInterval(AddendSrcReg).liveAt(FMAIdx)1
)
213
0
          continue;
214
65
215
65
        // Transform: (O2 * O3) + O1 -> (O2 * O1) + O3.
216
65
217
65
        unsigned KilledProdReg = MI.getOperand(KilledProdOp).getReg();
218
65
        unsigned OtherProdReg = MI.getOperand(OtherProdOp).getReg();
219
65
220
65
        unsigned AddSubReg = AddendMI->getOperand(1).getSubReg();
221
65
        unsigned KilledProdSubReg = MI.getOperand(KilledProdOp).getSubReg();
222
65
        unsigned OtherProdSubReg = MI.getOperand(OtherProdOp).getSubReg();
223
65
224
65
        bool AddRegKill = AddendMI->getOperand(1).isKill();
225
65
        bool KilledProdRegKill = MI.getOperand(KilledProdOp).isKill();
226
65
        bool OtherProdRegKill = MI.getOperand(OtherProdOp).isKill();
227
65
228
65
        bool AddRegUndef = AddendMI->getOperand(1).isUndef();
229
65
        bool KilledProdRegUndef = MI.getOperand(KilledProdOp).isUndef();
230
65
        bool OtherProdRegUndef = MI.getOperand(OtherProdOp).isUndef();
231
65
232
65
        // If there isn't a class that fits, we can't perform the transform.
233
65
        // This is needed for correctness with a mixture of VSX and Altivec
234
65
        // instructions to make sure that a low VSX register is not assigned to
235
65
        // the Altivec instruction.
236
65
        if (!MRI.constrainRegClass(KilledProdReg,
237
65
                                   MRI.getRegClass(OldFMAReg)))
238
0
          continue;
239
65
240
65
        assert(OldFMAReg == AddendMI->getOperand(0).getReg() &&
241
65
               "Addend copy not tied to old FMA output!");
242
65
243
65
        LLVM_DEBUG(dbgs() << "VSX FMA Mutation:\n    " << MI);
244
65
245
65
        MI.getOperand(0).setReg(KilledProdReg);
246
65
        MI.getOperand(1).setReg(KilledProdReg);
247
65
        MI.getOperand(3).setReg(AddendSrcReg);
248
65
249
65
        MI.getOperand(0).setSubReg(KilledProdSubReg);
250
65
        MI.getOperand(1).setSubReg(KilledProdSubReg);
251
65
        MI.getOperand(3).setSubReg(AddSubReg);
252
65
253
65
        MI.getOperand(1).setIsKill(KilledProdRegKill);
254
65
        MI.getOperand(3).setIsKill(AddRegKill);
255
65
256
65
        MI.getOperand(1).setIsUndef(KilledProdRegUndef);
257
65
        MI.getOperand(3).setIsUndef(AddRegUndef);
258
65
259
65
        MI.setDesc(TII->get(AltOpc));
260
65
261
65
        // If the addend is also a multiplicand, replace it with the addend
262
65
        // source in both places.
263
65
        if (OtherProdReg == AddendMI->getOperand(0).getReg()) {
264
0
          MI.getOperand(2).setReg(AddendSrcReg);
265
0
          MI.getOperand(2).setSubReg(AddSubReg);
266
0
          MI.getOperand(2).setIsKill(AddRegKill);
267
0
          MI.getOperand(2).setIsUndef(AddRegUndef);
268
65
        } else {
269
65
          MI.getOperand(2).setReg(OtherProdReg);
270
65
          MI.getOperand(2).setSubReg(OtherProdSubReg);
271
65
          MI.getOperand(2).setIsKill(OtherProdRegKill);
272
65
          MI.getOperand(2).setIsUndef(OtherProdRegUndef);
273
65
        }
274
65
275
65
        LLVM_DEBUG(dbgs() << " -> " << MI);
276
65
277
65
        // The killed product operand was killed here, so we can reuse it now
278
65
        // for the result of the fma.
279
65
280
65
        LiveInterval &FMAInt = LIS->getInterval(OldFMAReg);
281
65
        VNInfo *FMAValNo = FMAInt.getVNInfoAt(FMAIdx.getRegSlot());
282
65
        for (auto UI = MRI.reg_nodbg_begin(OldFMAReg), UE = MRI.reg_nodbg_end();
283
235
             UI != UE;) {
284
170
          MachineOperand &UseMO = *UI;
285
170
          MachineInstr *UseMI = UseMO.getParent();
286
170
          ++UI;
287
170
288
170
          // Don't replace the result register of the copy we're about to erase.
289
170
          if (UseMI == AddendMI)
290
65
            continue;
291
105
292
105
          UseMO.substVirtReg(KilledProdReg, KilledProdSubReg, *TRI);
293
105
        }
294
65
295
65
        // Extend the live intervals of the killed product operand to hold the
296
65
        // fma result.
297
65
298
65
        LiveInterval &NewFMAInt = LIS->getInterval(KilledProdReg);
299
65
        for (LiveInterval::iterator AI = FMAInt.begin(), AE = FMAInt.end();
300
215
             AI != AE; 
++AI150
) {
301
150
          // Don't add the segment that corresponds to the original copy.
302
150
          if (AI->valno == AddendValNo)
303
65
            continue;
304
85
305
85
          VNInfo *NewFMAValNo =
306
85
            NewFMAInt.getNextValue(AI->start,
307
85
                                   LIS->getVNInfoAllocator());
308
85
309
85
          NewFMAInt.addSegment(LiveInterval::Segment(AI->start, AI->end,
310
85
                                                     NewFMAValNo));
311
85
        }
312
65
        LLVM_DEBUG(dbgs() << "  extended: " << NewFMAInt << '\n');
313
65
314
65
        // Extend the live interval of the addend source (it might end at the
315
65
        // copy to be removed, or somewhere in between there and here). This
316
65
        // is necessary only if it is a physical register.
317
65
        if (!TargetRegisterInfo::isVirtualRegister(AddendSrcReg))
318
128
          
for (MCRegUnitIterator Units(AddendSrcReg, TRI); 64
Units.isValid();
319
64
               ++Units) {
320
64
            unsigned Unit = *Units;
321
64
322
64
            LiveRange &AddendSrcRange = LIS->getRegUnit(Unit);
323
64
            AddendSrcRange.extendInBlock(LIS->getMBBStartIdx(&MBB),
324
64
                                         FMAIdx.getRegSlot());
325
64
            LLVM_DEBUG(dbgs() << "  extended: " << AddendSrcRange << '\n');
326
64
          }
327
65
328
65
        FMAInt.removeValNo(FMAValNo);
329
65
        LLVM_DEBUG(dbgs() << "  trimmed:  " << FMAInt << '\n');
330
65
331
65
        // Remove the (now unused) copy.
332
65
333
65
        LLVM_DEBUG(dbgs() << "  removing: " << *AddendMI << '\n');
334
65
        LIS->RemoveMachineInstrFromMaps(*AddendMI);
335
65
        AddendMI->eraseFromParent();
336
65
337
65
        Changed = true;
338
65
      }
339
84
340
84
      return Changed;
341
84
    }
342
343
public:
344
10.4k
    bool runOnMachineFunction(MachineFunction &MF) override {
345
10.4k
      if (skipFunction(MF.getFunction()))
346
1
        return false;
347
10.4k
348
10.4k
      // If we don't have VSX then go ahead and return without doing
349
10.4k
      // anything.
350
10.4k
      const PPCSubtarget &STI = MF.getSubtarget<PPCSubtarget>();
351
10.4k
      if (!STI.hasVSX())
352
2.49k
        return false;
353
7.97k
354
7.97k
      LIS = &getAnalysis<LiveIntervals>();
355
7.97k
356
7.97k
      TII = STI.getInstrInfo();
357
7.97k
358
7.97k
      bool Changed = false;
359
7.97k
360
7.97k
      if (DisableVSXFMAMutate)
361
7.89k
        return Changed;
362
82
363
166
      
for (MachineFunction::iterator I = MF.begin(); 82
I != MF.end();) {
364
84
        MachineBasicBlock &B = *I++;
365
84
        if (processBlock(B))
366
65
          Changed = true;
367
84
      }
368
82
369
82
      return Changed;
370
82
    }
371
372
1.63k
    void getAnalysisUsage(AnalysisUsage &AU) const override {
373
1.63k
      AU.addRequired<LiveIntervals>();
374
1.63k
      AU.addPreserved<LiveIntervals>();
375
1.63k
      AU.addRequired<SlotIndexes>();
376
1.63k
      AU.addPreserved<SlotIndexes>();
377
1.63k
      AU.addRequired<MachineDominatorTree>();
378
1.63k
      AU.addPreserved<MachineDominatorTree>();
379
1.63k
      MachineFunctionPass::getAnalysisUsage(AU);
380
1.63k
    }
381
  };
382
}
383
384
101k
// Pass registration: names the pass and lists the analyses it depends on.
INITIALIZE_PASS_BEGIN(PPCVSXFMAMutate, DEBUG_TYPE, "PowerPC VSX FMA Mutation",
                      false, false)
INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_END(PPCVSXFMAMutate, DEBUG_TYPE, "PowerPC VSX FMA Mutation",
                    false, false)
391
392
// llvm::PPCVSXFMAMutateID is a reference to the pass's static ID member, whose
// storage is defined immediately below.
char &llvm::PPCVSXFMAMutateID = PPCVSXFMAMutate::ID;

char PPCVSXFMAMutate::ID = 0;
395
0
// Factory: heap-allocates a fresh PPCVSXFMAMutate pass and hands the raw
// pointer back to the caller.
FunctionPass *llvm::createPPCVSXFMAMutatePass() {
  FunctionPass *NewPass = new PPCVSXFMAMutate();
  return NewPass;
}