Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Target/NVPTX/NVPTXPeephole.cpp
Line
Count
Source (jump to first uncovered line)
1
//===-- NVPTXPeephole.cpp - NVPTX Peephole Optimizations ------------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// In NVPTX, NVPTXFrameLowering will emit the following instructions at the beginning
10
// of a MachineFunction.
11
//
12
//   mov %SPL, %depot
13
//   cvta.local %SP, %SPL
14
//
15
// Because Frame Index is a generic address and alloca can only return generic
16
// pointer, without this pass the instructions producing alloca'ed address will
17
// be based on %SP. NVPTXLowerAlloca tends to help replace store and load on
18
// this address with their .local versions, but this may introduce a lot of
19
// cvta.to.local instructions. Performance can be improved if we avoid casting
20
// address back and forth and directly calculate local address based on %SPL.
21
// This peephole pass optimizes these cases, for example
22
//
23
// It will transform the following pattern
24
//    %0 = LEA_ADDRi64 %VRFrame, 4
25
//    %1 = cvta_to_local_yes_64 %0
26
//
27
// into
28
//    %1 = LEA_ADDRi64 %VRFrameLocal, 4
29
//
30
// %VRFrameLocal is the virtual register name of %SPL
31
//
32
//===----------------------------------------------------------------------===//
33
34
#include "NVPTX.h"
35
#include "llvm/CodeGen/MachineFunctionPass.h"
36
#include "llvm/CodeGen/MachineInstrBuilder.h"
37
#include "llvm/CodeGen/MachineRegisterInfo.h"
38
#include "llvm/CodeGen/TargetInstrInfo.h"
39
#include "llvm/CodeGen/TargetRegisterInfo.h"
40
41
using namespace llvm;
42
43
#define DEBUG_TYPE "nvptx-peephole"
44
45
namespace llvm {
46
void initializeNVPTXPeepholePass(PassRegistry &);
47
}
48
49
namespace {
50
struct NVPTXPeephole : public MachineFunctionPass {
51
 public:
52
  static char ID;
53
234
  NVPTXPeephole() : MachineFunctionPass(ID) {
54
234
    initializeNVPTXPeepholePass(*PassRegistry::getPassRegistry());
55
234
  }
56
57
  bool runOnMachineFunction(MachineFunction &MF) override;
58
59
1.30k
  StringRef getPassName() const override {
60
1.30k
    return "NVPTX optimize redundant cvta.to.local instruction";
61
1.30k
  }
62
63
230
  void getAnalysisUsage(AnalysisUsage &AU) const override {
64
230
    MachineFunctionPass::getAnalysisUsage(AU);
65
230
  }
66
};
67
}
68
69
char NVPTXPeephole::ID = 0;
70
71
INITIALIZE_PASS(NVPTXPeephole, "nvptx-peephole", "NVPTX Peephole", false, false)
72
73
7.46k
static bool isCVTAToLocalCombinationCandidate(MachineInstr &Root) {
74
7.46k
  auto &MBB = *Root.getParent();
75
7.46k
  auto &MF = *MBB.getParent();
76
7.46k
  // Check current instruction is cvta.to.local
77
7.46k
  if (Root.getOpcode() != NVPTX::cvta_to_local_yes_64 &&
78
7.46k
      Root.getOpcode() != NVPTX::cvta_to_local_yes)  [region count: 7.46k]
79
7.45k
    return false;
80
12
81
12
  auto &Op = Root.getOperand(1);
82
12
  const auto &MRI = MF.getRegInfo();
83
12
  MachineInstr *GenericAddrDef = nullptr;
84
12
  if (Op.isReg() && TargetRegisterInfo::isVirtualRegister(Op.getReg())) {
85
12
    GenericAddrDef = MRI.getUniqueVRegDef(Op.getReg());
86
12
  }
87
12
88
12
  // Check the register operand is uniquely defined by LEA_ADDRi instruction
89
12
  if (!GenericAddrDef || GenericAddrDef->getParent() != &MBB ||
90
12
      (GenericAddrDef->getOpcode() != NVPTX::LEA_ADDRi64 &&
91
12
       GenericAddrDef->getOpcode() != NVPTX::LEA_ADDRi)) {  [region count: 5]
92
0
    return false;
93
0
  }
94
12
95
12
  // Check the LEA_ADDRi operand is Frame index
96
12
  auto &BaseAddrOp = GenericAddrDef->getOperand(1);
97
12
  if (BaseAddrOp.isReg() && BaseAddrOp.getReg() == NVPTX::VRFrame) {
98
12
    return true;
99
12
  }
100
0
101
0
  return false;
102
0
}
103
104
12
static void CombineCVTAToLocal(MachineInstr &Root) {
105
12
  auto &MBB = *Root.getParent();
106
12
  auto &MF = *MBB.getParent();
107
12
  const auto &MRI = MF.getRegInfo();
108
12
  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
109
12
  auto &Prev = *MRI.getUniqueVRegDef(Root.getOperand(1).getReg());
110
12
111
12
  MachineInstrBuilder MIB =
112
12
      BuildMI(MF, Root.getDebugLoc(), TII->get(Prev.getOpcode()),
113
12
              Root.getOperand(0).getReg())
114
12
          .addReg(NVPTX::VRFrameLocal)
115
12
          .add(Prev.getOperand(2));
116
12
117
12
  MBB.insert((MachineBasicBlock::iterator)&Root, MIB);
118
12
119
12
  // Check if MRI has only one non dbg use, which is Root
120
12
  if (MRI.hasOneNonDBGUse(Prev.getOperand(0).getReg())) {
121
3
    Prev.eraseFromParentAndMarkDBGValuesForRemoval();
122
3
  }
123
12
  Root.eraseFromParentAndMarkDBGValuesForRemoval();
124
12
}
125
126
1.07k
bool NVPTXPeephole::runOnMachineFunction(MachineFunction &MF) {
127
1.07k
  if (skipFunction(MF.getFunction()))
128
0
    return false;
129
1.07k
130
1.07k
  bool Changed = false;
131
1.07k
  // Loop over all of the basic blocks.
132
1.20k
  for (auto &MBB : MF) {
133
1.20k
    // Traverse the basic block.
134
1.20k
    auto BlockIter = MBB.begin();
135
1.20k
136
8.66k
    while (BlockIter != MBB.end()) {
137
7.46k
      auto &MI = *BlockIter++;
138
7.46k
      if (isCVTAToLocalCombinationCandidate(MI)) {
139
12
        CombineCVTAToLocal(MI);
140
12
        Changed = true;
141
12
      }
142
7.46k
    }  // Instruction
143
1.20k
  }    // Basic Block
144
1.07k
145
1.07k
  // Remove unnecessary %VRFrame = cvta.local %VRFrameLocal
146
1.07k
  const auto &MRI = MF.getRegInfo();
147
1.07k
  if (MRI.use_empty(NVPTX::VRFrame)) {
148
1.06k
    if (auto MI = MRI.getUniqueVRegDef(NVPTX::VRFrame)) {
149
2
      MI->eraseFromParentAndMarkDBGValuesForRemoval();
150
2
    }
151
1.06k
  }
152
1.07k
153
1.07k
  return Changed;
154
1.07k
}
155
156
234
MachineFunctionPass *llvm::createNVPTXPeephole() { return new NVPTXPeephole(); }