Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp
Line
Count
Source (jump to first uncovered line)
1
//===- NVPTXLowerAggrCopies.cpp - ------------------------------*- C++ -*--===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// \file
10
// Lower aggregate copies, memset, memcpy, memmove intrinsics into loops when
11
// the size is large or is not a compile-time constant.
12
//
13
//===----------------------------------------------------------------------===//
14
15
#include "NVPTXLowerAggrCopies.h"
16
#include "llvm/Analysis/TargetTransformInfo.h"
17
#include "llvm/CodeGen/StackProtector.h"
18
#include "llvm/IR/Constants.h"
19
#include "llvm/IR/DataLayout.h"
20
#include "llvm/IR/Function.h"
21
#include "llvm/IR/IRBuilder.h"
22
#include "llvm/IR/Instructions.h"
23
#include "llvm/IR/IntrinsicInst.h"
24
#include "llvm/IR/Intrinsics.h"
25
#include "llvm/IR/LLVMContext.h"
26
#include "llvm/IR/Module.h"
27
#include "llvm/Support/Debug.h"
28
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
29
#include "llvm/Transforms/Utils/LowerMemIntrinsics.h"
30
31
#define DEBUG_TYPE "nvptx"
32
33
using namespace llvm;
34
35
namespace {
36
37
// actual analysis class, which is a functionpass
38
struct NVPTXLowerAggrCopies : public FunctionPass {
39
  static char ID;
40
41
264
  NVPTXLowerAggrCopies() : FunctionPass(ID) {}
42
43
262
  void getAnalysisUsage(AnalysisUsage &AU) const override {
44
262
    AU.addPreserved<StackProtector>();
45
262
    AU.addRequired<TargetTransformInfoWrapperPass>();
46
262
  }
47
48
  bool runOnFunction(Function &F) override;
49
50
  static const unsigned MaxAggrCopySize = 128;
51
52
1.70k
  StringRef getPassName() const override {
53
1.70k
    return "Lower aggregate copies/intrinsics into loops";
54
1.70k
  }
55
};
56
57
char NVPTXLowerAggrCopies::ID = 0;
58
59
1.69k
/// Lowers large aggregate copies and llvm.mem* intrinsic calls in \p F into
/// explicit loops.
///
/// Candidates are collected in a first phase and rewritten in a second one:
///   * a load with exactly one use, whose store size is at least
///     MaxAggrCopySize, and whose single user is a store of the loaded value;
///   * a mem* intrinsic call whose length is either not a ConstantInt or is a
///     constant of at least MaxAggrCopySize.
///
/// \returns true if at least one candidate was found (and therefore
/// rewritten), false if \p F was left untouched.
bool NVPTXLowerAggrCopies::runOnFunction(Function &F) {
  SmallVector<LoadInst *, 4> AggrLoads;
  SmallVector<MemIntrinsic *, 4> MemCalls;

  const DataLayout &DL = F.getParent()->getDataLayout();
  LLVMContext &Context = F.getParent()->getContext();
  const TargetTransformInfo &TTI =
      getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);

  // Collect all aggregate loads and mem* calls.
  for (BasicBlock &BB : F) {
    for (Instruction &I : BB) {
      if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
        if (!LI->hasOneUse())
          continue;

        if (DL.getTypeStoreSize(LI->getType()) < MaxAggrCopySize)
          continue;

        if (StoreInst *SI = dyn_cast<StoreInst>(LI->user_back())) {
          // The load must be the value being stored (operand 0), not the
          // address the store writes to.
          if (SI->getOperand(0) != LI)
            continue;
          AggrLoads.push_back(LI);
        }
      } else if (MemIntrinsic *IntrCall = dyn_cast<MemIntrinsic>(&I)) {
        // Convert intrinsic calls with variable size or with constant size
        // larger than the MaxAggrCopySize threshold.
        if (ConstantInt *LenCI = dyn_cast<ConstantInt>(IntrCall->getLength())) {
          if (LenCI->getZExtValue() >= MaxAggrCopySize) {
            MemCalls.push_back(IntrCall);
          }
        } else {
          MemCalls.push_back(IntrCall);
        }
      }
    }
  }

  if (AggrLoads.empty() && MemCalls.empty()) {
    return false;
  }

  //
  // Do the transformation of an aggr load/copy/set to a loop
  //
  for (LoadInst *LI : AggrLoads) {
    // The collection phase only records loads whose single user is a store
    // of the loaded value, so this cast cannot fail; cast<> asserts that
    // invariant instead of dereferencing an unchecked dyn_cast<> result.
    StoreInst *SI = cast<StoreInst>(LI->user_back());
    Value *SrcAddr = LI->getOperand(0);
    Value *DstAddr = SI->getOperand(1);
    unsigned NumLoads = DL.getTypeStoreSize(LI->getType());
    ConstantInt *CopyLen =
        ConstantInt::get(Type::getInt32Ty(Context), NumLoads);

    createMemCpyLoopKnownSize(/* ConvertedInst */ SI,
                              /* SrcAddr */ SrcAddr, /* DstAddr */ DstAddr,
                              /* CopyLen */ CopyLen,
                              /* SrcAlign */ LI->getAlignment(),
                              /* DestAlign */ SI->getAlignment(),
                              /* SrcIsVolatile */ LI->isVolatile(),
                              /* DstIsVolatile */ SI->isVolatile(), TTI);

    // The store must go first: it is the only user of the load.
    SI->eraseFromParent();
    LI->eraseFromParent();
  }

  // Transform mem* intrinsic calls.
  for (MemIntrinsic *MemCall : MemCalls) {
    if (MemCpyInst *Memcpy = dyn_cast<MemCpyInst>(MemCall)) {
      expandMemCpyAsLoop(Memcpy, TTI);
    } else if (MemMoveInst *Memmove = dyn_cast<MemMoveInst>(MemCall)) {
      expandMemMoveAsLoop(Memmove);
    } else if (MemSetInst *Memset = dyn_cast<MemSetInst>(MemCall)) {
      expandMemSetAsLoop(Memset);
    }
    MemCall->eraseFromParent();
  }

  return true;
}
139
140
} // namespace
141
142
namespace llvm {
143
void initializeNVPTXLowerAggrCopiesPass(PassRegistry &);
144
}
145
146
// Pass registration: associates NVPTXLowerAggrCopies with the argument string
// "nvptx-lower-aggr-copies" and a human-readable description.
// NOTE(review): the two trailing `false` values are presumably the macro's
// "CFG only" and "is analysis" flags — confirm against llvm/PassSupport.h.
INITIALIZE_PASS(NVPTXLowerAggrCopies,
                "nvptx-lower-aggr-copies",
                "Lower aggregate copies, and llvm.mem* intrinsics into loops",
                false,
                false)
149
150
263
// Factory: allocates a new NVPTXLowerAggrCopies pass and returns it through
// its FunctionPass base pointer.
// NOTE(review): the caller presumably takes ownership of the returned pass —
// confirm against the pass manager that consumes it.
FunctionPass *llvm::createLowerAggrCopies() {
  FunctionPass *Pass = new NVPTXLowerAggrCopies();
  return Pass;
}