/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===- NVPTXLowerAggrCopies.cpp - ------------------------------*- C++ -*--===// |
2 | | // |
3 | | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | | // See https://llvm.org/LICENSE.txt for license information. |
5 | | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | | // |
7 | | //===----------------------------------------------------------------------===// |
8 | | // |
9 | | // \file |
10 | | // Lower aggregate copies, memset, memcpy, memmove intrinsics into loops when |
11 | | // the size is large or is not a compile-time constant. |
12 | | // |
13 | | //===----------------------------------------------------------------------===// |
14 | | |
15 | | #include "NVPTXLowerAggrCopies.h" |
16 | | #include "llvm/Analysis/TargetTransformInfo.h" |
17 | | #include "llvm/CodeGen/StackProtector.h" |
18 | | #include "llvm/IR/Constants.h" |
19 | | #include "llvm/IR/DataLayout.h" |
20 | | #include "llvm/IR/Function.h" |
21 | | #include "llvm/IR/IRBuilder.h" |
22 | | #include "llvm/IR/Instructions.h" |
23 | | #include "llvm/IR/IntrinsicInst.h" |
24 | | #include "llvm/IR/Intrinsics.h" |
25 | | #include "llvm/IR/LLVMContext.h" |
26 | | #include "llvm/IR/Module.h" |
27 | | #include "llvm/Support/Debug.h" |
28 | | #include "llvm/Transforms/Utils/BasicBlockUtils.h" |
29 | | #include "llvm/Transforms/Utils/LowerMemIntrinsics.h" |
30 | | |
31 | | #define DEBUG_TYPE "nvptx" |
32 | | |
33 | | using namespace llvm; |
34 | | |
35 | | namespace { |
36 | | |
37 | | // The actual transformation pass, implemented as a FunctionPass. |
38 | | struct NVPTXLowerAggrCopies : public FunctionPass { |
39 | | static char ID; |
40 | | |
41 | 264 | NVPTXLowerAggrCopies() : FunctionPass(ID) {} |
42 | | |
43 | 262 | void getAnalysisUsage(AnalysisUsage &AU) const override { |
44 | 262 | AU.addPreserved<StackProtector>(); |
45 | 262 | AU.addRequired<TargetTransformInfoWrapperPass>(); |
46 | 262 | } |
47 | | |
48 | | bool runOnFunction(Function &F) override; |
49 | | |
50 | | static const unsigned MaxAggrCopySize = 128; |
51 | | |
52 | 1.70k | StringRef getPassName() const override { |
53 | 1.70k | return "Lower aggregate copies/intrinsics into loops"; |
54 | 1.70k | } |
55 | | }; |
56 | | |
57 | | char NVPTXLowerAggrCopies::ID = 0; |
58 | | |
59 | 1.69k | bool NVPTXLowerAggrCopies::runOnFunction(Function &F) { |
60 | 1.69k | SmallVector<LoadInst *, 4> AggrLoads; |
61 | 1.69k | SmallVector<MemIntrinsic *, 4> MemCalls; |
62 | 1.69k | |
63 | 1.69k | const DataLayout &DL = F.getParent()->getDataLayout(); |
64 | 1.69k | LLVMContext &Context = F.getParent()->getContext(); |
65 | 1.69k | const TargetTransformInfo &TTI = |
66 | 1.69k | getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); |
67 | 1.69k | |
68 | 1.69k | // Collect all aggregate loads and mem* calls. |
69 | 3.53k | for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) { |
70 | 7.34k | for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE; |
71 | 5.50k | ++II) { |
72 | 5.50k | if (LoadInst *LI = dyn_cast<LoadInst>(II)) { |
73 | 365 | if (!LI->hasOneUse()) |
74 | 30 | continue; |
75 | 335 | |
76 | 335 | if (DL.getTypeStoreSize(LI->getType()) < MaxAggrCopySize) |
77 | 335 | continue; |
78 | 0 | |
79 | 0 | if (StoreInst *SI = dyn_cast<StoreInst>(LI->user_back())) { |
80 | 0 | if (SI->getOperand(0) != LI) |
81 | 0 | continue; |
82 | 0 | AggrLoads.push_back(LI); |
83 | 0 | } |
84 | 5.14k | } else if (MemIntrinsic *IntrCall = dyn_cast<MemIntrinsic>(II)) { |
85 | 14 | // Convert intrinsic calls with variable size or with constant size |
86 | 14 | // larger than the MaxAggrCopySize threshold. |
87 | 14 | if (ConstantInt *LenCI = dyn_cast<ConstantInt>(IntrCall->getLength())) { |
88 | 2 | if (LenCI->getZExtValue() >= MaxAggrCopySize) { |
89 | 2 | MemCalls.push_back(IntrCall); |
90 | 2 | } |
91 | 12 | } else { |
92 | 12 | MemCalls.push_back(IntrCall); |
93 | 12 | } |
94 | 14 | } |
95 | 5.50k | } |
96 | 1.84k | } |
97 | 1.69k | |
98 | 1.69k | if (AggrLoads.size() == 0 && MemCalls.size() == 0) { |
99 | 1.68k | return false; |
100 | 1.68k | } |
101 | 14 | |
102 | 14 | // |
103 | 14 | // Do the transformation of an aggr load/copy/set to a loop |
104 | 14 | // |
105 | 14 | for (LoadInst *LI : AggrLoads) { |
106 | 0 | StoreInst *SI = dyn_cast<StoreInst>(*LI->user_begin()); |
107 | 0 | Value *SrcAddr = LI->getOperand(0); |
108 | 0 | Value *DstAddr = SI->getOperand(1); |
109 | 0 | unsigned NumLoads = DL.getTypeStoreSize(LI->getType()); |
110 | 0 | ConstantInt *CopyLen = |
111 | 0 | ConstantInt::get(Type::getInt32Ty(Context), NumLoads); |
112 | 0 | |
113 | 0 | createMemCpyLoopKnownSize(/* ConvertedInst */ SI, |
114 | 0 | /* SrcAddr */ SrcAddr, /* DstAddr */ DstAddr, |
115 | 0 | /* CopyLen */ CopyLen, |
116 | 0 | /* SrcAlign */ LI->getAlignment(), |
117 | 0 | /* DestAlign */ SI->getAlignment(), |
118 | 0 | /* SrcIsVolatile */ LI->isVolatile(), |
119 | 0 | /* DstIsVolatile */ SI->isVolatile(), TTI); |
120 | 0 | |
121 | 0 | SI->eraseFromParent(); |
122 | 0 | LI->eraseFromParent(); |
123 | 0 | } |
124 | 14 | |
125 | 14 | // Transform mem* intrinsic calls. |
126 | 14 | for (MemIntrinsic *MemCall : MemCalls) { |
127 | 14 | if (MemCpyInst *Memcpy = dyn_cast<MemCpyInst>(MemCall)) { |
128 | 8 | expandMemCpyAsLoop(Memcpy, TTI); |
129 | 8 | } else if (MemMoveInst *Memmove = dyn_cast<MemMoveInst>(MemCall)) { |
130 | 2 | expandMemMoveAsLoop(Memmove); |
131 | 4 | } else if (MemSetInst *Memset = dyn_cast<MemSetInst>(MemCall)) { |
132 | 4 | expandMemSetAsLoop(Memset); |
133 | 4 | } |
134 | 14 | MemCall->eraseFromParent(); |
135 | 14 | } |
136 | 14 | |
137 | 14 | return true; |
138 | 14 | } |
139 | | |
140 | | } // namespace |
141 | | |
142 | | namespace llvm { |
143 | | void initializeNVPTXLowerAggrCopiesPass(PassRegistry &); |
144 | | } |
145 | | |
146 | | INITIALIZE_PASS(NVPTXLowerAggrCopies, "nvptx-lower-aggr-copies", |
147 | | "Lower aggregate copies, and llvm.mem* intrinsics into loops", |
148 | | false, false) |
149 | | |
150 | 263 | FunctionPass *llvm::createLowerAggrCopies() { |
151 | 263 | return new NVPTXLowerAggrCopies(); |
152 | 263 | } |