/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/lib/Target/NVPTX/NVVMReflect.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===- NVVMReflect.cpp - NVVM Emulate conditional compilation -------------===// |
2 | | // |
3 | | // The LLVM Compiler Infrastructure |
4 | | // |
5 | | // This file is distributed under the University of Illinois Open Source |
6 | | // License. See LICENSE.TXT for details. |
7 | | // |
8 | | //===----------------------------------------------------------------------===// |
9 | | // |
10 | | // This pass replaces occurrences of __nvvm_reflect("foo") and llvm.nvvm.reflect |
11 | | // with an integer. |
12 | | // |
13 | | // We choose the value we use by looking at metadata in the module itself. Note |
14 | | // that we intentionally only have one way to choose these values, because other |
15 | | // parts of LLVM (particularly, InstCombineCall) rely on being able to predict |
16 | | // the values chosen by this pass. |
17 | | // |
18 | | // If we see an unknown string, we replace its call with 0. |
19 | | // |
20 | | //===----------------------------------------------------------------------===// |
21 | | |
22 | | #include "NVPTX.h" |
23 | | #include "llvm/ADT/SmallVector.h" |
24 | | #include "llvm/ADT/StringMap.h" |
25 | | #include "llvm/IR/Constants.h" |
26 | | #include "llvm/IR/DerivedTypes.h" |
27 | | #include "llvm/IR/Function.h" |
28 | | #include "llvm/IR/InstIterator.h" |
29 | | #include "llvm/IR/Instructions.h" |
30 | | #include "llvm/IR/Intrinsics.h" |
31 | | #include "llvm/IR/Module.h" |
32 | | #include "llvm/IR/Type.h" |
33 | | #include "llvm/Pass.h" |
34 | | #include "llvm/Support/CommandLine.h" |
35 | | #include "llvm/Support/Debug.h" |
36 | | #include "llvm/Support/raw_os_ostream.h" |
37 | | #include "llvm/Support/raw_ostream.h" |
38 | | #include "llvm/Transforms/Scalar.h" |
39 | | #include <sstream> |
40 | | #include <string> |
41 | 4.97k | #define NVVM_REFLECT_FUNCTION "__nvvm_reflect" |
42 | | |
43 | | using namespace llvm; |
44 | | |
45 | | #define DEBUG_TYPE "nvptx-reflect" |
46 | | |
47 | | namespace llvm { void initializeNVVMReflectPass(PassRegistry &); } |
48 | | |
49 | | namespace { |
50 | | class NVVMReflect : public FunctionPass { |
51 | | public: |
52 | | static char ID; |
53 | 312 | NVVMReflect() : FunctionPass(ID) { |
54 | 312 | initializeNVVMReflectPass(*PassRegistry::getPassRegistry()); |
55 | 312 | } |
56 | | |
57 | | bool runOnFunction(Function &) override; |
58 | | }; |
59 | | } |
60 | | |
61 | 309 | FunctionPass *llvm::createNVVMReflectPass() { return new NVVMReflect(); } |
62 | | |
63 | | static cl::opt<bool> |
64 | | NVVMReflectEnabled("nvvm-reflect-enable", cl::init(true), cl::Hidden, |
65 | | cl::desc("NVVM reflection, enabled by default")); |
66 | | |
67 | | char NVVMReflect::ID = 0; |
68 | | INITIALIZE_PASS(NVVMReflect, "nvvm-reflect", |
69 | | "Replace occurrences of __nvvm_reflect() calls with 0/1", false, |
70 | | false) |
71 | | |
72 | 2.00k | bool NVVMReflect::runOnFunction(Function &F) { |
73 | 2.00k | if (!NVVMReflectEnabled) |
74 | 0 | return false; |
75 | 2.00k | |
76 | 2.00k | if (F.getName() == NVVM_REFLECT_FUNCTION) { |
77 | 0 | assert(F.isDeclaration() && "_reflect function should not have a body"); |
78 | 0 | assert(F.getReturnType()->isIntegerTy() && |
79 | 0 | "_reflect's return type should be integer"); |
80 | 0 | return false; |
81 | 0 | } |
82 | 2.00k | |
83 | 2.00k | SmallVector<Instruction *, 4> ToRemove; |
84 | 2.00k | |
85 | 2.00k | // Go through the calls in this function. Each call to __nvvm_reflect or |
86 | 2.00k | // llvm.nvvm.reflect should be a CallInst with a ConstantArray argument. |
87 | 2.00k | // First validate that. If the c-string corresponding to the ConstantArray can |
88 | 2.00k | // be found successfully, see if it can be found in VarMap. If so, replace the |
89 | 2.00k | // uses of CallInst with the value found in VarMap. If not, replace the use |
90 | 2.00k | // with value 0. |
91 | 2.00k | |
92 | 2.00k | // The IR for __nvvm_reflect calls differs between CUDA versions. |
93 | 2.00k | // |
94 | 2.00k | // CUDA 6.5 and earlier uses this sequence: |
95 | 2.00k | // %ptr = tail call i8* @llvm.nvvm.ptr.constant.to.gen.p0i8.p4i8 |
96 | 2.00k | // (i8 addrspace(4)* getelementptr inbounds |
97 | 2.00k | // ([8 x i8], [8 x i8] addrspace(4)* @str, i32 0, i32 0)) |
98 | 2.00k | // %reflect = tail call i32 @__nvvm_reflect(i8* %ptr) |
99 | 2.00k | // |
100 | 2.00k | // The value returned by Sym->getOperand(0) is a Constant with a |
101 | 2.00k | // ConstantDataSequential operand which can be converted to string and used |
102 | 2.00k | // for lookup. |
103 | 2.00k | // |
104 | 2.00k | // CUDA 7.0 does it slightly differently: |
105 | 2.00k | // %reflect = call i32 @__nvvm_reflect(i8* addrspacecast |
106 | 2.00k | // (i8 addrspace(1)* getelementptr inbounds |
107 | 2.00k | // ([8 x i8], [8 x i8] addrspace(1)* @str, i32 0, i32 0) to i8*)) |
108 | 2.00k | // |
109 | 2.00k | // In this case, we get a Constant with a GlobalVariable operand and we need |
110 | 2.00k | // to dig deeper to find its initializer with the string we'll use for lookup. |
111 | 17.8k | for (Instruction &I : instructions(F)) { |
112 | 17.8k | CallInst *Call = dyn_cast<CallInst>(&I); |
113 | 17.8k | if (!Call) |
114 | 14.8k | continue; |
115 | 2.99k | Function *Callee = Call->getCalledFunction(); |
116 | 2.99k | if (!Callee || (Callee->getName() != NVVM_REFLECT_FUNCTION && |
117 | 2.96k | Callee->getIntrinsicID() != Intrinsic::nvvm_reflect)) |
118 | 2.98k | continue; |
119 | 8 | |
120 | 8 | // FIXME: Improve error handling here and elsewhere in this pass. |
121 | 2.99k | assert(Call->getNumOperands() == 2 && |
122 | 8 | "Wrong number of operands to __nvvm_reflect function"); |
123 | 8 | |
124 | 8 | // In cuda 6.5 and earlier, we will have an extra constant-to-generic |
125 | 8 | // conversion of the string. |
126 | 8 | const Value *Str = Call->getArgOperand(0); |
127 | 8 | if (const CallInst *ConvCall = dyn_cast<CallInst>(Str)) { |
128 | 4 | // FIXME: Add assertions about ConvCall. |
129 | 4 | Str = ConvCall->getArgOperand(0); |
130 | 4 | } |
131 | 8 | assert(isa<ConstantExpr>(Str) && |
132 | 8 | "Format of __nvvm__reflect function not recognized"); |
133 | 8 | const ConstantExpr *GEP = cast<ConstantExpr>(Str); |
134 | 8 | |
135 | 8 | const Value *Sym = GEP->getOperand(0); |
136 | 8 | assert(isa<Constant>(Sym) && |
137 | 8 | "Format of __nvvm_reflect function not recognized"); |
138 | 8 | |
139 | 8 | const Value *Operand = cast<Constant>(Sym)->getOperand(0); |
140 | 8 | if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(Operand)) { |
141 | 4 | // For CUDA-7.0 style __nvvm_reflect calls, we need to find the operand's |
142 | 4 | // initializer. |
143 | 4 | assert(GV->hasInitializer() && |
144 | 4 | "Format of _reflect function not recognized"); |
145 | 4 | const Constant *Initializer = GV->getInitializer(); |
146 | 4 | Operand = Initializer; |
147 | 4 | } |
148 | 8 | |
149 | 8 | assert(isa<ConstantDataSequential>(Operand) && |
150 | 8 | "Format of _reflect function not recognized"); |
151 | 8 | assert(cast<ConstantDataSequential>(Operand)->isCString() && |
152 | 8 | "Format of _reflect function not recognized"); |
153 | 8 | |
154 | 8 | StringRef ReflectArg = cast<ConstantDataSequential>(Operand)->getAsString(); |
155 | 8 | ReflectArg = ReflectArg.substr(0, ReflectArg.size() - 1); |
156 | 8 | DEBUG(dbgs() << "Arg of _reflect : " << ReflectArg << "\n"); |
157 | 8 | |
158 | 8 | int ReflectVal = 0; // The default value is 0 |
159 | 8 | if (ReflectArg == "__CUDA_FTZ") { |
160 | 7 | // Try to pull __CUDA_FTZ from the nvvm-reflect-ftz module flag. Our |
161 | 7 | // choice here must be kept in sync with AutoUpgrade, which uses the same |
162 | 7 | // technique to detect whether ftz is enabled. |
163 | 7 | if (auto *Flag = mdconst::extract_or_null<ConstantInt>( |
164 | 7 | F.getParent()->getModuleFlag("nvvm-reflect-ftz"))) |
165 | 7 | ReflectVal = Flag->getSExtValue(); |
166 | 7 | } |
167 | 17.8k | Call->replaceAllUsesWith(ConstantInt::get(Call->getType(), ReflectVal)); |
168 | 17.8k | ToRemove.push_back(Call); |
169 | 17.8k | } |
170 | 2.00k | |
171 | 2.00k | for (Instruction *I : ToRemove) |
172 | 8 | I->eraseFromParent(); |
173 | 2.00k | |
174 | 2.00k | return ToRemove.size() > 0; |
175 | 2.00k | } |