/Users/buildslave/jenkins/workspace/coverage/llvm-project/clang/lib/CodeGen/CGGPUBuiltin.cpp
Line | Count | Source |
1 | | //===------ CGGPUBuiltin.cpp - Codegen for GPU builtins -------------------===// |
2 | | // |
3 | | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | | // See https://llvm.org/LICENSE.txt for license information. |
5 | | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | | // |
7 | | //===----------------------------------------------------------------------===// |
8 | | // |
9 | | // Generates code for built-in GPU calls which are not runtime-specific. |
10 | | // (Runtime-specific codegen lives in programming model specific files.) |
11 | | // |
12 | | //===----------------------------------------------------------------------===// |
13 | | |
14 | | #include "CodeGenFunction.h" |
15 | | #include "clang/Basic/Builtins.h" |
16 | | #include "llvm/IR/DataLayout.h" |
17 | | #include "llvm/IR/Instruction.h" |
18 | | #include "llvm/Support/MathExtras.h" |
19 | | #include "llvm/Transforms/Utils/AMDGPUEmitPrintf.h" |
20 | | |
21 | | using namespace clang; |
22 | | using namespace CodeGen; |
23 | | |
24 | 9 | static llvm::Function *GetVprintfDeclaration(llvm::Module &M) { |
25 | 9 | llvm::Type *ArgTypes[] = {llvm::Type::getInt8PtrTy(M.getContext()), |
26 | 9 | llvm::Type::getInt8PtrTy(M.getContext())}; |
27 | 9 | llvm::FunctionType *VprintfFuncType = llvm::FunctionType::get( |
28 | 9 | llvm::Type::getInt32Ty(M.getContext()), ArgTypes, false); |
29 | | |
30 | 9 | if (auto* F = M.getFunction("vprintf")) { |
31 | | // Our CUDA system header declares vprintf with the right signature, so |
32 | | // nobody else should have been able to declare vprintf with a bogus |
33 | | // signature. |
34 | 6 | assert(F->getFunctionType() == VprintfFuncType); |
35 | 6 | return F; |
36 | 6 | } |
37 | | |
38 | | // vprintf doesn't already exist; create a declaration and insert it into the |
39 | | // module. |
40 | 3 | return llvm::Function::Create( |
41 | 3 | VprintfFuncType, llvm::GlobalVariable::ExternalLinkage, "vprintf", &M); |
42 | 3 | } |
43 | | |
44 | | // Transforms a call to printf into a call to the NVPTX vprintf syscall (which |
45 | | // isn't particularly special; it's invoked just like a regular function). |
46 | | // vprintf takes two args: A format string, and a pointer to a buffer containing |
47 | | // the varargs. |
48 | | // |
49 | | // For example, the call |
50 | | // |
51 | | // printf("format string", arg1, arg2, arg3); |
52 | | // |
53 | | // is converted into something resembling |
54 | | // |
55 | | // struct Tmp { |
56 | | // Arg1 a1; |
57 | | // Arg2 a2; |
58 | | // Arg3 a3; |
59 | | // }; |
60 | | // char* buf = alloca(sizeof(Tmp)); |
61 | | // *(Tmp*)buf = {a1, a2, a3}; |
62 | | // vprintf("format string", buf); |
63 | | // |
64 | | // buf is aligned to the max of {alignof(Arg1), ...}. Furthermore, each of the |
65 | | // args is itself aligned to its preferred alignment. |
66 | | // |
67 | | // Note that by the time this function runs, E's args have already undergone the |
68 | | // standard C vararg promotion (short -> int, float -> double, etc.). |
69 | | RValue |
70 | | CodeGenFunction::EmitNVPTXDevicePrintfCallExpr(const CallExpr *E, |
71 | 10 | ReturnValueSlot ReturnValue) { |
72 | 10 | assert(getTarget().getTriple().isNVPTX()); |
73 | 10 | assert(E->getBuiltinCallee() == Builtin::BIprintf); |
74 | 10 | assert(E->getNumArgs() >= 1); // printf always has at least one arg. |
75 | | |
76 | 10 | const llvm::DataLayout &DL = CGM.getDataLayout(); |
77 | 10 | llvm::LLVMContext &Ctx = CGM.getLLVMContext(); |
78 | | |
79 | 10 | CallArgList Args; |
80 | 10 | EmitCallArgs(Args, |
81 | 10 | E->getDirectCallee()->getType()->getAs<FunctionProtoType>(), |
82 | 10 | E->arguments(), E->getDirectCallee(), |
83 | 10 | /* ParamsToSkip = */ 0); |
84 | | |
85 | | // We don't know how to emit non-scalar varargs. |
86 | 13 | if (std::any_of(Args.begin() + 1, Args.end(), [&](const CallArg &A) 10 { |
87 | 13 | return !A.getRValue(*this).isScalar(); |
88 | 1 | })) { |
89 | 1 | CGM.ErrorUnsupported(E, "non-scalar arg to printf"); |
90 | 1 | return RValue::get(llvm::ConstantInt::get(IntTy, 0)); |
91 | 1 | } |
92 | | |
93 | | // Construct and fill the args buffer that we'll pass to vprintf. |
94 | 9 | llvm::Value *BufferPtr; |
95 | 9 | if (Args.size() <= 1) { |
96 | | // If there are no args, pass a null pointer to vprintf. |
97 | 3 | BufferPtr = llvm::ConstantPointerNull::get(llvm::Type::getInt8PtrTy(Ctx)); |
98 | 6 | } else { |
99 | 6 | llvm::SmallVector<llvm::Type *, 8> ArgTypes; |
100 | 18 | for (unsigned I = 1, NumArgs = Args.size(); I < NumArgs; ++I12 ) |
101 | 12 | ArgTypes.push_back(Args[I].getRValue(*this).getScalarVal()->getType()); |
102 | | |
103 | | // Using llvm::StructType is correct only because printf doesn't accept |
104 | | // aggregates. If we had to handle aggregates here, we'd have to manually |
105 | | // compute the offsets within the alloca -- we wouldn't be able to assume |
106 | | // that the alignment of the llvm type was the same as the alignment of the |
107 | | // clang type. |
108 | 6 | llvm::Type *AllocaTy = llvm::StructType::create(ArgTypes, "printf_args"); |
109 | 6 | llvm::Value *Alloca = CreateTempAlloca(AllocaTy); |
110 | | |
111 | 18 | for (unsigned I = 1, NumArgs = Args.size(); I < NumArgs; ++I12 ) { |
112 | 12 | llvm::Value *P = Builder.CreateStructGEP(AllocaTy, Alloca, I - 1); |
113 | 12 | llvm::Value *Arg = Args[I].getRValue(*this).getScalarVal(); |
114 | 12 | Builder.CreateAlignedStore(Arg, P, DL.getPrefTypeAlign(Arg->getType())); |
115 | 12 | } |
116 | 6 | BufferPtr = Builder.CreatePointerCast(Alloca, llvm::Type::getInt8PtrTy(Ctx)); |
117 | 6 | } |
118 | | |
119 | | // Invoke vprintf and return. |
120 | 9 | llvm::Function* VprintfFunc = GetVprintfDeclaration(CGM.getModule()); |
121 | 9 | return RValue::get(Builder.CreateCall( |
122 | 9 | VprintfFunc, {Args[0].getRValue(*this).getScalarVal(), BufferPtr})); |
123 | 9 | } |
124 | | |
125 | | RValue |
126 | | CodeGenFunction::EmitAMDGPUDevicePrintfCallExpr(const CallExpr *E, |
127 | 4 | ReturnValueSlot ReturnValue) { |
128 | 4 | assert(getTarget().getTriple().getArch() == llvm::Triple::amdgcn); |
129 | 4 | assert(E->getBuiltinCallee() == Builtin::BIprintf || |
130 | 4 | E->getBuiltinCallee() == Builtin::BI__builtin_printf); |
131 | 4 | assert(E->getNumArgs() >= 1); // printf always has at least one arg. |
132 | | |
133 | 4 | CallArgList CallArgs; |
134 | 4 | EmitCallArgs(CallArgs, |
135 | 4 | E->getDirectCallee()->getType()->getAs<FunctionProtoType>(), |
136 | 4 | E->arguments(), E->getDirectCallee(), |
137 | 4 | /* ParamsToSkip = */ 0); |
138 | | |
139 | 4 | SmallVector<llvm::Value *, 8> Args; |
140 | 14 | for (auto A : CallArgs) { |
141 | | // We don't know how to emit non-scalar varargs. |
142 | 14 | if (!A.getRValue(*this).isScalar()) { |
143 | 1 | CGM.ErrorUnsupported(E, "non-scalar arg to printf"); |
144 | 1 | return RValue::get(llvm::ConstantInt::get(IntTy, -1)); |
145 | 1 | } |
146 | | |
147 | 13 | llvm::Value *Arg = A.getRValue(*this).getScalarVal(); |
148 | 13 | Args.push_back(Arg); |
149 | 13 | } |
150 | | |
151 | 3 | llvm::IRBuilder<> IRB(Builder.GetInsertBlock(), Builder.GetInsertPoint()); |
152 | 3 | IRB.SetCurrentDebugLocation(Builder.getCurrentDebugLocation()); |
153 | 3 | auto Printf = llvm::emitAMDGPUPrintfCall(IRB, Args); |
154 | 3 | Builder.SetInsertPoint(IRB.GetInsertBlock(), IRB.GetInsertPoint()); |
155 | 3 | return RValue::get(Printf); |
156 | 4 | } |