/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===--- AMDGPUHSAMetadataStreamer.cpp --------------------------*- C++ -*-===// |
2 | | // |
3 | | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | | // See https://llvm.org/LICENSE.txt for license information. |
5 | | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | | // |
7 | | //===----------------------------------------------------------------------===// |
8 | | // |
9 | | /// \file |
10 | | /// AMDGPU HSA Metadata Streamer. |
11 | | /// |
12 | | // |
13 | | //===----------------------------------------------------------------------===// |
14 | | |
15 | | #include "AMDGPUHSAMetadataStreamer.h" |
16 | | #include "AMDGPU.h" |
17 | | #include "AMDGPUSubtarget.h" |
18 | | #include "MCTargetDesc/AMDGPUTargetStreamer.h" |
19 | | #include "SIMachineFunctionInfo.h" |
20 | | #include "SIProgramInfo.h" |
21 | | #include "Utils/AMDGPUBaseInfo.h" |
22 | | #include "llvm/ADT/StringSwitch.h" |
23 | | #include "llvm/IR/Constants.h" |
24 | | #include "llvm/IR/Module.h" |
25 | | #include "llvm/Support/raw_ostream.h" |
26 | | |
27 | | namespace llvm { |
28 | | |
29 | | static cl::opt<bool> DumpHSAMetadata( |
30 | | "amdgpu-dump-hsa-metadata", |
31 | | cl::desc("Dump AMDGPU HSA Metadata")); |
32 | | static cl::opt<bool> VerifyHSAMetadata( |
33 | | "amdgpu-verify-hsa-metadata", |
34 | | cl::desc("Verify AMDGPU HSA Metadata")); |
35 | | |
36 | | namespace AMDGPU { |
37 | | namespace HSAMD { |
38 | | |
39 | | //===----------------------------------------------------------------------===// |
40 | | // HSAMetadataStreamerV2 |
41 | | //===----------------------------------------------------------------------===// |
42 | 4 | void MetadataStreamerV2::dump(StringRef HSAMetadataString) const { |
43 | 4 | errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n'; |
44 | 4 | } |
45 | | |
46 | 4 | void MetadataStreamerV2::verify(StringRef HSAMetadataString) const { |
47 | 4 | errs() << "AMDGPU HSA Metadata Parser Test: "; |
48 | 4 | |
49 | 4 | HSAMD::Metadata FromHSAMetadataString; |
50 | 4 | if (fromString(HSAMetadataString, FromHSAMetadataString)) { |
51 | 0 | errs() << "FAIL\n"; |
52 | 0 | return; |
53 | 0 | } |
54 | 4 | |
55 | 4 | std::string ToHSAMetadataString; |
56 | 4 | if (toString(FromHSAMetadataString, ToHSAMetadataString)) { |
57 | 0 | errs() << "FAIL\n"; |
58 | 0 | return; |
59 | 0 | } |
60 | 4 | |
61 | 4 | errs() << (HSAMetadataString == ToHSAMetadataString ? "PASS" : "FAIL"0 ) |
62 | 4 | << '\n'; |
63 | 4 | if (HSAMetadataString != ToHSAMetadataString) { |
64 | 0 | errs() << "Original input: " << HSAMetadataString << '\n' |
65 | 0 | << "Produced output: " << ToHSAMetadataString << '\n'; |
66 | 0 | } |
67 | 4 | } |
68 | | |
69 | | AccessQualifier |
70 | 2.86k | MetadataStreamerV2::getAccessQualifier(StringRef AccQual) const { |
71 | 2.86k | if (AccQual.empty()) |
72 | 2.53k | return AccessQualifier::Unknown; |
73 | 330 | |
74 | 330 | return StringSwitch<AccessQualifier>(AccQual) |
75 | 330 | .Case("read_only", AccessQualifier::ReadOnly) |
76 | 330 | .Case("write_only", AccessQualifier::WriteOnly) |
77 | 330 | .Case("read_write", AccessQualifier::ReadWrite) |
78 | 330 | .Default(AccessQualifier::Default); |
79 | 330 | } |
80 | | |
81 | | AddressSpaceQualifier |
82 | | MetadataStreamerV2::getAddressSpaceQualifier( |
83 | 1.67k | unsigned AddressSpace) const { |
84 | 1.67k | switch (AddressSpace) { |
85 | 1.67k | case AMDGPUAS::PRIVATE_ADDRESS: |
86 | 14 | return AddressSpaceQualifier::Private; |
87 | 1.67k | case AMDGPUAS::GLOBAL_ADDRESS: |
88 | 1.53k | return AddressSpaceQualifier::Global; |
89 | 1.67k | case AMDGPUAS::CONSTANT_ADDRESS: |
90 | 11 | return AddressSpaceQualifier::Constant; |
91 | 1.67k | case AMDGPUAS::LOCAL_ADDRESS: |
92 | 97 | return AddressSpaceQualifier::Local; |
93 | 1.67k | case AMDGPUAS::FLAT_ADDRESS: |
94 | 10 | return AddressSpaceQualifier::Generic; |
95 | 1.67k | case AMDGPUAS::REGION_ADDRESS: |
96 | 0 | return AddressSpaceQualifier::Region; |
97 | 1.67k | default: |
98 | 6 | return AddressSpaceQualifier::Unknown; |
99 | 1.67k | } |
100 | 1.67k | } |
101 | | |
102 | | ValueKind MetadataStreamerV2::getValueKind(Type *Ty, StringRef TypeQual, |
103 | 1.24k | StringRef BaseTypeName) const { |
104 | 1.24k | if (TypeQual.find("pipe") != StringRef::npos) |
105 | 6 | return ValueKind::Pipe; |
106 | 1.23k | |
107 | 1.23k | return StringSwitch<ValueKind>(BaseTypeName) |
108 | 1.23k | .Case("image1d_t", ValueKind::Image) |
109 | 1.23k | .Case("image1d_array_t", ValueKind::Image) |
110 | 1.23k | .Case("image1d_buffer_t", ValueKind::Image) |
111 | 1.23k | .Case("image2d_t", ValueKind::Image) |
112 | 1.23k | .Case("image2d_array_t", ValueKind::Image) |
113 | 1.23k | .Case("image2d_array_depth_t", ValueKind::Image) |
114 | 1.23k | .Case("image2d_array_msaa_t", ValueKind::Image) |
115 | 1.23k | .Case("image2d_array_msaa_depth_t", ValueKind::Image) |
116 | 1.23k | .Case("image2d_depth_t", ValueKind::Image) |
117 | 1.23k | .Case("image2d_msaa_t", ValueKind::Image) |
118 | 1.23k | .Case("image2d_msaa_depth_t", ValueKind::Image) |
119 | 1.23k | .Case("image3d_t", ValueKind::Image) |
120 | 1.23k | .Case("sampler_t", ValueKind::Sampler) |
121 | 1.23k | .Case("queue_t", ValueKind::Queue) |
122 | 1.23k | .Default(isa<PointerType>(Ty) ? |
123 | 760 | (Ty->getPointerAddressSpace() == |
124 | 760 | AMDGPUAS::LOCAL_ADDRESS ? |
125 | 97 | ValueKind::DynamicSharedPointer : |
126 | 760 | ValueKind::GlobalBuffer663 ) : |
127 | 1.23k | ValueKind::ByValue474 ); |
128 | 1.23k | } |
129 | | |
130 | 4.76k | ValueType MetadataStreamerV2::getValueType(Type *Ty, StringRef TypeName) const { |
131 | 4.76k | switch (Ty->getTypeID()) { |
132 | 4.76k | case Type::IntegerTyID: { |
133 | 2.52k | auto Signed = !TypeName.startswith("u"); |
134 | 2.52k | switch (Ty->getIntegerBitWidth()) { |
135 | 2.52k | case 8: |
136 | 1.08k | return Signed ? ValueType::I8 : ValueType::U80 ; |
137 | 2.52k | case 16: |
138 | 29 | return Signed ? ValueType::I1623 : ValueType::U166 ; |
139 | 2.52k | case 32: |
140 | 630 | return Signed ? ValueType::I32 : ValueType::U320 ; |
141 | 2.52k | case 64: |
142 | 759 | return Signed ? ValueType::I64753 : ValueType::U646 ; |
143 | 2.52k | default: |
144 | 25 | return ValueType::Struct; |
145 | 0 | } |
146 | 0 | } |
147 | 78 | case Type::HalfTyID: |
148 | 78 | return ValueType::F16; |
149 | 53 | case Type::FloatTyID: |
150 | 53 | return ValueType::F32; |
151 | 25 | case Type::DoubleTyID: |
152 | 25 | return ValueType::F64; |
153 | 1.68k | case Type::PointerTyID: |
154 | 1.68k | return getValueType(Ty->getPointerElementType(), TypeName); |
155 | 213 | case Type::VectorTyID: |
156 | 213 | return getValueType(Ty->getVectorElementType(), TypeName); |
157 | 181 | default: |
158 | 181 | return ValueType::Struct; |
159 | 4.76k | } |
160 | 4.76k | } |
161 | | |
162 | 66 | std::string MetadataStreamerV2::getTypeName(Type *Ty, bool Signed) const { |
163 | 66 | switch (Ty->getTypeID()) { |
164 | 66 | case Type::IntegerTyID: { |
165 | 36 | if (!Signed) |
166 | 6 | return (Twine('u') + getTypeName(Ty, true)).str(); |
167 | 30 | |
168 | 30 | auto BitWidth = Ty->getIntegerBitWidth(); |
169 | 30 | switch (BitWidth) { |
170 | 30 | case 8: |
171 | 6 | return "char"; |
172 | 30 | case 16: |
173 | 6 | return "short"; |
174 | 30 | case 32: |
175 | 12 | return "int"; |
176 | 30 | case 64: |
177 | 6 | return "long"; |
178 | 30 | default: |
179 | 0 | return (Twine('i') + Twine(BitWidth)).str(); |
180 | 0 | } |
181 | 0 | } |
182 | 6 | case Type::HalfTyID: |
183 | 6 | return "half"; |
184 | 6 | case Type::FloatTyID: |
185 | 6 | return "float"; |
186 | 6 | case Type::DoubleTyID: |
187 | 6 | return "double"; |
188 | 6 | case Type::VectorTyID: { |
189 | 6 | auto VecTy = cast<VectorType>(Ty); |
190 | 6 | auto ElTy = VecTy->getElementType(); |
191 | 6 | auto NumElements = VecTy->getVectorNumElements(); |
192 | 6 | return (Twine(getTypeName(ElTy, Signed)) + Twine(NumElements)).str(); |
193 | 0 | } |
194 | 6 | default: |
195 | 6 | return "unknown"; |
196 | 66 | } |
197 | 66 | } |
198 | | |
199 | | std::vector<uint32_t> |
200 | 12 | MetadataStreamerV2::getWorkGroupDimensions(MDNode *Node) const { |
201 | 12 | std::vector<uint32_t> Dims; |
202 | 12 | if (Node->getNumOperands() != 3) |
203 | 0 | return Dims; |
204 | 12 | |
205 | 12 | for (auto &Op : Node->operands()) |
206 | 36 | Dims.push_back(mdconst::extract<ConstantInt>(Op)->getZExtValue()); |
207 | 12 | return Dims; |
208 | 12 | } |
209 | | |
210 | | Kernel::CodeProps::Metadata |
211 | | MetadataStreamerV2::getHSACodeProps(const MachineFunction &MF, |
212 | 765 | const SIProgramInfo &ProgramInfo) const { |
213 | 765 | const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>(); |
214 | 765 | const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>(); |
215 | 765 | HSAMD::Kernel::CodeProps::Metadata HSACodeProps; |
216 | 765 | const Function &F = MF.getFunction(); |
217 | 765 | |
218 | 765 | assert(F.getCallingConv() == CallingConv::AMDGPU_KERNEL || |
219 | 765 | F.getCallingConv() == CallingConv::SPIR_KERNEL); |
220 | 765 | |
221 | 765 | unsigned MaxKernArgAlign; |
222 | 765 | HSACodeProps.mKernargSegmentSize = STM.getKernArgSegmentSize(F, |
223 | 765 | MaxKernArgAlign); |
224 | 765 | HSACodeProps.mGroupSegmentFixedSize = ProgramInfo.LDSSize; |
225 | 765 | HSACodeProps.mPrivateSegmentFixedSize = ProgramInfo.ScratchSize; |
226 | 765 | HSACodeProps.mKernargSegmentAlign = std::max(MaxKernArgAlign, 4u); |
227 | 765 | HSACodeProps.mWavefrontSize = STM.getWavefrontSize(); |
228 | 765 | HSACodeProps.mNumSGPRs = ProgramInfo.NumSGPR; |
229 | 765 | HSACodeProps.mNumVGPRs = ProgramInfo.NumVGPR; |
230 | 765 | HSACodeProps.mMaxFlatWorkGroupSize = MFI.getMaxFlatWorkGroupSize(); |
231 | 765 | HSACodeProps.mIsDynamicCallStack = ProgramInfo.DynamicCallStack; |
232 | 765 | HSACodeProps.mIsXNACKEnabled = STM.isXNACKEnabled(); |
233 | 765 | HSACodeProps.mNumSpilledSGPRs = MFI.getNumSpilledSGPRs(); |
234 | 765 | HSACodeProps.mNumSpilledVGPRs = MFI.getNumSpilledVGPRs(); |
235 | 765 | |
236 | 765 | return HSACodeProps; |
237 | 765 | } |
238 | | |
239 | | Kernel::DebugProps::Metadata |
240 | | MetadataStreamerV2::getHSADebugProps(const MachineFunction &MF, |
241 | 765 | const SIProgramInfo &ProgramInfo) const { |
242 | 765 | return HSAMD::Kernel::DebugProps::Metadata(); |
243 | 765 | } |
244 | | |
245 | 166 | void MetadataStreamerV2::emitVersion() { |
246 | 166 | auto &Version = HSAMetadata.mVersion; |
247 | 166 | |
248 | 166 | Version.push_back(VersionMajor); |
249 | 166 | Version.push_back(VersionMinor); |
250 | 166 | } |
251 | | |
252 | 166 | void MetadataStreamerV2::emitPrintf(const Module &Mod) { |
253 | 166 | auto &Printf = HSAMetadata.mPrintf; |
254 | 166 | |
255 | 166 | auto Node = Mod.getNamedMetadata("llvm.printf.fmts"); |
256 | 166 | if (!Node) |
257 | 160 | return; |
258 | 6 | |
259 | 6 | for (auto Op : Node->operands()) |
260 | 12 | if (Op->getNumOperands()) |
261 | 12 | Printf.push_back(cast<MDString>(Op->getOperand(0))->getString()); |
262 | 6 | } |
263 | | |
264 | 765 | void MetadataStreamerV2::emitKernelLanguage(const Function &Func) { |
265 | 765 | auto &Kernel = HSAMetadata.mKernels.back(); |
266 | 765 | |
267 | 765 | // TODO: What about other languages? |
268 | 765 | auto Node = Func.getParent()->getNamedMetadata("opencl.ocl.version"); |
269 | 765 | if (!Node || !Node->getNumOperands()214 ) |
270 | 551 | return; |
271 | 214 | auto Op0 = Node->getOperand(0); |
272 | 214 | if (Op0->getNumOperands() <= 1) |
273 | 0 | return; |
274 | 214 | |
275 | 214 | Kernel.mLanguage = "OpenCL C"; |
276 | 214 | Kernel.mLanguageVersion.push_back( |
277 | 214 | mdconst::extract<ConstantInt>(Op0->getOperand(0))->getZExtValue()); |
278 | 214 | Kernel.mLanguageVersion.push_back( |
279 | 214 | mdconst::extract<ConstantInt>(Op0->getOperand(1))->getZExtValue()); |
280 | 214 | } |
281 | | |
282 | 765 | void MetadataStreamerV2::emitKernelAttrs(const Function &Func) { |
283 | 765 | auto &Attrs = HSAMetadata.mKernels.back().mAttrs; |
284 | 765 | |
285 | 765 | if (auto Node = Func.getMetadata("reqd_work_group_size")) |
286 | 6 | Attrs.mReqdWorkGroupSize = getWorkGroupDimensions(Node); |
287 | 765 | if (auto Node = Func.getMetadata("work_group_size_hint")) |
288 | 6 | Attrs.mWorkGroupSizeHint = getWorkGroupDimensions(Node); |
289 | 765 | if (auto Node = Func.getMetadata("vec_type_hint")) { |
290 | 54 | Attrs.mVecTypeHint = getTypeName( |
291 | 54 | cast<ValueAsMetadata>(Node->getOperand(0))->getType(), |
292 | 54 | mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue()); |
293 | 54 | } |
294 | 765 | if (Func.hasFnAttribute("runtime-handle")) { |
295 | 6 | Attrs.mRuntimeHandle = |
296 | 6 | Func.getFnAttribute("runtime-handle").getValueAsString().str(); |
297 | 6 | } |
298 | 765 | } |
299 | | |
300 | 765 | void MetadataStreamerV2::emitKernelArgs(const Function &Func) { |
301 | 765 | for (auto &Arg : Func.args()) |
302 | 1.24k | emitKernelArg(Arg); |
303 | 765 | |
304 | 765 | emitHiddenKernelArgs(Func); |
305 | 765 | } |
306 | | |
307 | 1.24k | void MetadataStreamerV2::emitKernelArg(const Argument &Arg) { |
308 | 1.24k | auto Func = Arg.getParent(); |
309 | 1.24k | auto ArgNo = Arg.getArgNo(); |
310 | 1.24k | const MDNode *Node; |
311 | 1.24k | |
312 | 1.24k | StringRef Name; |
313 | 1.24k | Node = Func->getMetadata("kernel_arg_name"); |
314 | 1.24k | if (Node && ArgNo < Node->getNumOperands()0 ) |
315 | 0 | Name = cast<MDString>(Node->getOperand(ArgNo))->getString(); |
316 | 1.24k | else if (Arg.hasName()) |
317 | 1.17k | Name = Arg.getName(); |
318 | 1.24k | |
319 | 1.24k | StringRef TypeName; |
320 | 1.24k | Node = Func->getMetadata("kernel_arg_type"); |
321 | 1.24k | if (Node && ArgNo < Node->getNumOperands()378 ) |
322 | 366 | TypeName = cast<MDString>(Node->getOperand(ArgNo))->getString(); |
323 | 1.24k | |
324 | 1.24k | StringRef BaseTypeName; |
325 | 1.24k | Node = Func->getMetadata("kernel_arg_base_type"); |
326 | 1.24k | if (Node && ArgNo < Node->getNumOperands()378 ) |
327 | 366 | BaseTypeName = cast<MDString>(Node->getOperand(ArgNo))->getString(); |
328 | 1.24k | |
329 | 1.24k | StringRef AccQual; |
330 | 1.24k | if (Arg.getType()->isPointerTy() && Arg.onlyReadsMemory()766 && |
331 | 1.24k | Arg.hasNoAliasAttr()20 ) { |
332 | 1 | AccQual = "read_only"; |
333 | 1.23k | } else { |
334 | 1.23k | Node = Func->getMetadata("kernel_arg_access_qual"); |
335 | 1.23k | if (Node && ArgNo < Node->getNumOperands()341 ) |
336 | 329 | AccQual = cast<MDString>(Node->getOperand(ArgNo))->getString(); |
337 | 1.23k | } |
338 | 1.24k | |
339 | 1.24k | StringRef TypeQual; |
340 | 1.24k | Node = Func->getMetadata("kernel_arg_type_qual"); |
341 | 1.24k | if (Node && ArgNo < Node->getNumOperands()342 ) |
342 | 330 | TypeQual = cast<MDString>(Node->getOperand(ArgNo))->getString(); |
343 | 1.24k | |
344 | 1.24k | Type *Ty = Arg.getType(); |
345 | 1.24k | const DataLayout &DL = Func->getParent()->getDataLayout(); |
346 | 1.24k | |
347 | 1.24k | unsigned PointeeAlign = 0; |
348 | 1.24k | if (auto PtrTy = dyn_cast<PointerType>(Ty)) { |
349 | 766 | if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) { |
350 | 97 | PointeeAlign = Arg.getParamAlignment(); |
351 | 97 | if (PointeeAlign == 0) |
352 | 54 | PointeeAlign = DL.getABITypeAlignment(PtrTy->getElementType()); |
353 | 97 | } |
354 | 766 | } |
355 | 1.24k | |
356 | 1.24k | emitKernelArg(DL, Ty, getValueKind(Arg.getType(), TypeQual, BaseTypeName), |
357 | 1.24k | PointeeAlign, Name, TypeName, BaseTypeName, AccQual, TypeQual); |
358 | 1.24k | } |
359 | | |
360 | | void MetadataStreamerV2::emitKernelArg(const DataLayout &DL, Type *Ty, |
361 | | ValueKind ValueKind, |
362 | | unsigned PointeeAlign, StringRef Name, |
363 | | StringRef TypeName, |
364 | | StringRef BaseTypeName, |
365 | 2.86k | StringRef AccQual, StringRef TypeQual) { |
366 | 2.86k | HSAMetadata.mKernels.back().mArgs.push_back(Kernel::Arg::Metadata()); |
367 | 2.86k | auto &Arg = HSAMetadata.mKernels.back().mArgs.back(); |
368 | 2.86k | |
369 | 2.86k | Arg.mName = Name; |
370 | 2.86k | Arg.mTypeName = TypeName; |
371 | 2.86k | Arg.mSize = DL.getTypeAllocSize(Ty); |
372 | 2.86k | Arg.mAlign = DL.getABITypeAlignment(Ty); |
373 | 2.86k | Arg.mValueKind = ValueKind; |
374 | 2.86k | Arg.mValueType = getValueType(Ty, BaseTypeName); |
375 | 2.86k | Arg.mPointeeAlign = PointeeAlign; |
376 | 2.86k | |
377 | 2.86k | if (auto PtrTy = dyn_cast<PointerType>(Ty)) |
378 | 1.67k | Arg.mAddrSpaceQual = getAddressSpaceQualifier(PtrTy->getAddressSpace()); |
379 | 2.86k | |
380 | 2.86k | Arg.mAccQual = getAccessQualifier(AccQual); |
381 | 2.86k | |
382 | 2.86k | // TODO: Emit Arg.mActualAccQual. |
383 | 2.86k | |
384 | 2.86k | SmallVector<StringRef, 1> SplitTypeQuals; |
385 | 2.86k | TypeQual.split(SplitTypeQuals, " ", -1, false); |
386 | 2.86k | for (StringRef Key : SplitTypeQuals) { |
387 | 26 | auto P = StringSwitch<bool*>(Key) |
388 | 26 | .Case("const", &Arg.mIsConst) |
389 | 26 | .Case("restrict", &Arg.mIsRestrict) |
390 | 26 | .Case("volatile", &Arg.mIsVolatile) |
391 | 26 | .Case("pipe", &Arg.mIsPipe) |
392 | 26 | .Default(nullptr); |
393 | 26 | if (P) |
394 | 26 | *P = true; |
395 | 26 | } |
396 | 2.86k | } |
397 | | |
398 | 765 | void MetadataStreamerV2::emitHiddenKernelArgs(const Function &Func) { |
399 | 765 | int HiddenArgNumBytes = |
400 | 765 | getIntegerAttribute(Func, "amdgpu-implicitarg-num-bytes", 0); |
401 | 765 | |
402 | 765 | if (!HiddenArgNumBytes) |
403 | 522 | return; |
404 | 243 | |
405 | 243 | auto &DL = Func.getParent()->getDataLayout(); |
406 | 243 | auto Int64Ty = Type::getInt64Ty(Func.getContext()); |
407 | 243 | |
408 | 243 | if (HiddenArgNumBytes >= 8) |
409 | 243 | emitKernelArg(DL, Int64Ty, ValueKind::HiddenGlobalOffsetX); |
410 | 243 | if (HiddenArgNumBytes >= 16) |
411 | 240 | emitKernelArg(DL, Int64Ty, ValueKind::HiddenGlobalOffsetY); |
412 | 243 | if (HiddenArgNumBytes >= 24) |
413 | 237 | emitKernelArg(DL, Int64Ty, ValueKind::HiddenGlobalOffsetZ); |
414 | 243 | |
415 | 243 | auto Int8PtrTy = Type::getInt8PtrTy(Func.getContext(), |
416 | 243 | AMDGPUAS::GLOBAL_ADDRESS); |
417 | 243 | |
418 | 243 | // Emit "printf buffer" argument if printf is used, otherwise emit dummy |
419 | 243 | // "none" argument. |
420 | 243 | if (HiddenArgNumBytes >= 32) { |
421 | 234 | if (Func.getParent()->getNamedMetadata("llvm.printf.fmts")) |
422 | 210 | emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenPrintfBuffer); |
423 | 24 | else |
424 | 24 | emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenNone); |
425 | 234 | } |
426 | 243 | |
427 | 243 | // Emit "default queue" and "completion action" arguments if enqueue kernel is |
428 | 243 | // used, otherwise emit dummy "none" arguments. |
429 | 243 | if (HiddenArgNumBytes >= 48) { |
430 | 229 | if (Func.hasFnAttribute("calls-enqueue-kernel")) { |
431 | 8 | emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenDefaultQueue); |
432 | 8 | emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenCompletionAction); |
433 | 221 | } else { |
434 | 221 | emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenNone); |
435 | 221 | emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenNone); |
436 | 221 | } |
437 | 229 | } |
438 | 243 | |
439 | 243 | // Emit the pointer argument for multi-grid object. |
440 | 243 | if (HiddenArgNumBytes >= 56) |
441 | 213 | emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenMultiGridSyncArg); |
442 | 243 | } |
443 | | |
444 | 165 | bool MetadataStreamerV2::emitTo(AMDGPUTargetStreamer &TargetStreamer) { |
445 | 165 | return TargetStreamer.EmitHSAMetadata(getHSAMetadata()); |
446 | 165 | } |
447 | | |
448 | 166 | void MetadataStreamerV2::begin(const Module &Mod) { |
449 | 166 | emitVersion(); |
450 | 166 | emitPrintf(Mod); |
451 | 166 | } |
452 | | |
453 | 165 | void MetadataStreamerV2::end() { |
454 | 165 | std::string HSAMetadataString; |
455 | 165 | if (toString(HSAMetadata, HSAMetadataString)) |
456 | 0 | return; |
457 | 165 | |
458 | 165 | if (DumpHSAMetadata) |
459 | 4 | dump(HSAMetadataString); |
460 | 165 | if (VerifyHSAMetadata) |
461 | 4 | verify(HSAMetadataString); |
462 | 165 | } |
463 | | |
464 | | void MetadataStreamerV2::emitKernel(const MachineFunction &MF, |
465 | 768 | const SIProgramInfo &ProgramInfo) { |
466 | 768 | auto &Func = MF.getFunction(); |
467 | 768 | if (Func.getCallingConv() != CallingConv::AMDGPU_KERNEL) |
468 | 3 | return; |
469 | 765 | |
470 | 765 | auto CodeProps = getHSACodeProps(MF, ProgramInfo); |
471 | 765 | auto DebugProps = getHSADebugProps(MF, ProgramInfo); |
472 | 765 | |
473 | 765 | HSAMetadata.mKernels.push_back(Kernel::Metadata()); |
474 | 765 | auto &Kernel = HSAMetadata.mKernels.back(); |
475 | 765 | |
476 | 765 | Kernel.mName = Func.getName(); |
477 | 765 | Kernel.mSymbolName = (Twine(Func.getName()) + Twine("@kd")).str(); |
478 | 765 | emitKernelLanguage(Func); |
479 | 765 | emitKernelAttrs(Func); |
480 | 765 | emitKernelArgs(Func); |
481 | 765 | HSAMetadata.mKernels.back().mCodeProps = CodeProps; |
482 | 765 | HSAMetadata.mKernels.back().mDebugProps = DebugProps; |
483 | 765 | } |
484 | | |
485 | | //===----------------------------------------------------------------------===// |
486 | | // HSAMetadataStreamerV3 |
487 | | //===----------------------------------------------------------------------===// |
488 | | |
489 | 4 | void MetadataStreamerV3::dump(StringRef HSAMetadataString) const { |
490 | 4 | errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n'; |
491 | 4 | } |
492 | | |
493 | 5 | void MetadataStreamerV3::verify(StringRef HSAMetadataString) const { |
494 | 5 | errs() << "AMDGPU HSA Metadata Parser Test: "; |
495 | 5 | |
496 | 5 | msgpack::Document FromHSAMetadataString; |
497 | 5 | |
498 | 5 | if (!FromHSAMetadataString.fromYAML(HSAMetadataString)) { |
499 | 0 | errs() << "FAIL\n"; |
500 | 0 | return; |
501 | 0 | } |
502 | 5 | |
503 | 5 | std::string ToHSAMetadataString; |
504 | 5 | raw_string_ostream StrOS(ToHSAMetadataString); |
505 | 5 | FromHSAMetadataString.toYAML(StrOS); |
506 | 5 | |
507 | 5 | errs() << (HSAMetadataString == StrOS.str() ? "PASS" : "FAIL"0 ) << '\n'; |
508 | 5 | if (HSAMetadataString != ToHSAMetadataString) { |
509 | 0 | errs() << "Original input: " << HSAMetadataString << '\n' |
510 | 0 | << "Produced output: " << StrOS.str() << '\n'; |
511 | 0 | } |
512 | 5 | } |
513 | | |
514 | | Optional<StringRef> |
515 | 8.12k | MetadataStreamerV3::getAccessQualifier(StringRef AccQual) const { |
516 | 8.12k | return StringSwitch<Optional<StringRef>>(AccQual) |
517 | 8.12k | .Case("read_only", StringRef("read_only")) |
518 | 8.12k | .Case("write_only", StringRef("write_only")) |
519 | 8.12k | .Case("read_write", StringRef("read_write")) |
520 | 8.12k | .Default(None); |
521 | 8.12k | } |
522 | | |
523 | | Optional<StringRef> |
524 | 4.73k | MetadataStreamerV3::getAddressSpaceQualifier(unsigned AddressSpace) const { |
525 | 4.73k | switch (AddressSpace) { |
526 | 4.73k | case AMDGPUAS::PRIVATE_ADDRESS: |
527 | 28 | return StringRef("private"); |
528 | 4.73k | case AMDGPUAS::GLOBAL_ADDRESS: |
529 | 3.02k | return StringRef("global"); |
530 | 4.73k | case AMDGPUAS::CONSTANT_ADDRESS: |
531 | 247 | return StringRef("constant"); |
532 | 4.73k | case AMDGPUAS::LOCAL_ADDRESS: |
533 | 178 | return StringRef("local"); |
534 | 4.73k | case AMDGPUAS::FLAT_ADDRESS: |
535 | 1.25k | return StringRef("generic"); |
536 | 4.73k | case AMDGPUAS::REGION_ADDRESS: |
537 | 0 | return StringRef("region"); |
538 | 4.73k | default: |
539 | 6 | return None; |
540 | 4.73k | } |
541 | 4.73k | } |
542 | | |
543 | | StringRef MetadataStreamerV3::getValueKind(Type *Ty, StringRef TypeQual, |
544 | 6.56k | StringRef BaseTypeName) const { |
545 | 6.56k | if (TypeQual.find("pipe") != StringRef::npos) |
546 | 6 | return "pipe"; |
547 | 6.55k | |
548 | 6.55k | return StringSwitch<StringRef>(BaseTypeName) |
549 | 6.55k | .Case("image1d_t", "image") |
550 | 6.55k | .Case("image1d_array_t", "image") |
551 | 6.55k | .Case("image1d_buffer_t", "image") |
552 | 6.55k | .Case("image2d_t", "image") |
553 | 6.55k | .Case("image2d_array_t", "image") |
554 | 6.55k | .Case("image2d_array_depth_t", "image") |
555 | 6.55k | .Case("image2d_array_msaa_t", "image") |
556 | 6.55k | .Case("image2d_array_msaa_depth_t", "image") |
557 | 6.55k | .Case("image2d_depth_t", "image") |
558 | 6.55k | .Case("image2d_msaa_t", "image") |
559 | 6.55k | .Case("image2d_msaa_depth_t", "image") |
560 | 6.55k | .Case("image3d_t", "image") |
561 | 6.55k | .Case("sampler_t", "sampler") |
562 | 6.55k | .Case("queue_t", "queue") |
563 | 6.55k | .Default(isa<PointerType>(Ty) |
564 | 6.55k | ? (Ty->getPointerAddressSpace() == AMDGPUAS::LOCAL_ADDRESS |
565 | 3.85k | ? "dynamic_shared_pointer"178 |
566 | 3.85k | : "global_buffer"3.67k ) |
567 | 6.55k | : "by_value"2.70k ); |
568 | 6.55k | } |
569 | | |
570 | 14.5k | StringRef MetadataStreamerV3::getValueType(Type *Ty, StringRef TypeName) const { |
571 | 14.5k | switch (Ty->getTypeID()) { |
572 | 14.5k | case Type::IntegerTyID: { |
573 | 6.78k | auto Signed = !TypeName.startswith("u"); |
574 | 6.78k | switch (Ty->getIntegerBitWidth()) { |
575 | 6.78k | case 8: |
576 | 1.20k | return Signed ? "i8" : "u8"0 ; |
577 | 6.78k | case 16: |
578 | 513 | return Signed ? "i16"507 : "u16"6 ; |
579 | 6.78k | case 32: |
580 | 4.03k | return Signed ? "i32" : "u32"0 ; |
581 | 6.78k | case 64: |
582 | 969 | return Signed ? "i64"963 : "u64"6 ; |
583 | 6.78k | default: |
584 | 58 | return "struct"; |
585 | 0 | } |
586 | 0 | } |
587 | 740 | case Type::HalfTyID: |
588 | 740 | return "f16"; |
589 | 235 | case Type::FloatTyID: |
590 | 235 | return "f32"; |
591 | 85 | case Type::DoubleTyID: |
592 | 85 | return "f64"; |
593 | 4.75k | case Type::PointerTyID: |
594 | 4.75k | return getValueType(Ty->getPointerElementType(), TypeName); |
595 | 1.71k | case Type::VectorTyID: |
596 | 1.71k | return getValueType(Ty->getVectorElementType(), TypeName); |
597 | 282 | default: |
598 | 282 | return "struct"; |
599 | 14.5k | } |
600 | 14.5k | } |
601 | | |
602 | 66 | std::string MetadataStreamerV3::getTypeName(Type *Ty, bool Signed) const { |
603 | 66 | switch (Ty->getTypeID()) { |
604 | 66 | case Type::IntegerTyID: { |
605 | 36 | if (!Signed) |
606 | 6 | return (Twine('u') + getTypeName(Ty, true)).str(); |
607 | 30 | |
608 | 30 | auto BitWidth = Ty->getIntegerBitWidth(); |
609 | 30 | switch (BitWidth) { |
610 | 30 | case 8: |
611 | 6 | return "char"; |
612 | 30 | case 16: |
613 | 6 | return "short"; |
614 | 30 | case 32: |
615 | 12 | return "int"; |
616 | 30 | case 64: |
617 | 6 | return "long"; |
618 | 30 | default: |
619 | 0 | return (Twine('i') + Twine(BitWidth)).str(); |
620 | 0 | } |
621 | 0 | } |
622 | 6 | case Type::HalfTyID: |
623 | 6 | return "half"; |
624 | 6 | case Type::FloatTyID: |
625 | 6 | return "float"; |
626 | 6 | case Type::DoubleTyID: |
627 | 6 | return "double"; |
628 | 6 | case Type::VectorTyID: { |
629 | 6 | auto VecTy = cast<VectorType>(Ty); |
630 | 6 | auto ElTy = VecTy->getElementType(); |
631 | 6 | auto NumElements = VecTy->getVectorNumElements(); |
632 | 6 | return (Twine(getTypeName(ElTy, Signed)) + Twine(NumElements)).str(); |
633 | 0 | } |
634 | 6 | default: |
635 | 6 | return "unknown"; |
636 | 66 | } |
637 | 66 | } |
638 | | |
639 | | msgpack::ArrayDocNode |
640 | 12 | MetadataStreamerV3::getWorkGroupDimensions(MDNode *Node) const { |
641 | 12 | auto Dims = HSAMetadataDoc->getArrayNode(); |
642 | 12 | if (Node->getNumOperands() != 3) |
643 | 0 | return Dims; |
644 | 12 | |
645 | 12 | for (auto &Op : Node->operands()) |
646 | 36 | Dims.push_back(Dims.getDocument()->getNode( |
647 | 36 | uint64_t(mdconst::extract<ConstantInt>(Op)->getZExtValue()))); |
648 | 12 | return Dims; |
649 | 12 | } |
650 | | |
651 | 290 | void MetadataStreamerV3::emitVersion() { |
652 | 290 | auto Version = HSAMetadataDoc->getArrayNode(); |
653 | 290 | Version.push_back(Version.getDocument()->getNode(VersionMajor)); |
654 | 290 | Version.push_back(Version.getDocument()->getNode(VersionMinor)); |
655 | 290 | getRootMetadata("amdhsa.version") = Version; |
656 | 290 | } |
657 | | |
658 | 290 | void MetadataStreamerV3::emitPrintf(const Module &Mod) { |
659 | 290 | auto Node = Mod.getNamedMetadata("llvm.printf.fmts"); |
660 | 290 | if (!Node) |
661 | 284 | return; |
662 | 6 | |
663 | 6 | auto Printf = HSAMetadataDoc->getArrayNode(); |
664 | 6 | for (auto Op : Node->operands()) |
665 | 12 | if (Op->getNumOperands()) |
666 | 12 | Printf.push_back(Printf.getDocument()->getNode( |
667 | 12 | cast<MDString>(Op->getOperand(0))->getString(), /*Copy=*/true)); |
668 | 6 | getRootMetadata("amdhsa.printf") = Printf; |
669 | 6 | } |
670 | | |
671 | | void MetadataStreamerV3::emitKernelLanguage(const Function &Func, |
672 | 3.26k | msgpack::MapDocNode Kern) { |
673 | 3.26k | // TODO: What about other languages? |
674 | 3.26k | auto Node = Func.getParent()->getNamedMetadata("opencl.ocl.version"); |
675 | 3.26k | if (!Node || !Node->getNumOperands()222 ) |
676 | 3.04k | return; |
677 | 222 | auto Op0 = Node->getOperand(0); |
678 | 222 | if (Op0->getNumOperands() <= 1) |
679 | 0 | return; |
680 | 222 | |
681 | 222 | Kern[".language"] = Kern.getDocument()->getNode("OpenCL C"); |
682 | 222 | auto LanguageVersion = Kern.getDocument()->getArrayNode(); |
683 | 222 | LanguageVersion.push_back(Kern.getDocument()->getNode( |
684 | 222 | mdconst::extract<ConstantInt>(Op0->getOperand(0))->getZExtValue())); |
685 | 222 | LanguageVersion.push_back(Kern.getDocument()->getNode( |
686 | 222 | mdconst::extract<ConstantInt>(Op0->getOperand(1))->getZExtValue())); |
687 | 222 | Kern[".language_version"] = LanguageVersion; |
688 | 222 | } |
689 | | |
690 | | void MetadataStreamerV3::emitKernelAttrs(const Function &Func, |
691 | 3.26k | msgpack::MapDocNode Kern) { |
692 | 3.26k | |
693 | 3.26k | if (auto Node = Func.getMetadata("reqd_work_group_size")) |
694 | 6 | Kern[".reqd_workgroup_size"] = getWorkGroupDimensions(Node); |
695 | 3.26k | if (auto Node = Func.getMetadata("work_group_size_hint")) |
696 | 6 | Kern[".workgroup_size_hint"] = getWorkGroupDimensions(Node); |
697 | 3.26k | if (auto Node = Func.getMetadata("vec_type_hint")) { |
698 | 54 | Kern[".vec_type_hint"] = Kern.getDocument()->getNode( |
699 | 54 | getTypeName( |
700 | 54 | cast<ValueAsMetadata>(Node->getOperand(0))->getType(), |
701 | 54 | mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue()), |
702 | 54 | /*Copy=*/true); |
703 | 54 | } |
704 | 3.26k | if (Func.hasFnAttribute("runtime-handle")) { |
705 | 6 | Kern[".device_enqueue_symbol"] = Kern.getDocument()->getNode( |
706 | 6 | Func.getFnAttribute("runtime-handle").getValueAsString().str(), |
707 | 6 | /*Copy=*/true); |
708 | 6 | } |
709 | 3.26k | } |
710 | | |
711 | | void MetadataStreamerV3::emitKernelArgs(const Function &Func, |
712 | 3.26k | msgpack::MapDocNode Kern) { |
713 | 3.26k | unsigned Offset = 0; |
714 | 3.26k | auto Args = HSAMetadataDoc->getArrayNode(); |
715 | 3.26k | for (auto &Arg : Func.args()) |
716 | 6.56k | emitKernelArg(Arg, Offset, Args); |
717 | 3.26k | |
718 | 3.26k | emitHiddenKernelArgs(Func, Offset, Args); |
719 | 3.26k | |
720 | 3.26k | Kern[".args"] = Args; |
721 | 3.26k | } |
722 | | |
723 | | void MetadataStreamerV3::emitKernelArg(const Argument &Arg, unsigned &Offset, |
724 | 6.56k | msgpack::ArrayDocNode Args) { |
725 | 6.56k | auto Func = Arg.getParent(); |
726 | 6.56k | auto ArgNo = Arg.getArgNo(); |
727 | 6.56k | const MDNode *Node; |
728 | 6.56k | |
729 | 6.56k | StringRef Name; |
730 | 6.56k | Node = Func->getMetadata("kernel_arg_name"); |
731 | 6.56k | if (Node && ArgNo < Node->getNumOperands()0 ) |
732 | 0 | Name = cast<MDString>(Node->getOperand(ArgNo))->getString(); |
733 | 6.56k | else if (Arg.hasName()) |
734 | 6.36k | Name = Arg.getName(); |
735 | 6.56k | |
736 | 6.56k | StringRef TypeName; |
737 | 6.56k | Node = Func->getMetadata("kernel_arg_type"); |
738 | 6.56k | if (Node && ArgNo < Node->getNumOperands()379 ) |
739 | 367 | TypeName = cast<MDString>(Node->getOperand(ArgNo))->getString(); |
740 | 6.56k | |
741 | 6.56k | StringRef BaseTypeName; |
742 | 6.56k | Node = Func->getMetadata("kernel_arg_base_type"); |
743 | 6.56k | if (Node && ArgNo < Node->getNumOperands()379 ) |
744 | 367 | BaseTypeName = cast<MDString>(Node->getOperand(ArgNo))->getString(); |
745 | 6.56k | |
746 | 6.56k | StringRef AccQual; |
747 | 6.56k | if (Arg.getType()->isPointerTy() && Arg.onlyReadsMemory()3.86k && |
748 | 6.56k | Arg.hasNoAliasAttr()30 ) { |
749 | 4 | AccQual = "read_only"; |
750 | 6.56k | } else { |
751 | 6.56k | Node = Func->getMetadata("kernel_arg_access_qual"); |
752 | 6.56k | if (Node && ArgNo < Node->getNumOperands()342 ) |
753 | 330 | AccQual = cast<MDString>(Node->getOperand(ArgNo))->getString(); |
754 | 6.56k | } |
755 | 6.56k | |
756 | 6.56k | StringRef TypeQual; |
757 | 6.56k | Node = Func->getMetadata("kernel_arg_type_qual"); |
758 | 6.56k | if (Node && ArgNo < Node->getNumOperands()343 ) |
759 | 331 | TypeQual = cast<MDString>(Node->getOperand(ArgNo))->getString(); |
760 | 6.56k | |
761 | 6.56k | Type *Ty = Arg.getType(); |
762 | 6.56k | const DataLayout &DL = Func->getParent()->getDataLayout(); |
763 | 6.56k | |
764 | 6.56k | unsigned PointeeAlign = 0; |
765 | 6.56k | if (auto PtrTy = dyn_cast<PointerType>(Ty)) { |
766 | 3.86k | if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) { |
767 | 178 | PointeeAlign = Arg.getParamAlignment(); |
768 | 178 | if (PointeeAlign == 0) |
769 | 136 | PointeeAlign = DL.getABITypeAlignment(PtrTy->getElementType()); |
770 | 178 | } |
771 | 3.86k | } |
772 | 6.56k | |
773 | 6.56k | emitKernelArg(Func->getParent()->getDataLayout(), Arg.getType(), |
774 | 6.56k | getValueKind(Arg.getType(), TypeQual, BaseTypeName), Offset, |
775 | 6.56k | Args, PointeeAlign, Name, TypeName, BaseTypeName, AccQual, |
776 | 6.56k | TypeQual); |
777 | 6.56k | } |
778 | | |
779 | | void MetadataStreamerV3::emitKernelArg(const DataLayout &DL, Type *Ty, |
780 | | StringRef ValueKind, unsigned &Offset, |
781 | | msgpack::ArrayDocNode Args, |
782 | | unsigned PointeeAlign, StringRef Name, |
783 | | StringRef TypeName, |
784 | | StringRef BaseTypeName, |
785 | 8.12k | StringRef AccQual, StringRef TypeQual) { |
786 | 8.12k | auto Arg = Args.getDocument()->getMapNode(); |
787 | 8.12k | |
788 | 8.12k | if (!Name.empty()) |
789 | 6.36k | Arg[".name"] = Arg.getDocument()->getNode(Name, /*Copy=*/true); |
790 | 8.12k | if (!TypeName.empty()) |
791 | 367 | Arg[".type_name"] = Arg.getDocument()->getNode(TypeName, /*Copy=*/true); |
792 | 8.12k | auto Size = DL.getTypeAllocSize(Ty); |
793 | 8.12k | auto Align = DL.getABITypeAlignment(Ty); |
794 | 8.12k | Arg[".size"] = Arg.getDocument()->getNode(Size); |
795 | 8.12k | Offset = alignTo(Offset, Align); |
796 | 8.12k | Arg[".offset"] = Arg.getDocument()->getNode(Offset); |
797 | 8.12k | Offset += Size; |
798 | 8.12k | Arg[".value_kind"] = Arg.getDocument()->getNode(ValueKind, /*Copy=*/true); |
799 | 8.12k | Arg[".value_type"] = |
800 | 8.12k | Arg.getDocument()->getNode(getValueType(Ty, BaseTypeName), /*Copy=*/true); |
801 | 8.12k | if (PointeeAlign) |
802 | 178 | Arg[".pointee_align"] = Arg.getDocument()->getNode(PointeeAlign); |
803 | 8.12k | |
804 | 8.12k | if (auto PtrTy = dyn_cast<PointerType>(Ty)) |
805 | 4.73k | if (auto Qualifier = getAddressSpaceQualifier(PtrTy->getAddressSpace())) |
806 | 4.73k | Arg[".address_space"] = Arg.getDocument()->getNode(*Qualifier, /*Copy=*/true); |
807 | 8.12k | |
808 | 8.12k | if (auto AQ = getAccessQualifier(AccQual)) |
809 | 22 | Arg[".access"] = Arg.getDocument()->getNode(*AQ, /*Copy=*/true); |
810 | 8.12k | |
811 | 8.12k | // TODO: Emit Arg[".actual_access"]. |
812 | 8.12k | |
813 | 8.12k | SmallVector<StringRef, 1> SplitTypeQuals; |
814 | 8.12k | TypeQual.split(SplitTypeQuals, " ", -1, false); |
815 | 8.12k | for (StringRef Key : SplitTypeQuals) { |
816 | 26 | if (Key == "const") |
817 | 7 | Arg[".is_const"] = Arg.getDocument()->getNode(true); |
818 | 19 | else if (Key == "restrict") |
819 | 7 | Arg[".is_restrict"] = Arg.getDocument()->getNode(true); |
820 | 12 | else if (Key == "volatile") |
821 | 6 | Arg[".is_volatile"] = Arg.getDocument()->getNode(true); |
822 | 6 | else if (Key == "pipe") |
823 | 6 | Arg[".is_pipe"] = Arg.getDocument()->getNode(true); |
824 | 26 | } |
825 | 8.12k | |
826 | 8.12k | Args.push_back(Arg); |
827 | 8.12k | } |
828 | | |
829 | | void MetadataStreamerV3::emitHiddenKernelArgs(const Function &Func, |
830 | | unsigned &Offset, |
831 | 3.26k | msgpack::ArrayDocNode Args) { |
832 | 3.26k | int HiddenArgNumBytes = |
833 | 3.26k | getIntegerAttribute(Func, "amdgpu-implicitarg-num-bytes", 0); |
834 | 3.26k | |
835 | 3.26k | if (!HiddenArgNumBytes) |
836 | 3.03k | return; |
837 | 232 | |
838 | 232 | auto &DL = Func.getParent()->getDataLayout(); |
839 | 232 | auto Int64Ty = Type::getInt64Ty(Func.getContext()); |
840 | 232 | |
841 | 232 | if (HiddenArgNumBytes >= 8) |
842 | 232 | emitKernelArg(DL, Int64Ty, "hidden_global_offset_x", Offset, Args); |
843 | 232 | if (HiddenArgNumBytes >= 16) |
844 | 229 | emitKernelArg(DL, Int64Ty, "hidden_global_offset_y", Offset, Args); |
845 | 232 | if (HiddenArgNumBytes >= 24) |
846 | 226 | emitKernelArg(DL, Int64Ty, "hidden_global_offset_z", Offset, Args); |
847 | 232 | |
848 | 232 | auto Int8PtrTy = |
849 | 232 | Type::getInt8PtrTy(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS); |
850 | 232 | |
851 | 232 | // Emit "printf buffer" argument if printf is used, otherwise emit dummy |
852 | 232 | // "none" argument. |
853 | 232 | if (HiddenArgNumBytes >= 32) { |
854 | 223 | if (Func.getParent()->getNamedMetadata("llvm.printf.fmts")) |
855 | 210 | emitKernelArg(DL, Int8PtrTy, "hidden_printf_buffer", Offset, Args); |
856 | 13 | else |
857 | 13 | emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, Args); |
858 | 223 | } |
859 | 232 | |
860 | 232 | // Emit "default queue" and "completion action" arguments if enqueue kernel is |
861 | 232 | // used, otherwise emit dummy "none" arguments. |
862 | 232 | if (HiddenArgNumBytes >= 48) { |
863 | 220 | if (Func.hasFnAttribute("calls-enqueue-kernel")) { |
864 | 8 | emitKernelArg(DL, Int8PtrTy, "hidden_default_queue", Offset, Args); |
865 | 8 | emitKernelArg(DL, Int8PtrTy, "hidden_completion_action", Offset, Args); |
866 | 212 | } else { |
867 | 212 | emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, Args); |
868 | 212 | emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, Args); |
869 | 212 | } |
870 | 220 | } |
871 | 232 | |
872 | 232 | // Emit the pointer argument for multi-grid object. |
873 | 232 | if (HiddenArgNumBytes >= 56) |
874 | 213 | emitKernelArg(DL, Int8PtrTy, "hidden_multigrid_sync_arg", Offset, Args); |
875 | 232 | } |
876 | | |
877 | | msgpack::MapDocNode |
878 | | MetadataStreamerV3::getHSAKernelProps(const MachineFunction &MF, |
879 | 3.26k | const SIProgramInfo &ProgramInfo) const { |
880 | 3.26k | const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>(); |
881 | 3.26k | const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>(); |
882 | 3.26k | const Function &F = MF.getFunction(); |
883 | 3.26k | |
884 | 3.26k | auto Kern = HSAMetadataDoc->getMapNode(); |
885 | 3.26k | |
886 | 3.26k | unsigned MaxKernArgAlign; |
887 | 3.26k | Kern[".kernarg_segment_size"] = Kern.getDocument()->getNode( |
888 | 3.26k | STM.getKernArgSegmentSize(F, MaxKernArgAlign)); |
889 | 3.26k | Kern[".group_segment_fixed_size"] = |
890 | 3.26k | Kern.getDocument()->getNode(ProgramInfo.LDSSize); |
891 | 3.26k | Kern[".private_segment_fixed_size"] = |
892 | 3.26k | Kern.getDocument()->getNode(ProgramInfo.ScratchSize); |
893 | 3.26k | Kern[".kernarg_segment_align"] = |
894 | 3.26k | Kern.getDocument()->getNode(std::max(uint32_t(4), MaxKernArgAlign)); |
895 | 3.26k | Kern[".wavefront_size"] = |
896 | 3.26k | Kern.getDocument()->getNode(STM.getWavefrontSize()); |
897 | 3.26k | Kern[".sgpr_count"] = Kern.getDocument()->getNode(ProgramInfo.NumSGPR); |
898 | 3.26k | Kern[".vgpr_count"] = Kern.getDocument()->getNode(ProgramInfo.NumVGPR); |
899 | 3.26k | Kern[".max_flat_workgroup_size"] = |
900 | 3.26k | Kern.getDocument()->getNode(MFI.getMaxFlatWorkGroupSize()); |
901 | 3.26k | Kern[".sgpr_spill_count"] = |
902 | 3.26k | Kern.getDocument()->getNode(MFI.getNumSpilledSGPRs()); |
903 | 3.26k | Kern[".vgpr_spill_count"] = |
904 | 3.26k | Kern.getDocument()->getNode(MFI.getNumSpilledVGPRs()); |
905 | 3.26k | |
906 | 3.26k | return Kern; |
907 | 3.26k | } |
908 | | |
909 | 289 | bool MetadataStreamerV3::emitTo(AMDGPUTargetStreamer &TargetStreamer) { |
910 | 289 | return TargetStreamer.EmitHSAMetadata(*HSAMetadataDoc, true); |
911 | 289 | } |
912 | | |
913 | 290 | void MetadataStreamerV3::begin(const Module &Mod) { |
914 | 290 | emitVersion(); |
915 | 290 | emitPrintf(Mod); |
916 | 290 | getRootMetadata("amdhsa.kernels") = HSAMetadataDoc->getArrayNode(); |
917 | 290 | } |
918 | | |
919 | 289 | void MetadataStreamerV3::end() { |
920 | 289 | std::string HSAMetadataString; |
921 | 289 | raw_string_ostream StrOS(HSAMetadataString); |
922 | 289 | HSAMetadataDoc->toYAML(StrOS); |
923 | 289 | |
924 | 289 | if (DumpHSAMetadata) |
925 | 4 | dump(StrOS.str()); |
926 | 289 | if (VerifyHSAMetadata) |
927 | 5 | verify(StrOS.str()); |
928 | 289 | } |
929 | | |
930 | | void MetadataStreamerV3::emitKernel(const MachineFunction &MF, |
931 | 3.26k | const SIProgramInfo &ProgramInfo) { |
932 | 3.26k | auto &Func = MF.getFunction(); |
933 | 3.26k | auto Kern = getHSAKernelProps(MF, ProgramInfo); |
934 | 3.26k | |
935 | 3.26k | assert(Func.getCallingConv() == CallingConv::AMDGPU_KERNEL || |
936 | 3.26k | Func.getCallingConv() == CallingConv::SPIR_KERNEL); |
937 | 3.26k | |
938 | 3.26k | auto Kernels = |
939 | 3.26k | getRootMetadata("amdhsa.kernels").getArray(/*Convert=*/true); |
940 | 3.26k | |
941 | 3.26k | { |
942 | 3.26k | Kern[".name"] = Kern.getDocument()->getNode(Func.getName()); |
943 | 3.26k | Kern[".symbol"] = Kern.getDocument()->getNode( |
944 | 3.26k | (Twine(Func.getName()) + Twine(".kd")).str(), /*Copy=*/true); |
945 | 3.26k | emitKernelLanguage(Func, Kern); |
946 | 3.26k | emitKernelAttrs(Func, Kern); |
947 | 3.26k | emitKernelArgs(Func, Kern); |
948 | 3.26k | } |
949 | 3.26k | |
950 | 3.26k | Kernels.push_back(Kern); |
951 | 3.26k | } |
952 | | |
953 | | } // end namespace HSAMD |
954 | | } // end namespace AMDGPU |
955 | | } // end namespace llvm |