/Users/buildslave/jenkins/workspace/coverage/llvm-project/clang/lib/Driver/ToolChains/AMDGPU.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===--- AMDGPU.cpp - AMDGPU ToolChain Implementations ----------*- C++ -*-===// |
2 | | // |
3 | | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | | // See https://llvm.org/LICENSE.txt for license information. |
5 | | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | | // |
7 | | //===----------------------------------------------------------------------===// |
8 | | |
9 | | #include "AMDGPU.h" |
10 | | #include "CommonArgs.h" |
11 | | #include "clang/Basic/TargetID.h" |
12 | | #include "clang/Config/config.h" |
13 | | #include "clang/Driver/Compilation.h" |
14 | | #include "clang/Driver/DriverDiagnostic.h" |
15 | | #include "clang/Driver/InputInfo.h" |
16 | | #include "clang/Driver/Options.h" |
17 | | #include "llvm/ADT/StringExtras.h" |
18 | | #include "llvm/Option/ArgList.h" |
19 | | #include "llvm/Support/Error.h" |
20 | | #include "llvm/Support/LineIterator.h" |
21 | | #include "llvm/Support/Path.h" |
22 | | #include "llvm/Support/Process.h" |
23 | | #include "llvm/Support/VirtualFileSystem.h" |
24 | | #include "llvm/TargetParser/Host.h" |
25 | | #include <optional> |
26 | | #include <system_error> |
27 | | |
28 | | using namespace clang::driver; |
29 | | using namespace clang::driver::tools; |
30 | | using namespace clang::driver::toolchains; |
31 | | using namespace clang; |
32 | | using namespace llvm::opt; |
33 | | |
34 | | // Look for sub-directory starts with PackageName under ROCm candidate path. |
35 | | // If there is one and only one matching sub-directory found, append the |
36 | | // sub-directory to Path. If there is no matching sub-directory or there are |
37 | | // more than one matching sub-directories, diagnose them. Returns the full |
38 | | // path of the package if there is only one matching sub-directory, otherwise |
39 | | // returns an empty string. |
40 | | llvm::SmallString<0> |
41 | | RocmInstallationDetector::findSPACKPackage(const Candidate &Cand, |
42 | 97.6k | StringRef PackageName) { |
43 | 97.6k | if (!Cand.isSPACK()) |
44 | 97.6k | return {}; |
45 | 7 | std::error_code EC; |
46 | 7 | std::string Prefix = Twine(PackageName + "-" + Cand.SPACKReleaseStr).str(); |
47 | 7 | llvm::SmallVector<llvm::SmallString<0>> SubDirs; |
48 | 7 | for (llvm::vfs::directory_iterator File = D.getVFS().dir_begin(Cand.Path, EC), |
49 | 7 | FileEnd; |
50 | 12 | File != FileEnd && !EC5 ; File.increment(EC)5 ) { |
51 | 5 | llvm::StringRef FileName = llvm::sys::path::filename(File->path()); |
52 | 5 | if (FileName.startswith(Prefix)) { |
53 | 2 | SubDirs.push_back(FileName); |
54 | 2 | if (SubDirs.size() > 1) |
55 | 0 | break; |
56 | 2 | } |
57 | 5 | } |
58 | 7 | if (SubDirs.size() == 1) { |
59 | 2 | auto PackagePath = Cand.Path; |
60 | 2 | llvm::sys::path::append(PackagePath, SubDirs[0]); |
61 | 2 | return PackagePath; |
62 | 2 | } |
63 | 5 | if (SubDirs.size() == 0 && Verbose1 ) { |
64 | 0 | llvm::errs() << "SPACK package " << Prefix << " not found at " << Cand.Path |
65 | 0 | << '\n'; |
66 | 0 | return {}; |
67 | 0 | } |
68 | | |
69 | 5 | if (SubDirs.size() > 1 && Verbose0 ) { |
70 | 0 | llvm::errs() << "Cannot use SPACK package " << Prefix << " at " << Cand.Path |
71 | 0 | << " due to multiple installations for the same version\n"; |
72 | 0 | } |
73 | 5 | return {}; |
74 | 5 | } |
75 | | |
76 | 122 | void RocmInstallationDetector::scanLibDevicePath(llvm::StringRef Path) { |
77 | 122 | assert(!Path.empty()); |
78 | | |
79 | 122 | const StringRef Suffix(".bc"); |
80 | 122 | const StringRef Suffix2(".amdgcn.bc"); |
81 | | |
82 | 122 | std::error_code EC; |
83 | 122 | for (llvm::vfs::directory_iterator LI = D.getVFS().dir_begin(Path, EC), LE; |
84 | 2.54k | !EC && LI != LE; LI = LI.increment(EC)2.42k ) { |
85 | 2.42k | StringRef FilePath = LI->path(); |
86 | 2.42k | StringRef FileName = llvm::sys::path::filename(FilePath); |
87 | 2.42k | if (!FileName.endswith(Suffix)) |
88 | 0 | continue; |
89 | | |
90 | 2.42k | StringRef BaseName; |
91 | 2.42k | if (FileName.endswith(Suffix2)) |
92 | 0 | BaseName = FileName.drop_back(Suffix2.size()); |
93 | 2.42k | else if (FileName.endswith(Suffix)) |
94 | 2.42k | BaseName = FileName.drop_back(Suffix.size()); |
95 | | |
96 | 2.42k | const StringRef ABIVersionPrefix = "oclc_abi_version_"; |
97 | 2.42k | if (BaseName == "ocml") { |
98 | 105 | OCML = FilePath; |
99 | 2.31k | } else if (BaseName == "ockl") { |
100 | 105 | OCKL = FilePath; |
101 | 2.21k | } else if (BaseName == "opencl") { |
102 | 105 | OpenCL = FilePath; |
103 | 2.10k | } else if (BaseName == "hip") { |
104 | 105 | HIP = FilePath; |
105 | 2.00k | } else if (BaseName == "asanrtl") { |
106 | 104 | AsanRTL = FilePath; |
107 | 1.89k | } else if (BaseName == "oclc_finite_only_off") { |
108 | 105 | FiniteOnly.Off = FilePath; |
109 | 1.79k | } else if (BaseName == "oclc_finite_only_on") { |
110 | 105 | FiniteOnly.On = FilePath; |
111 | 1.68k | } else if (BaseName == "oclc_daz_opt_on") { |
112 | 105 | DenormalsAreZero.On = FilePath; |
113 | 1.58k | } else if (BaseName == "oclc_daz_opt_off") { |
114 | 105 | DenormalsAreZero.Off = FilePath; |
115 | 1.47k | } else if (BaseName == "oclc_correctly_rounded_sqrt_on") { |
116 | 105 | CorrectlyRoundedSqrt.On = FilePath; |
117 | 1.37k | } else if (BaseName == "oclc_correctly_rounded_sqrt_off") { |
118 | 105 | CorrectlyRoundedSqrt.Off = FilePath; |
119 | 1.26k | } else if (BaseName == "oclc_unsafe_math_on") { |
120 | 105 | UnsafeMath.On = FilePath; |
121 | 1.16k | } else if (BaseName == "oclc_unsafe_math_off") { |
122 | 105 | UnsafeMath.Off = FilePath; |
123 | 1.05k | } else if (BaseName == "oclc_wavefrontsize64_on") { |
124 | 105 | WavefrontSize64.On = FilePath; |
125 | 954 | } else if (BaseName == "oclc_wavefrontsize64_off") { |
126 | 105 | WavefrontSize64.Off = FilePath; |
127 | 849 | } else if (BaseName.startswith(ABIVersionPrefix)) { |
128 | 202 | unsigned ABIVersionNumber; |
129 | 202 | if (BaseName.drop_front(ABIVersionPrefix.size()) |
130 | 202 | .getAsInteger(/*Redex=*/0, ABIVersionNumber)) |
131 | 0 | continue; |
132 | 202 | ABIVersionMap[ABIVersionNumber] = FilePath.str(); |
133 | 647 | } else { |
134 | | // Process all bitcode filenames that look like |
135 | | // ocl_isa_version_XXX.amdgcn.bc |
136 | 647 | const StringRef DeviceLibPrefix = "oclc_isa_version_"; |
137 | 647 | if (!BaseName.startswith(DeviceLibPrefix)) |
138 | 17 | continue; |
139 | | |
140 | 630 | StringRef IsaVersionNumber = |
141 | 630 | BaseName.drop_front(DeviceLibPrefix.size()); |
142 | | |
143 | 630 | llvm::Twine GfxName = Twine("gfx") + IsaVersionNumber; |
144 | 630 | SmallString<8> Tmp; |
145 | 630 | LibDeviceMap.insert( |
146 | 630 | std::make_pair(GfxName.toStringRef(Tmp), FilePath.str())); |
147 | 630 | } |
148 | 2.42k | } |
149 | 122 | } |
150 | | |
151 | | // Parse and extract version numbers from `.hipVersion`. Return `true` if |
152 | | // the parsing fails. |
153 | 181 | bool RocmInstallationDetector::parseHIPVersionFile(llvm::StringRef V) { |
154 | 181 | SmallVector<StringRef, 4> VersionParts; |
155 | 181 | V.split(VersionParts, '\n'); |
156 | 181 | unsigned Major = ~0U; |
157 | 181 | unsigned Minor = ~0U; |
158 | 1.23k | for (auto Part : VersionParts) { |
159 | 1.23k | auto Splits = Part.rtrim().split('='); |
160 | 1.23k | if (Splits.first == "HIP_VERSION_MAJOR") { |
161 | 181 | if (Splits.second.getAsInteger(0, Major)) |
162 | 0 | return true; |
163 | 1.05k | } else if (Splits.first == "HIP_VERSION_MINOR") { |
164 | 181 | if (Splits.second.getAsInteger(0, Minor)) |
165 | 0 | return true; |
166 | 877 | } else if (Splits.first == "HIP_VERSION_PATCH") |
167 | 175 | VersionPatch = Splits.second.str(); |
168 | 1.23k | } |
169 | 181 | if (Major == ~0U || Minor == ~0U) |
170 | 0 | return true; |
171 | 181 | VersionMajorMinor = llvm::VersionTuple(Major, Minor); |
172 | 181 | DetectedVersion = |
173 | 181 | (Twine(Major) + "." + Twine(Minor) + "." + VersionPatch).str(); |
174 | 181 | return false; |
175 | 181 | } |
176 | | |
177 | | /// \returns a list of candidate directories for ROCm installation, which is |
178 | | /// cached and populated only once. |
179 | | const SmallVectorImpl<RocmInstallationDetector::Candidate> & |
180 | 31.9k | RocmInstallationDetector::getInstallationPathCandidates() { |
181 | | |
182 | | // Return the cached candidate list if it has already been populated. |
183 | 31.9k | if (!ROCmSearchDirs.empty()) |
184 | 450 | return ROCmSearchDirs; |
185 | | |
186 | 31.5k | auto DoPrintROCmSearchDirs = [&]() 31.5k { |
187 | 31.5k | if (PrintROCmSearchDirs) |
188 | 39 | for (auto Cand : ROCmSearchDirs)13 { |
189 | 39 | llvm::errs() << "ROCm installation search path"; |
190 | 39 | if (Cand.isSPACK()) |
191 | 2 | llvm::errs() << " (Spack " << Cand.SPACKReleaseStr << ")"; |
192 | 39 | llvm::errs() << ": " << Cand.Path << '\n'; |
193 | 39 | } |
194 | 31.5k | }; |
195 | | |
196 | | // For candidate specified by --rocm-path we do not do strict check, i.e., |
197 | | // checking existence of HIP version file and device library files. |
198 | 31.5k | if (!RocmPathArg.empty()) { |
199 | 161 | ROCmSearchDirs.emplace_back(RocmPathArg.str()); |
200 | 161 | DoPrintROCmSearchDirs(); |
201 | 161 | return ROCmSearchDirs; |
202 | 31.3k | } else if (std::optional<std::string> RocmPathEnv = |
203 | 31.3k | llvm::sys::Process::GetEnv("ROCM_PATH")) { |
204 | 8 | if (!RocmPathEnv->empty()) { |
205 | 8 | ROCmSearchDirs.emplace_back(std::move(*RocmPathEnv)); |
206 | 8 | DoPrintROCmSearchDirs(); |
207 | 8 | return ROCmSearchDirs; |
208 | 8 | } |
209 | 8 | } |
210 | | |
211 | | // Try to find relative to the compiler binary. |
212 | 31.3k | const char *InstallDir = D.getInstalledDir(); |
213 | | |
214 | | // Check both a normal Unix prefix position of the clang binary, as well as |
215 | | // the Windows-esque layout the ROCm packages use with the host architecture |
216 | | // subdirectory of bin. |
217 | 32.2k | auto DeduceROCmPath = [](StringRef ClangPath) { |
218 | | // Strip off directory (usually bin) |
219 | 32.2k | StringRef ParentDir = llvm::sys::path::parent_path(ClangPath); |
220 | 32.2k | StringRef ParentName = llvm::sys::path::filename(ParentDir); |
221 | | |
222 | | // Some builds use bin/{host arch}, so go up again. |
223 | 32.2k | if (ParentName == "bin") { |
224 | 16 | ParentDir = llvm::sys::path::parent_path(ParentDir); |
225 | 16 | ParentName = llvm::sys::path::filename(ParentDir); |
226 | 16 | } |
227 | | |
228 | | // Detect ROCm packages built with SPACK. |
229 | | // clang is installed at |
230 | | // <rocm_root>/llvm-amdgpu-<rocm_release_string>-<hash>/bin directory. |
231 | | // We only consider the parent directory of llvm-amdgpu package as ROCm |
232 | | // installation candidate for SPACK. |
233 | 32.2k | if (ParentName.startswith("llvm-amdgpu-")) { |
234 | 4 | auto SPACKPostfix = |
235 | 4 | ParentName.drop_front(strlen("llvm-amdgpu-")).split('-'); |
236 | 4 | auto SPACKReleaseStr = SPACKPostfix.first; |
237 | 4 | if (!SPACKReleaseStr.empty()) { |
238 | 4 | ParentDir = llvm::sys::path::parent_path(ParentDir); |
239 | 4 | return Candidate(ParentDir.str(), /*StrictChecking=*/true, |
240 | 4 | SPACKReleaseStr); |
241 | 4 | } |
242 | 4 | } |
243 | | |
244 | | // Some versions of the rocm llvm package install to /opt/rocm/llvm/bin |
245 | | // Some versions of the aomp package install to /opt/rocm/aomp/bin |
246 | 32.2k | if (32.2k ParentName == "llvm"32.2k || ParentName.startswith("aomp")) |
247 | 0 | ParentDir = llvm::sys::path::parent_path(ParentDir); |
248 | | |
249 | 32.2k | return Candidate(ParentDir.str(), /*StrictChecking=*/true); |
250 | 32.2k | }; |
251 | | |
252 | | // Deduce ROCm path by the path used to invoke clang. Do not resolve symbolic |
253 | | // link of clang itself. |
254 | 31.3k | ROCmSearchDirs.emplace_back(DeduceROCmPath(InstallDir)); |
255 | | |
256 | | // Deduce ROCm path by the real path of the invoked clang, resolving symbolic |
257 | | // link of clang itself. |
258 | 31.3k | llvm::SmallString<256> RealClangPath; |
259 | 31.3k | llvm::sys::fs::real_path(D.getClangProgramPath(), RealClangPath); |
260 | 31.3k | auto ParentPath = llvm::sys::path::parent_path(RealClangPath); |
261 | 31.3k | if (ParentPath != InstallDir) |
262 | 951 | ROCmSearchDirs.emplace_back(DeduceROCmPath(ParentPath)); |
263 | | |
264 | | // Device library may be installed in clang or resource directory. |
265 | 31.3k | auto ClangRoot = llvm::sys::path::parent_path(InstallDir); |
266 | 31.3k | auto RealClangRoot = llvm::sys::path::parent_path(ParentPath); |
267 | 31.3k | ROCmSearchDirs.emplace_back(ClangRoot.str(), /*StrictChecking=*/true); |
268 | 31.3k | if (RealClangRoot != ClangRoot) |
269 | 951 | ROCmSearchDirs.emplace_back(RealClangRoot.str(), /*StrictChecking=*/true); |
270 | 31.3k | ROCmSearchDirs.emplace_back(D.ResourceDir, |
271 | 31.3k | /*StrictChecking=*/true); |
272 | | |
273 | 31.3k | ROCmSearchDirs.emplace_back(D.SysRoot + "/opt/rocm", |
274 | 31.3k | /*StrictChecking=*/true); |
275 | | |
276 | | // Find the latest /opt/rocm-{release} directory. |
277 | 31.3k | std::error_code EC; |
278 | 31.3k | std::string LatestROCm; |
279 | 31.3k | llvm::VersionTuple LatestVer; |
280 | | // Get ROCm version from ROCm directory name. |
281 | 31.3k | auto GetROCmVersion = [](StringRef DirName) { |
282 | 8 | llvm::VersionTuple V; |
283 | 8 | std::string VerStr = DirName.drop_front(strlen("rocm-")).str(); |
284 | | // The ROCm directory name follows the format of |
285 | | // rocm-{major}.{minor}.{subMinor}[-{build}] |
286 | 8 | std::replace(VerStr.begin(), VerStr.end(), '-', '.'); |
287 | 8 | V.tryParse(VerStr); |
288 | 8 | return V; |
289 | 8 | }; |
290 | 31.3k | for (llvm::vfs::directory_iterator |
291 | 31.3k | File = D.getVFS().dir_begin(D.SysRoot + "/opt", EC), |
292 | 31.3k | FileEnd; |
293 | 91.6k | File != FileEnd && !EC60.3k ; File.increment(EC)60.3k ) { |
294 | 60.3k | llvm::StringRef FileName = llvm::sys::path::filename(File->path()); |
295 | 60.3k | if (!FileName.startswith("rocm-")) |
296 | 60.3k | continue; |
297 | 10 | if (LatestROCm.empty()) { |
298 | 4 | LatestROCm = FileName.str(); |
299 | 4 | LatestVer = GetROCmVersion(LatestROCm); |
300 | 4 | continue; |
301 | 4 | } |
302 | 6 | auto Ver = GetROCmVersion(FileName); |
303 | 6 | if (LatestVer < Ver) { |
304 | 4 | LatestROCm = FileName.str(); |
305 | 4 | LatestVer = Ver; |
306 | 4 | } |
307 | 6 | } |
308 | 31.3k | if (!LatestROCm.empty()) |
309 | 4 | ROCmSearchDirs.emplace_back(D.SysRoot + "/opt/" + LatestROCm, |
310 | 4 | /*StrictChecking=*/true); |
311 | | |
312 | 31.3k | ROCmSearchDirs.emplace_back(D.SysRoot + "/usr/local", |
313 | 31.3k | /*StrictChecking=*/true); |
314 | 31.3k | ROCmSearchDirs.emplace_back(D.SysRoot + "/usr", |
315 | 31.3k | /*StrictChecking=*/true); |
316 | | |
317 | 31.3k | DoPrintROCmSearchDirs(); |
318 | 31.3k | return ROCmSearchDirs; |
319 | 31.5k | } |
320 | | |
321 | | RocmInstallationDetector::RocmInstallationDetector( |
322 | | const Driver &D, const llvm::Triple &HostTriple, |
323 | | const llvm::opt::ArgList &Args, bool DetectHIPRuntime, bool DetectDeviceLib) |
324 | 31.5k | : D(D) { |
325 | 31.5k | Verbose = Args.hasArg(options::OPT_v); |
326 | 31.5k | RocmPathArg = Args.getLastArgValue(clang::driver::options::OPT_rocm_path_EQ); |
327 | 31.5k | PrintROCmSearchDirs = |
328 | 31.5k | Args.hasArg(clang::driver::options::OPT_print_rocm_search_dirs); |
329 | 31.5k | RocmDeviceLibPathArg = |
330 | 31.5k | Args.getAllArgValues(clang::driver::options::OPT_rocm_device_lib_path_EQ); |
331 | 31.5k | HIPPathArg = Args.getLastArgValue(clang::driver::options::OPT_hip_path_EQ); |
332 | 31.5k | if (auto *A = Args.getLastArg(clang::driver::options::OPT_hip_version_EQ)) { |
333 | 6 | HIPVersionArg = A->getValue(); |
334 | 6 | unsigned Major = ~0U; |
335 | 6 | unsigned Minor = ~0U; |
336 | 6 | SmallVector<StringRef, 3> Parts; |
337 | 6 | HIPVersionArg.split(Parts, '.'); |
338 | 6 | if (Parts.size()) |
339 | 6 | Parts[0].getAsInteger(0, Major); |
340 | 6 | if (Parts.size() > 1) |
341 | 5 | Parts[1].getAsInteger(0, Minor); |
342 | 6 | if (Parts.size() > 2) |
343 | 3 | VersionPatch = Parts[2].str(); |
344 | 6 | if (VersionPatch.empty()) |
345 | 3 | VersionPatch = "0"; |
346 | 6 | if (Major != ~0U && Minor == ~0U5 ) |
347 | 1 | Minor = 0; |
348 | 6 | if (Major == ~0U || Minor == ~0U5 ) |
349 | 1 | D.Diag(diag::err_drv_invalid_value) |
350 | 1 | << A->getAsString(Args) << HIPVersionArg; |
351 | | |
352 | 6 | VersionMajorMinor = llvm::VersionTuple(Major, Minor); |
353 | 6 | DetectedVersion = |
354 | 6 | (Twine(Major) + "." + Twine(Minor) + "." + VersionPatch).str(); |
355 | 31.5k | } else { |
356 | 31.5k | VersionPatch = DefaultVersionPatch; |
357 | 31.5k | VersionMajorMinor = |
358 | 31.5k | llvm::VersionTuple(DefaultVersionMajor, DefaultVersionMinor); |
359 | 31.5k | DetectedVersion = (Twine(DefaultVersionMajor) + "." + |
360 | 31.5k | Twine(DefaultVersionMinor) + "." + VersionPatch) |
361 | 31.5k | .str(); |
362 | 31.5k | } |
363 | | |
364 | 31.5k | if (DetectHIPRuntime) |
365 | 31.5k | detectHIPRuntime(); |
366 | 31.5k | if (DetectDeviceLib) |
367 | 0 | detectDeviceLibrary(); |
368 | 31.5k | } |
369 | | |
370 | 487 | void RocmInstallationDetector::detectDeviceLibrary() { |
371 | 487 | assert(LibDevicePath.empty()); |
372 | | |
373 | 487 | if (!RocmDeviceLibPathArg.empty()) |
374 | 27 | LibDevicePath = RocmDeviceLibPathArg[RocmDeviceLibPathArg.size() - 1]; |
375 | 460 | else if (std::optional<std::string> LibPathEnv = |
376 | 460 | llvm::sys::Process::GetEnv("HIP_DEVICE_LIB_PATH")) |
377 | 3 | LibDevicePath = std::move(*LibPathEnv); |
378 | | |
379 | 487 | auto &FS = D.getVFS(); |
380 | 487 | if (!LibDevicePath.empty()) { |
381 | | // Maintain compatability with HIP flag/envvar pointing directly at the |
382 | | // bitcode library directory. This points directly at the library path instead |
383 | | // of the rocm root installation. |
384 | 30 | if (!FS.exists(LibDevicePath)) |
385 | 0 | return; |
386 | | |
387 | 30 | scanLibDevicePath(LibDevicePath); |
388 | 30 | HasDeviceLibrary = allGenericLibsValid() && !LibDeviceMap.empty()13 ; |
389 | 30 | return; |
390 | 30 | } |
391 | | |
392 | | // Check device library exists at the given path. |
393 | 2.73k | auto CheckDeviceLib = [&](StringRef Path, bool StrictChecking) 457 { |
394 | 2.73k | bool CheckLibDevice = (!NoBuiltinLibs || StrictChecking0 ); |
395 | 2.73k | if (CheckLibDevice && !FS.exists(Path)) |
396 | 2.64k | return false; |
397 | | |
398 | 92 | scanLibDevicePath(Path); |
399 | | |
400 | 92 | if (!NoBuiltinLibs) { |
401 | | // Check that the required non-target libraries are all available. |
402 | 92 | if (!allGenericLibsValid()) |
403 | 0 | return false; |
404 | | |
405 | | // Check that we have found at least one libdevice that we can link in |
406 | | // if -nobuiltinlib hasn't been specified. |
407 | 92 | if (LibDeviceMap.empty()) |
408 | 0 | return false; |
409 | 92 | } |
410 | 92 | return true; |
411 | 92 | }; |
412 | | |
413 | | // Find device libraries in <LLVM_DIR>/lib/clang/<ver>/lib/amdgcn/bitcode |
414 | 457 | LibDevicePath = D.ResourceDir; |
415 | 457 | llvm::sys::path::append(LibDevicePath, CLANG_INSTALL_LIBDIR_BASENAME, |
416 | 457 | "amdgcn", "bitcode"); |
417 | 457 | HasDeviceLibrary = CheckDeviceLib(LibDevicePath, true); |
418 | 457 | if (HasDeviceLibrary) |
419 | 1 | return; |
420 | | |
421 | | // Find device libraries in a legacy ROCm directory structure |
422 | | // ${ROCM_ROOT}/amdgcn/bitcode/* |
423 | 456 | auto &ROCmDirs = getInstallationPathCandidates(); |
424 | 2.27k | for (const auto &Candidate : ROCmDirs) { |
425 | 2.27k | LibDevicePath = Candidate.Path; |
426 | 2.27k | llvm::sys::path::append(LibDevicePath, "amdgcn", "bitcode"); |
427 | 2.27k | HasDeviceLibrary = CheckDeviceLib(LibDevicePath, Candidate.StrictChecking); |
428 | 2.27k | if (HasDeviceLibrary) |
429 | 91 | return; |
430 | 2.27k | } |
431 | 456 | } |
432 | | |
433 | 31.5k | void RocmInstallationDetector::detectHIPRuntime() { |
434 | 31.5k | SmallVector<Candidate, 4> HIPSearchDirs; |
435 | 31.5k | if (!HIPPathArg.empty()) |
436 | 18 | HIPSearchDirs.emplace_back(HIPPathArg.str()); |
437 | 31.5k | else if (std::optional<std::string> HIPPathEnv = |
438 | 31.5k | llvm::sys::Process::GetEnv("HIP_PATH")) { |
439 | 7 | if (!HIPPathEnv->empty()) |
440 | 5 | HIPSearchDirs.emplace_back(std::move(*HIPPathEnv)); |
441 | 7 | } |
442 | 31.5k | if (HIPSearchDirs.empty()) |
443 | 31.5k | HIPSearchDirs.append(getInstallationPathCandidates()); |
444 | 31.5k | auto &FS = D.getVFS(); |
445 | | |
446 | 190k | for (const auto &Candidate : HIPSearchDirs) { |
447 | 190k | InstallPath = Candidate.Path; |
448 | 190k | if (InstallPath.empty() || !FS.exists(InstallPath)150k ) |
449 | 92.4k | continue; |
450 | | // HIP runtime built by SPACK is installed to |
451 | | // <rocm_root>/hip-<rocm_release_string>-<hash> directory. |
452 | 97.6k | auto SPACKPath = findSPACKPackage(Candidate, "hip"); |
453 | 18.4E | InstallPath = SPACKPath.empty()97.6k ? InstallPath97.6k : SPACKPath; |
454 | | |
455 | 97.6k | BinPath = InstallPath; |
456 | 97.6k | llvm::sys::path::append(BinPath, "bin"); |
457 | 97.6k | IncludePath = InstallPath; |
458 | 97.6k | llvm::sys::path::append(IncludePath, "include"); |
459 | 97.6k | LibPath = InstallPath; |
460 | 97.6k | llvm::sys::path::append(LibPath, "lib"); |
461 | 97.6k | SharePath = InstallPath; |
462 | 97.6k | llvm::sys::path::append(SharePath, "share"); |
463 | | |
464 | | // Get parent of InstallPath and append "share" |
465 | 97.6k | SmallString<0> ParentSharePath = llvm::sys::path::parent_path(InstallPath); |
466 | 97.6k | llvm::sys::path::append(ParentSharePath, "share"); |
467 | | |
468 | 97.6k | auto Append = [](SmallString<0> &path, const Twine &a, const Twine &b = "", |
469 | 293k | const Twine &c = "", const Twine &d = "") { |
470 | 293k | SmallString<0> newpath = path; |
471 | 293k | llvm::sys::path::append(newpath, a, b, c, d); |
472 | 293k | return newpath; |
473 | 293k | }; |
474 | | // If HIP version file can be found and parsed, use HIP version from there. |
475 | 97.6k | for (const auto &VersionFilePath : |
476 | 97.6k | {Append(SharePath, "hip", "version"), |
477 | 97.6k | Append(ParentSharePath, "hip", "version"), |
478 | 293k | Append(BinPath, ".hipVersion")}) { |
479 | 293k | llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> VersionFile = |
480 | 293k | FS.getBufferForFile(VersionFilePath); |
481 | 293k | if (!VersionFile) |
482 | 292k | continue; |
483 | 181 | if (179 HIPVersionArg.empty()179 && VersionFile) |
484 | 181 | if (parseHIPVersionFile((*VersionFile)->getBuffer())) |
485 | 0 | continue; |
486 | | |
487 | 179 | HasHIPRuntime = true; |
488 | 179 | return; |
489 | 179 | } |
490 | | // Otherwise, if -rocm-path is specified (no strict checking), use the |
491 | | // default HIP version or specified by --hip-version. |
492 | 97.5k | if (!Candidate.StrictChecking) { |
493 | 9 | HasHIPRuntime = true; |
494 | 9 | return; |
495 | 9 | } |
496 | 97.5k | } |
497 | 31.3k | HasHIPRuntime = false; |
498 | 31.3k | } |
499 | | |
500 | 148 | void RocmInstallationDetector::print(raw_ostream &OS) const { |
501 | 148 | if (hasHIPRuntime()) |
502 | 24 | OS << "Found HIP installation: " << InstallPath << ", version " |
503 | 24 | << DetectedVersion << '\n'; |
504 | 148 | } |
505 | | |
506 | | void RocmInstallationDetector::AddHIPIncludeArgs(const ArgList &DriverArgs, |
507 | 631 | ArgStringList &CC1Args) const { |
508 | 631 | bool UsesRuntimeWrapper = VersionMajorMinor > llvm::VersionTuple(3, 5) && |
509 | 631 | !DriverArgs.hasArg(options::OPT_nohipwrapperinc)153 ; |
510 | | |
511 | 631 | if (!DriverArgs.hasArg(options::OPT_nobuiltininc)) { |
512 | | // HIP header includes standard library wrapper headers under clang |
513 | | // cuda_wrappers directory. Since these wrapper headers include_next |
514 | | // standard C++ headers, whereas libc++ headers include_next other clang |
515 | | // headers. The include paths have to follow this order: |
516 | | // - wrapper include path |
517 | | // - standard C++ include path |
518 | | // - other clang include path |
519 | | // Since standard C++ and other clang include paths are added in other |
520 | | // places after this function, here we only need to make sure wrapper |
521 | | // include path is added. |
522 | | // |
523 | | // ROCm 3.5 does not fully support the wrapper headers. Therefore it needs |
524 | | // a workaround. |
525 | 629 | SmallString<128> P(D.ResourceDir); |
526 | 629 | if (UsesRuntimeWrapper) |
527 | 150 | llvm::sys::path::append(P, "include", "cuda_wrappers"); |
528 | 629 | CC1Args.push_back("-internal-isystem"); |
529 | 629 | CC1Args.push_back(DriverArgs.MakeArgString(P)); |
530 | 629 | } |
531 | | |
532 | 631 | if (DriverArgs.hasArg(options::OPT_nogpuinc)) |
533 | 507 | return; |
534 | | |
535 | 124 | if (!hasHIPRuntime()) { |
536 | 0 | D.Diag(diag::err_drv_no_hip_runtime); |
537 | 0 | return; |
538 | 0 | } |
539 | | |
540 | 124 | CC1Args.push_back("-idirafter"); |
541 | 124 | CC1Args.push_back(DriverArgs.MakeArgString(getIncludePath())); |
542 | 124 | if (UsesRuntimeWrapper) |
543 | 121 | CC1Args.append({"-include", "__clang_hip_runtime_wrapper.h"}); |
544 | 124 | } |
545 | | |
546 | | void amdgpu::Linker::ConstructJob(Compilation &C, const JobAction &JA, |
547 | | const InputInfo &Output, |
548 | | const InputInfoList &Inputs, |
549 | | const ArgList &Args, |
550 | 67 | const char *LinkingOutput) const { |
551 | | |
552 | 67 | std::string Linker = getToolChain().GetProgramPath(getShortName()); |
553 | 67 | ArgStringList CmdArgs; |
554 | 67 | CmdArgs.push_back("--no-undefined"); |
555 | 67 | CmdArgs.push_back("-shared"); |
556 | | |
557 | 67 | addLinkerCompressDebugSectionsOption(getToolChain(), Args, CmdArgs); |
558 | 67 | Args.AddAllArgs(CmdArgs, options::OPT_L); |
559 | 67 | AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs, JA); |
560 | 67 | if (C.getDriver().isUsingLTO()) |
561 | 1 | addLTOOptions(getToolChain(), Args, CmdArgs, Output, Inputs[0], |
562 | 1 | C.getDriver().getLTOMode() == LTOK_Thin); |
563 | 66 | else if (Args.hasArg(options::OPT_mcpu_EQ)) |
564 | 57 | CmdArgs.push_back(Args.MakeArgString( |
565 | 57 | "-plugin-opt=mcpu=" + Args.getLastArgValue(options::OPT_mcpu_EQ))); |
566 | 67 | CmdArgs.push_back("-o"); |
567 | 67 | CmdArgs.push_back(Output.getFilename()); |
568 | 67 | C.addCommand(std::make_unique<Command>( |
569 | 67 | JA, *this, ResponseFileSupport::AtFileCurCP(), Args.MakeArgString(Linker), |
570 | 67 | CmdArgs, Inputs, Output)); |
571 | 67 | } |
572 | | |
573 | | void amdgpu::getAMDGPUTargetFeatures(const Driver &D, |
574 | | const llvm::Triple &Triple, |
575 | | const llvm::opt::ArgList &Args, |
576 | 1.05k | std::vector<StringRef> &Features) { |
577 | | // Add target ID features to -target-feature options. No diagnostics should |
578 | | // be emitted here since invalid target ID is diagnosed at other places. |
579 | 1.05k | StringRef TargetID = Args.getLastArgValue(options::OPT_mcpu_EQ); |
580 | 1.05k | if (!TargetID.empty()) { |
581 | 1.01k | llvm::StringMap<bool> FeatureMap; |
582 | 1.01k | auto OptionalGpuArch = parseTargetID(Triple, TargetID, &FeatureMap); |
583 | 1.01k | if (OptionalGpuArch) { |
584 | 994 | StringRef GpuArch = *OptionalGpuArch; |
585 | | // Iterate through all possible target ID features for the given GPU. |
586 | | // If it is mapped to true, add +feature. |
587 | | // If it is mapped to false, add -feature. |
588 | | // If it is not in the map (default), do not add it |
589 | 994 | for (auto &&Feature : getAllPossibleTargetIDFeatures(Triple, GpuArch)) { |
590 | 982 | auto Pos = FeatureMap.find(Feature); |
591 | 982 | if (Pos == FeatureMap.end()) |
592 | 863 | continue; |
593 | 119 | Features.push_back(Args.MakeArgStringRef( |
594 | 119 | (Twine(Pos->second ? "+"77 : "-"42 ) + Feature).str())); |
595 | 119 | } |
596 | 994 | } |
597 | 1.01k | } |
598 | | |
599 | 1.05k | if (Args.hasFlag(options::OPT_mwavefrontsize64, |
600 | 1.05k | options::OPT_mno_wavefrontsize64, false)) |
601 | 13 | Features.push_back("+wavefrontsize64"); |
602 | | |
603 | 1.05k | handleTargetFeaturesGroup(D, Triple, Args, Features, |
604 | 1.05k | options::OPT_m_amdgpu_Features_Group); |
605 | 1.05k | } |
606 | | |
607 | | /// AMDGPU Toolchain |
608 | | AMDGPUToolChain::AMDGPUToolChain(const Driver &D, const llvm::Triple &Triple, |
609 | | const ArgList &Args) |
610 | 510 | : Generic_ELF(D, Triple, Args), |
611 | 510 | OptionsDefault( |
612 | 510 | {{options::OPT_O, "3"}, {options::OPT_cl_std_EQ, "CL1.2"}}) { |
613 | | // Check code object version options. Emit warnings for legacy options |
614 | | // and errors for the last invalid code object version options. |
615 | | // It is done here to avoid repeated warning or error messages for |
616 | | // each tool invocation. |
617 | 510 | checkAMDGPUCodeObjectVersion(D, Args); |
618 | 510 | } |
619 | | |
620 | 67 | Tool *AMDGPUToolChain::buildLinker() const { |
621 | 67 | return new tools::amdgpu::Linker(*this); |
622 | 67 | } |
623 | | |
624 | | DerivedArgList * |
625 | | AMDGPUToolChain::TranslateArgs(const DerivedArgList &Args, StringRef BoundArch, |
626 | 113 | Action::OffloadKind DeviceOffloadKind) const { |
627 | | |
628 | 113 | DerivedArgList *DAL = |
629 | 113 | Generic_ELF::TranslateArgs(Args, BoundArch, DeviceOffloadKind); |
630 | | |
631 | 113 | const OptTable &Opts = getDriver().getOpts(); |
632 | | |
633 | 113 | if (!DAL) |
634 | 113 | DAL = new DerivedArgList(Args.getBaseArgs()); |
635 | | |
636 | 113 | for (Arg *A : Args) |
637 | 882 | DAL->append(A); |
638 | | |
639 | | // Replace -mcpu=native with detected GPU. |
640 | 113 | Arg *LastMCPUArg = DAL->getLastArg(options::OPT_mcpu_EQ); |
641 | 113 | if (LastMCPUArg && StringRef(LastMCPUArg->getValue()) == "native"85 ) { |
642 | 0 | DAL->eraseArg(options::OPT_mcpu_EQ); |
643 | 0 | auto GPUsOrErr = getSystemGPUArchs(Args); |
644 | 0 | if (!GPUsOrErr) { |
645 | 0 | getDriver().Diag(diag::err_drv_undetermined_gpu_arch) |
646 | 0 | << llvm::Triple::getArchTypeName(getArch()) |
647 | 0 | << llvm::toString(GPUsOrErr.takeError()) << "-mcpu"; |
648 | 0 | } else { |
649 | 0 | auto &GPUs = *GPUsOrErr; |
650 | 0 | if (GPUs.size() > 1) { |
651 | 0 | getDriver().Diag(diag::warn_drv_multi_gpu_arch) |
652 | 0 | << llvm::Triple::getArchTypeName(getArch()) |
653 | 0 | << llvm::join(GPUs, ", ") << "-mcpu"; |
654 | 0 | } |
655 | 0 | DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_mcpu_EQ), |
656 | 0 | Args.MakeArgString(GPUs.front())); |
657 | 0 | } |
658 | 0 | } |
659 | | |
660 | 113 | checkTargetID(*DAL); |
661 | | |
662 | 113 | if (!Args.getLastArgValue(options::OPT_x).equals("cl")) |
663 | 76 | return DAL; |
664 | | |
665 | | // Phase 1 (.cl -> .bc) |
666 | 37 | if (Args.hasArg(options::OPT_c) && Args.hasArg(options::OPT_emit_llvm)17 ) { |
667 | 17 | DAL->AddFlagArg(nullptr, Opts.getOption(getTriple().isArch64Bit() |
668 | 17 | ? options::OPT_m64 |
669 | 17 | : options::OPT_m320 )); |
670 | | |
671 | | // Have to check OPT_O4, OPT_O0 & OPT_Ofast separately |
672 | | // as they defined that way in Options.td |
673 | 17 | if (!Args.hasArg(options::OPT_O, options::OPT_O0, options::OPT_O4, |
674 | 17 | options::OPT_Ofast)) |
675 | 9 | DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_O), |
676 | 9 | getOptionDefault(options::OPT_O)); |
677 | 17 | } |
678 | | |
679 | 37 | return DAL; |
680 | 113 | } |
681 | | |
682 | | bool AMDGPUToolChain::getDefaultDenormsAreZeroForTarget( |
683 | 612 | llvm::AMDGPU::GPUKind Kind) { |
684 | | |
685 | | // Assume nothing without a specific target. |
686 | 612 | if (Kind == llvm::AMDGPU::GK_NONE) |
687 | 34 | return false; |
688 | | |
689 | 578 | const unsigned ArchAttr = llvm::AMDGPU::getArchAttrAMDGCN(Kind); |
690 | | |
691 | | // Default to enabling f32 denormals by default on subtargets where fma is |
692 | | // fast with denormals |
693 | 578 | const bool BothDenormAndFMAFast = |
694 | 578 | (ArchAttr & llvm::AMDGPU::FEATURE_FAST_FMA_F32) && |
695 | 578 | (ArchAttr & llvm::AMDGPU::FEATURE_FAST_DENORMAL_F32)450 ; |
696 | 578 | return !BothDenormAndFMAFast; |
697 | 612 | } |
698 | | |
699 | | llvm::DenormalMode AMDGPUToolChain::getDefaultDenormalModeForType( |
700 | | const llvm::opt::ArgList &DriverArgs, const JobAction &JA, |
701 | 1.07k | const llvm::fltSemantics *FPType) const { |
702 | | // Denormals should always be enabled for f16 and f64. |
703 | 1.07k | if (!FPType || FPType != &llvm::APFloat::IEEEsingle()536 ) |
704 | 536 | return llvm::DenormalMode::getIEEE(); |
705 | | |
706 | 536 | if (JA.getOffloadingDeviceKind() == Action::OFK_HIP || |
707 | 536 | JA.getOffloadingDeviceKind() == Action::OFK_Cuda109 ) { |
708 | 427 | auto Arch = getProcessorFromTargetID(getTriple(), JA.getOffloadingArch()); |
709 | 427 | auto Kind = llvm::AMDGPU::parseArchAMDGCN(Arch); |
710 | 427 | if (FPType && FPType == &llvm::APFloat::IEEEsingle() && |
711 | 427 | DriverArgs.hasFlag(options::OPT_fgpu_flush_denormals_to_zero, |
712 | 427 | options::OPT_fno_gpu_flush_denormals_to_zero, |
713 | 427 | getDefaultDenormsAreZeroForTarget(Kind))) |
714 | 75 | return llvm::DenormalMode::getPreserveSign(); |
715 | | |
716 | 352 | return llvm::DenormalMode::getIEEE(); |
717 | 427 | } |
718 | | |
719 | 109 | const StringRef GpuArch = getGPUArch(DriverArgs); |
720 | 109 | auto Kind = llvm::AMDGPU::parseArchAMDGCN(GpuArch); |
721 | | |
722 | | // TODO: There are way too many flags that change this. Do we need to check |
723 | | // them all? |
724 | 109 | bool DAZ = DriverArgs.hasArg(options::OPT_cl_denorms_are_zero) || |
725 | 109 | getDefaultDenormsAreZeroForTarget(Kind)102 ; |
726 | | |
727 | | // Outputs are flushed to zero (FTZ), preserving sign. Denormal inputs are |
728 | | // also implicit treated as zero (DAZ). |
729 | 109 | return DAZ ? llvm::DenormalMode::getPreserveSign()43 : |
730 | 109 | llvm::DenormalMode::getIEEE()66 ; |
731 | 536 | } |
732 | | |
733 | | bool AMDGPUToolChain::isWave64(const llvm::opt::ArgList &DriverArgs, |
734 | 85 | llvm::AMDGPU::GPUKind Kind) { |
735 | 85 | const unsigned ArchAttr = llvm::AMDGPU::getArchAttrAMDGCN(Kind); |
736 | 85 | bool HasWave32 = (ArchAttr & llvm::AMDGPU::FEATURE_WAVE32); |
737 | | |
738 | 85 | return !HasWave32 || DriverArgs.hasFlag( |
739 | 15 | options::OPT_mwavefrontsize64, options::OPT_mno_wavefrontsize64, false); |
740 | 85 | } |
741 | | |
742 | | |
743 | | /// ROCM Toolchain |
744 | | ROCMToolChain::ROCMToolChain(const Driver &D, const llvm::Triple &Triple, |
745 | | const ArgList &Args) |
746 | 487 | : AMDGPUToolChain(D, Triple, Args) { |
747 | 487 | RocmInstallation->detectDeviceLibrary(); |
748 | 487 | } |
749 | | |
750 | | void AMDGPUToolChain::addClangTargetOptions( |
751 | | const llvm::opt::ArgList &DriverArgs, |
752 | | llvm::opt::ArgStringList &CC1Args, |
753 | 100 | Action::OffloadKind DeviceOffloadingKind) const { |
754 | | // Default to "hidden" visibility, as object level linking will not be |
755 | | // supported for the foreseeable future. |
756 | 100 | if (!DriverArgs.hasArg(options::OPT_fvisibility_EQ, |
757 | 100 | options::OPT_fvisibility_ms_compat)) { |
758 | 96 | CC1Args.push_back("-fvisibility=hidden"); |
759 | 96 | CC1Args.push_back("-fapply-global-visibility-to-externs"); |
760 | 96 | } |
761 | 100 | } |
762 | | |
763 | | StringRef |
764 | 217 | AMDGPUToolChain::getGPUArch(const llvm::opt::ArgList &DriverArgs) const { |
765 | 217 | return getProcessorFromTargetID( |
766 | 217 | getTriple(), DriverArgs.getLastArgValue(options::OPT_mcpu_EQ)); |
767 | 217 | } |
768 | | |
769 | | AMDGPUToolChain::ParsedTargetIDType |
770 | 521 | AMDGPUToolChain::getParsedTargetID(const llvm::opt::ArgList &DriverArgs) const { |
771 | 521 | StringRef TargetID = DriverArgs.getLastArgValue(options::OPT_mcpu_EQ); |
772 | 521 | if (TargetID.empty()) |
773 | 28 | return {std::nullopt, std::nullopt, std::nullopt}; |
774 | | |
775 | 493 | llvm::StringMap<bool> FeatureMap; |
776 | 493 | auto OptionalGpuArch = parseTargetID(getTriple(), TargetID, &FeatureMap); |
777 | 493 | if (!OptionalGpuArch) |
778 | 14 | return {TargetID.str(), std::nullopt, std::nullopt}; |
779 | | |
780 | 479 | return {TargetID.str(), OptionalGpuArch->str(), FeatureMap}; |
781 | 493 | } |
782 | | |
783 | | void AMDGPUToolChain::checkTargetID( |
784 | 113 | const llvm::opt::ArgList &DriverArgs) const { |
785 | 113 | auto PTID = getParsedTargetID(DriverArgs); |
786 | 113 | if (PTID.OptionalTargetID && !PTID.OptionalGPUArch85 ) { |
787 | 14 | getDriver().Diag(clang::diag::err_drv_bad_target_id) |
788 | 14 | << *PTID.OptionalTargetID; |
789 | 14 | } |
790 | 113 | } |
791 | | |
792 | | Expected<SmallVector<std::string>> |
793 | 0 | AMDGPUToolChain::getSystemGPUArchs(const ArgList &Args) const { |
794 | | // Detect AMD GPUs availible on the system. |
795 | 0 | std::string Program; |
796 | 0 | if (Arg *A = Args.getLastArg(options::OPT_amdgpu_arch_tool_EQ)) |
797 | 0 | Program = A->getValue(); |
798 | 0 | else |
799 | 0 | Program = GetProgramPath("amdgpu-arch"); |
800 | |
|
801 | 0 | auto StdoutOrErr = executeToolChainProgram(Program); |
802 | 0 | if (!StdoutOrErr) |
803 | 0 | return StdoutOrErr.takeError(); |
804 | | |
805 | 0 | SmallVector<std::string, 1> GPUArchs; |
806 | 0 | for (StringRef Arch : llvm::split((*StdoutOrErr)->getBuffer(), "\n")) |
807 | 0 | if (!Arch.empty()) |
808 | 0 | GPUArchs.push_back(Arch.str()); |
809 | |
|
810 | 0 | if (GPUArchs.empty()) |
811 | 0 | return llvm::createStringError(std::error_code(), |
812 | 0 | "No AMD GPU detected in the system"); |
813 | | |
814 | 0 | return std::move(GPUArchs); |
815 | 0 | } |
816 | | |
817 | | void ROCMToolChain::addClangTargetOptions( |
818 | | const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args, |
819 | 81 | Action::OffloadKind DeviceOffloadingKind) const { |
820 | 81 | AMDGPUToolChain::addClangTargetOptions(DriverArgs, CC1Args, |
821 | 81 | DeviceOffloadingKind); |
822 | | |
823 | | // For the OpenCL case where there is no offload target, accept -nostdlib to |
824 | | // disable bitcode linking. |
825 | 81 | if (DeviceOffloadingKind == Action::OFK_None && |
826 | 81 | DriverArgs.hasArg(options::OPT_nostdlib)) |
827 | 8 | return; |
828 | | |
829 | 73 | if (DriverArgs.hasArg(options::OPT_nogpulib)) |
830 | 53 | return; |
831 | | |
832 | | // Get the device name and canonicalize it |
833 | 20 | const StringRef GpuArch = getGPUArch(DriverArgs); |
834 | 20 | auto Kind = llvm::AMDGPU::parseArchAMDGCN(GpuArch); |
835 | 20 | const StringRef CanonArch = llvm::AMDGPU::getArchNameAMDGCN(Kind); |
836 | 20 | StringRef LibDeviceFile = RocmInstallation->getLibDeviceFile(CanonArch); |
837 | 20 | auto ABIVer = DeviceLibABIVersion::fromCodeObjectVersion( |
838 | 20 | getAMDGPUCodeObjectVersion(getDriver(), DriverArgs)); |
839 | 20 | if (!RocmInstallation->checkCommonBitcodeLibs(CanonArch, LibDeviceFile, |
840 | 20 | ABIVer)) |
841 | 3 | return; |
842 | | |
843 | 17 | bool Wave64 = isWave64(DriverArgs, Kind); |
844 | | |
845 | | // TODO: There are way too many flags that change this. Do we need to check |
846 | | // them all? |
847 | 17 | bool DAZ = DriverArgs.hasArg(options::OPT_cl_denorms_are_zero) || |
848 | 17 | getDefaultDenormsAreZeroForTarget(Kind)15 ; |
849 | 17 | bool FiniteOnly = DriverArgs.hasArg(options::OPT_cl_finite_math_only); |
850 | | |
851 | 17 | bool UnsafeMathOpt = |
852 | 17 | DriverArgs.hasArg(options::OPT_cl_unsafe_math_optimizations); |
853 | 17 | bool FastRelaxedMath = DriverArgs.hasArg(options::OPT_cl_fast_relaxed_math); |
854 | 17 | bool CorrectSqrt = |
855 | 17 | DriverArgs.hasArg(options::OPT_cl_fp32_correctly_rounded_divide_sqrt); |
856 | | |
857 | | // Add the OpenCL specific bitcode library. |
858 | 17 | llvm::SmallVector<std::string, 12> BCLibs; |
859 | 17 | BCLibs.push_back(RocmInstallation->getOpenCLPath().str()); |
860 | | |
861 | | // Add the generic set of libraries. |
862 | 17 | BCLibs.append(RocmInstallation->getCommonBitcodeLibs( |
863 | 17 | DriverArgs, LibDeviceFile, Wave64, DAZ, FiniteOnly, UnsafeMathOpt, |
864 | 17 | FastRelaxedMath, CorrectSqrt, ABIVer, false)); |
865 | | |
866 | 170 | for (StringRef BCFile : BCLibs) { |
867 | 170 | CC1Args.push_back("-mlink-builtin-bitcode"); |
868 | 170 | CC1Args.push_back(DriverArgs.MakeArgString(BCFile)); |
869 | 170 | } |
870 | 17 | } |
871 | | |
872 | | bool RocmInstallationDetector::checkCommonBitcodeLibs( |
873 | | StringRef GPUArch, StringRef LibDeviceFile, |
874 | 108 | DeviceLibABIVersion ABIVer) const { |
875 | 108 | if (!hasDeviceLibrary()) { |
876 | 2 | D.Diag(diag::err_drv_no_rocm_device_lib) << 0; |
877 | 2 | return false; |
878 | 2 | } |
879 | 106 | if (LibDeviceFile.empty()) { |
880 | 20 | D.Diag(diag::err_drv_no_rocm_device_lib) << 1 << GPUArch; |
881 | 20 | return false; |
882 | 20 | } |
883 | 86 | if (ABIVer.requiresLibrary() && getABIVersionPath(ABIVer).empty()2 ) { |
884 | 1 | D.Diag(diag::err_drv_no_rocm_device_lib) << 2 << ABIVer.toString(); |
885 | 1 | return false; |
886 | 1 | } |
887 | 85 | return true; |
888 | 86 | } |
889 | | |
890 | | llvm::SmallVector<std::string, 12> |
891 | | RocmInstallationDetector::getCommonBitcodeLibs( |
892 | | const llvm::opt::ArgList &DriverArgs, StringRef LibDeviceFile, bool Wave64, |
893 | | bool DAZ, bool FiniteOnly, bool UnsafeMathOpt, bool FastRelaxedMath, |
894 | 85 | bool CorrectSqrt, DeviceLibABIVersion ABIVer, bool isOpenMP = false) const { |
895 | 85 | llvm::SmallVector<std::string, 12> BCLibs; |
896 | | |
897 | 763 | auto AddBCLib = [&](StringRef BCFile) { BCLibs.push_back(BCFile.str()); }; |
898 | | |
899 | 85 | AddBCLib(getOCMLPath()); |
900 | 85 | AddBCLib(getOCKLPath()); |
901 | 85 | AddBCLib(getDenormalsAreZeroPath(DAZ)); |
902 | 85 | AddBCLib(getUnsafeMathPath(UnsafeMathOpt || FastRelaxedMath83 )); |
903 | 85 | AddBCLib(getFiniteOnlyPath(FiniteOnly || FastRelaxedMath83 )); |
904 | 85 | AddBCLib(getCorrectlyRoundedSqrtPath(CorrectSqrt)); |
905 | 85 | AddBCLib(getWavefrontSize64Path(Wave64)); |
906 | 85 | AddBCLib(LibDeviceFile); |
907 | 85 | auto ABIVerPath = getABIVersionPath(ABIVer); |
908 | 85 | if (!ABIVerPath.empty()) |
909 | 83 | AddBCLib(ABIVerPath); |
910 | | |
911 | 85 | return BCLibs; |
912 | 85 | } |
913 | | |
914 | | llvm::SmallVector<std::string, 12> |
915 | | ROCMToolChain::getCommonDeviceLibNames(const llvm::opt::ArgList &DriverArgs, |
916 | | const std::string &GPUArch, |
917 | 88 | bool isOpenMP) const { |
918 | 88 | auto Kind = llvm::AMDGPU::parseArchAMDGCN(GPUArch); |
919 | 88 | const StringRef CanonArch = llvm::AMDGPU::getArchNameAMDGCN(Kind); |
920 | | |
921 | 88 | StringRef LibDeviceFile = RocmInstallation->getLibDeviceFile(CanonArch); |
922 | 88 | auto ABIVer = DeviceLibABIVersion::fromCodeObjectVersion( |
923 | 88 | getAMDGPUCodeObjectVersion(getDriver(), DriverArgs)); |
924 | 88 | if (!RocmInstallation->checkCommonBitcodeLibs(CanonArch, LibDeviceFile, |
925 | 88 | ABIVer)) |
926 | 20 | return {}; |
927 | | |
928 | | // If --hip-device-lib is not set, add the default bitcode libraries. |
929 | | // TODO: There are way too many flags that change this. Do we need to check |
930 | | // them all? |
931 | 68 | bool DAZ = DriverArgs.hasFlag(options::OPT_fgpu_flush_denormals_to_zero, |
932 | 68 | options::OPT_fno_gpu_flush_denormals_to_zero, |
933 | 68 | getDefaultDenormsAreZeroForTarget(Kind)); |
934 | 68 | bool FiniteOnly = DriverArgs.hasFlag( |
935 | 68 | options::OPT_ffinite_math_only, options::OPT_fno_finite_math_only, false); |
936 | 68 | bool UnsafeMathOpt = |
937 | 68 | DriverArgs.hasFlag(options::OPT_funsafe_math_optimizations, |
938 | 68 | options::OPT_fno_unsafe_math_optimizations, false); |
939 | 68 | bool FastRelaxedMath = DriverArgs.hasFlag(options::OPT_ffast_math, |
940 | 68 | options::OPT_fno_fast_math, false); |
941 | 68 | bool CorrectSqrt = DriverArgs.hasFlag( |
942 | 68 | options::OPT_fhip_fp32_correctly_rounded_divide_sqrt, |
943 | 68 | options::OPT_fno_hip_fp32_correctly_rounded_divide_sqrt, true); |
944 | 68 | bool Wave64 = isWave64(DriverArgs, Kind); |
945 | | |
946 | 68 | return RocmInstallation->getCommonBitcodeLibs( |
947 | 68 | DriverArgs, LibDeviceFile, Wave64, DAZ, FiniteOnly, UnsafeMathOpt, |
948 | 68 | FastRelaxedMath, CorrectSqrt, ABIVer, isOpenMP); |
949 | 88 | } |