/Users/buildslave/jenkins/workspace/coverage/llvm-project/clang/lib/CodeGen/CGBuiltin.cpp
Line | Count | Source |
1 | | //===---- CGBuiltin.cpp - Emit LLVM Code for builtins ---------------------===// |
2 | | // |
3 | | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | | // See https://llvm.org/LICENSE.txt for license information. |
5 | | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | | // |
7 | | //===----------------------------------------------------------------------===// |
8 | | // |
9 | | // This contains code to emit Builtin calls as LLVM code. |
10 | | // |
11 | | //===----------------------------------------------------------------------===// |
12 | | |
13 | | #include "ABIInfo.h" |
14 | | #include "CGCUDARuntime.h" |
15 | | #include "CGCXXABI.h" |
16 | | #include "CGObjCRuntime.h" |
17 | | #include "CGOpenCLRuntime.h" |
18 | | #include "CGRecordLayout.h" |
19 | | #include "CodeGenFunction.h" |
20 | | #include "CodeGenModule.h" |
21 | | #include "ConstantEmitter.h" |
22 | | #include "PatternInit.h" |
23 | | #include "TargetInfo.h" |
24 | | #include "clang/AST/ASTContext.h" |
25 | | #include "clang/AST/Attr.h" |
26 | | #include "clang/AST/Decl.h" |
27 | | #include "clang/AST/OSLog.h" |
28 | | #include "clang/AST/OperationKinds.h" |
29 | | #include "clang/Basic/TargetBuiltins.h" |
30 | | #include "clang/Basic/TargetInfo.h" |
31 | | #include "clang/Basic/TargetOptions.h" |
32 | | #include "clang/CodeGen/CGFunctionInfo.h" |
33 | | #include "clang/Frontend/FrontendDiagnostic.h" |
34 | | #include "llvm/ADT/APFloat.h" |
35 | | #include "llvm/ADT/APInt.h" |
36 | | #include "llvm/ADT/FloatingPointMode.h" |
37 | | #include "llvm/ADT/SmallPtrSet.h" |
38 | | #include "llvm/ADT/StringExtras.h" |
39 | | #include "llvm/Analysis/ValueTracking.h" |
40 | | #include "llvm/IR/DataLayout.h" |
41 | | #include "llvm/IR/InlineAsm.h" |
42 | | #include "llvm/IR/Intrinsics.h" |
43 | | #include "llvm/IR/IntrinsicsAArch64.h" |
44 | | #include "llvm/IR/IntrinsicsAMDGPU.h" |
45 | | #include "llvm/IR/IntrinsicsARM.h" |
46 | | #include "llvm/IR/IntrinsicsBPF.h" |
47 | | #include "llvm/IR/IntrinsicsHexagon.h" |
48 | | #include "llvm/IR/IntrinsicsNVPTX.h" |
49 | | #include "llvm/IR/IntrinsicsPowerPC.h" |
50 | | #include "llvm/IR/IntrinsicsR600.h" |
51 | | #include "llvm/IR/IntrinsicsRISCV.h" |
52 | | #include "llvm/IR/IntrinsicsS390.h" |
53 | | #include "llvm/IR/IntrinsicsVE.h" |
54 | | #include "llvm/IR/IntrinsicsWebAssembly.h" |
55 | | #include "llvm/IR/IntrinsicsX86.h" |
56 | | #include "llvm/IR/MDBuilder.h" |
57 | | #include "llvm/IR/MatrixBuilder.h" |
58 | | #include "llvm/Support/ConvertUTF.h" |
59 | | #include "llvm/Support/MathExtras.h" |
60 | | #include "llvm/Support/ScopedPrinter.h" |
61 | | #include "llvm/TargetParser/AArch64TargetParser.h" |
62 | | #include "llvm/TargetParser/X86TargetParser.h" |
63 | | #include <optional> |
64 | | #include <sstream> |
65 | | |
66 | | using namespace clang; |
67 | | using namespace CodeGen; |
68 | | using namespace llvm; |
69 | | |
70 | | static llvm::cl::opt<bool> ClSanitizeAlignmentBuiltin( |
71 | | "sanitize-alignment-builtin", llvm::cl::Hidden, |
72 | | llvm::cl::desc("Instrument builtin functions for -fsanitize=alignment"), |
73 | | llvm::cl::init(true)); |
74 | | |
75 | | static void initializeAlloca(CodeGenFunction &CGF, AllocaInst *AI, Value *Size, |
76 | 33 | Align AlignmentInBytes) { |
77 | 33 | ConstantInt *Byte; |
78 | 33 | switch (CGF.getLangOpts().getTrivialAutoVarInit()) { |
79 | 19 | case LangOptions::TrivialAutoVarInitKind::Uninitialized: |
80 | | // Nothing to initialize. |
81 | 19 | return; |
82 | 7 | case LangOptions::TrivialAutoVarInitKind::Zero: |
83 | 7 | Byte = CGF.Builder.getInt8(0x00); |
84 | 7 | break; |
85 | 7 | case LangOptions::TrivialAutoVarInitKind::Pattern: { |
86 | 7 | llvm::Type *Int8 = llvm::IntegerType::getInt8Ty(CGF.CGM.getLLVMContext()); |
87 | 7 | Byte = llvm::dyn_cast<llvm::ConstantInt>( |
88 | 7 | initializationPatternFor(CGF.CGM, Int8)); |
89 | 7 | break; |
90 | 0 | } |
91 | 33 | } |
92 | 14 | if (CGF.CGM.stopAutoInit()) |
93 | 8 | return; |
94 | 6 | auto *I = CGF.Builder.CreateMemSet(AI, Byte, Size, AlignmentInBytes); |
95 | 6 | I->addAnnotationMetadata("auto-init"); |
96 | 6 | } |
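To make the three TrivialAutoVarInitKind cases concrete, a minimal caller sketch (mine, not part of this report; the flags are the standard Clang options):

    /* Built with -ftrivial-auto-var-init=zero or =pattern. */
    void use(void *p);
    void demo(unsigned n) {
      /* initializeAlloca() runs after the alloca for this builtin and emits
         a memset of 0x00 (or of the pattern byte) tagged "auto-init"; with
         =uninitialized it returns without emitting anything. */
      use(__builtin_alloca(n));
    }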
97 | | |
98 | | /// getBuiltinLibFunction - Given a builtin id for a function like |
99 | | /// "__builtin_fabsf", return a Function* for "fabsf". |
100 | | llvm::Constant *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD, |
101 | 1.81k | unsigned BuiltinID) { |
102 | 1.81k | assert(Context.BuiltinInfo.isLibFunction(BuiltinID)); |
103 | | |
104 | | // Get the name, skip over the __builtin_ prefix (if necessary). |
105 | 1.81k | StringRef Name; |
106 | 1.81k | GlobalDecl D(FD); |
107 | | |
108 | | // TODO: This list should be expanded or refactored after all GCC-compatible |
109 | | // std libcall builtins are implemented. |
110 | 1.81k | static SmallDenseMap<unsigned, StringRef, 64> F128Builtins{ |
111 | 1.81k | {Builtin::BI__builtin___fprintf_chk, "__fprintf_chkieee128"}, |
112 | 1.81k | {Builtin::BI__builtin___printf_chk, "__printf_chkieee128"}, |
113 | 1.81k | {Builtin::BI__builtin___snprintf_chk, "__snprintf_chkieee128"}, |
114 | 1.81k | {Builtin::BI__builtin___sprintf_chk, "__sprintf_chkieee128"}, |
115 | 1.81k | {Builtin::BI__builtin___vfprintf_chk, "__vfprintf_chkieee128"}, |
116 | 1.81k | {Builtin::BI__builtin___vprintf_chk, "__vprintf_chkieee128"}, |
117 | 1.81k | {Builtin::BI__builtin___vsnprintf_chk, "__vsnprintf_chkieee128"}, |
118 | 1.81k | {Builtin::BI__builtin___vsprintf_chk, "__vsprintf_chkieee128"}, |
119 | 1.81k | {Builtin::BI__builtin_fprintf, "__fprintfieee128"}, |
120 | 1.81k | {Builtin::BI__builtin_printf, "__printfieee128"}, |
121 | 1.81k | {Builtin::BI__builtin_snprintf, "__snprintfieee128"}, |
122 | 1.81k | {Builtin::BI__builtin_sprintf, "__sprintfieee128"}, |
123 | 1.81k | {Builtin::BI__builtin_vfprintf, "__vfprintfieee128"}, |
124 | 1.81k | {Builtin::BI__builtin_vprintf, "__vprintfieee128"}, |
125 | 1.81k | {Builtin::BI__builtin_vsnprintf, "__vsnprintfieee128"}, |
126 | 1.81k | {Builtin::BI__builtin_vsprintf, "__vsprintfieee128"}, |
127 | 1.81k | {Builtin::BI__builtin_fscanf, "__fscanfieee128"}, |
128 | 1.81k | {Builtin::BI__builtin_scanf, "__scanfieee128"}, |
129 | 1.81k | {Builtin::BI__builtin_sscanf, "__sscanfieee128"}, |
130 | 1.81k | {Builtin::BI__builtin_vfscanf, "__vfscanfieee128"}, |
131 | 1.81k | {Builtin::BI__builtin_vscanf, "__vscanfieee128"}, |
132 | 1.81k | {Builtin::BI__builtin_vsscanf, "__vsscanfieee128"}, |
133 | 1.81k | {Builtin::BI__builtin_nexttowardf128, "__nexttowardieee128"}, |
134 | 1.81k | }; |
135 | | |
136 | | // The AIX library functions frexpl, ldexpl, and modfl are for 128-bit |
137 | | // IBM 'long double' (i.e. __ibm128). Map them to the 'double' versions
138 | | // when 'long double' is the 64-bit IEEE 'double' format.
139 | 1.81k | static SmallDenseMap<unsigned, StringRef, 4> AIXLongDouble64Builtins{ |
140 | 1.81k | {Builtin::BI__builtin_frexpl, "frexp"}, |
141 | 1.81k | {Builtin::BI__builtin_ldexpl, "ldexp"}, |
142 | 1.81k | {Builtin::BI__builtin_modfl, "modf"}, |
143 | 1.81k | }; |
144 | | |
145 | | // If the builtin has been declared explicitly with an assembler label, |
146 | | // use the mangled name. This differs from the plain label on platforms |
147 | | // that prefix labels. |
148 | 1.81k | if (FD->hasAttr<AsmLabelAttr>()) |
149 | 0 | Name = getMangledName(D); |
150 | 1.81k | else { |
151 | | // TODO: This mutation should also be applied to targets other than
152 | | // PPC, once the backend supports IEEE 128-bit style libcalls.
153 | 1.81k | if (getTriple().isPPC64() && |
154 | 1.81k | &getTarget().getLongDoubleFormat() == &llvm::APFloat::IEEEquad() &&
155 | 1.81k | F128Builtins.contains(BuiltinID))
156 | 13 | Name = F128Builtins[BuiltinID]; |
157 | 1.80k | else if (getTriple().isOSAIX() && |
158 | 1.80k | &getTarget().getLongDoubleFormat() == |
159 | 2 | &llvm::APFloat::IEEEdouble() && |
160 | 1.80k | AIXLongDouble64Builtins.contains(BuiltinID))
161 | 2 | Name = AIXLongDouble64Builtins[BuiltinID]; |
162 | 1.80k | else |
163 | 1.80k | Name = Context.BuiltinInfo.getName(BuiltinID).substr(10); |
164 | 1.81k | } |
165 | | |
166 | 1.81k | llvm::FunctionType *Ty = |
167 | 1.81k | cast<llvm::FunctionType>(getTypes().ConvertType(FD->getType())); |
168 | | |
169 | 1.81k | return GetOrCreateLLVMFunction(Name, Ty, D, /*ForVTable=*/false); |
170 | 1.81k | } |
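A sketch (mine) of the common fall-through path: the "__builtin_" prefix is 10 characters, hence the substr(10) above, and the bare libm symbol is declared on demand:

    float demo(float x) {
      /* Declares and calls "fabsf" via GetOrCreateLLVMFunction, unless an
         explicit asm label or one of the PPC/AIX remappings above applies. */
      return __builtin_fabsf(x);
    }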
171 | | |
172 | | /// Emit the conversions required to turn the given value into an |
173 | | /// integer of the given size. |
174 | | static Value *EmitToInt(CodeGenFunction &CGF, llvm::Value *V, |
175 | 626 | QualType T, llvm::IntegerType *IntType) { |
176 | 626 | V = CGF.EmitToMemory(V, T); |
177 | | |
178 | 626 | if (V->getType()->isPointerTy()) |
179 | 16 | return CGF.Builder.CreatePtrToInt(V, IntType); |
180 | | |
181 | 610 | assert(V->getType() == IntType); |
182 | 610 | return V; |
183 | 610 | } |
184 | | |
185 | | static Value *EmitFromInt(CodeGenFunction &CGF, llvm::Value *V, |
186 | 583 | QualType T, llvm::Type *ResultType) { |
187 | 583 | V = CGF.EmitFromMemory(V, T); |
188 | | |
189 | 583 | if (ResultType->isPointerTy()) |
190 | 10 | return CGF.Builder.CreateIntToPtr(V, ResultType); |
191 | | |
192 | 573 | assert(V->getType() == ResultType); |
193 | 573 | return V; |
194 | 573 | } |
195 | | |
196 | | static llvm::Value *CheckAtomicAlignment(CodeGenFunction &CGF, |
197 | 610 | const CallExpr *E) { |
198 | 610 | ASTContext &Ctx = CGF.getContext(); |
199 | 610 | Address Ptr = CGF.EmitPointerWithAlignment(E->getArg(0)); |
200 | 610 | unsigned Bytes = Ptr.getElementType()->isPointerTy() |
201 | 610 | ? Ctx.getTypeSizeInChars(Ctx.VoidPtrTy).getQuantity()
202 | 610 | : Ptr.getElementType()->getScalarSizeInBits() / 8;
203 | 610 | unsigned Align = Ptr.getAlignment().getQuantity(); |
204 | 610 | if (Align % Bytes != 0) { |
205 | 5 | DiagnosticsEngine &Diags = CGF.CGM.getDiags(); |
206 | 5 | Diags.Report(E->getBeginLoc(), diag::warn_sync_op_misaligned); |
207 | 5 | } |
208 | 610 | return Ptr.getPointer(); |
209 | 610 | } |
210 | | |
211 | | /// Utility to insert an atomic instruction based on Intrinsic::ID |
212 | | /// and the expression node. |
213 | | static Value *MakeBinaryAtomicValue( |
214 | | CodeGenFunction &CGF, llvm::AtomicRMWInst::BinOp Kind, const CallExpr *E, |
215 | 504 | AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) { |
216 | | |
217 | 504 | QualType T = E->getType(); |
218 | 504 | assert(E->getArg(0)->getType()->isPointerType()); |
219 | 504 | assert(CGF.getContext().hasSameUnqualifiedType(T, |
220 | 504 | E->getArg(0)->getType()->getPointeeType())); |
221 | 504 | assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType())); |
222 | | |
223 | 504 | llvm::Value *DestPtr = CheckAtomicAlignment(CGF, E); |
224 | | |
225 | 504 | llvm::IntegerType *IntType = llvm::IntegerType::get( |
226 | 504 | CGF.getLLVMContext(), CGF.getContext().getTypeSize(T)); |
227 | | |
228 | 504 | llvm::Value *Args[2]; |
229 | 504 | Args[0] = DestPtr; |
230 | 504 | Args[1] = CGF.EmitScalarExpr(E->getArg(1)); |
231 | 504 | llvm::Type *ValueType = Args[1]->getType(); |
232 | 504 | Args[1] = EmitToInt(CGF, Args[1], T, IntType); |
233 | | |
234 | 504 | llvm::Value *Result = CGF.Builder.CreateAtomicRMW( |
235 | 504 | Kind, Args[0], Args[1], Ordering); |
236 | 504 | return EmitFromInt(CGF, Result, T, ValueType); |
237 | 504 | } |
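A minimal sketch (mine) of a builtin this utility lowers; the IR in the comment is what the SequentiallyConsistent default should produce:

    int demo(volatile int *p) {
      /* MakeBinaryAtomicValue(CGF, AtomicRMWInst::Add, E) emits roughly
           %old = atomicrmw add ptr %p, i32 5 seq_cst
         and returns the pre-operation value %old. */
      return __sync_fetch_and_add(p, 5);
    }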
238 | | |
239 | 84 | static Value *EmitNontemporalStore(CodeGenFunction &CGF, const CallExpr *E) { |
240 | 84 | Value *Val = CGF.EmitScalarExpr(E->getArg(0)); |
241 | 84 | Value *Address = CGF.EmitScalarExpr(E->getArg(1)); |
242 | | |
243 | 84 | Val = CGF.EmitToMemory(Val, E->getArg(0)->getType()); |
244 | 84 | LValue LV = CGF.MakeNaturalAlignAddrLValue(Address, E->getArg(0)->getType()); |
245 | 84 | LV.setNontemporal(true); |
246 | 84 | CGF.EmitStoreOfScalar(Val, LV, false); |
247 | 84 | return nullptr; |
248 | 84 | } |
249 | | |
250 | 35 | static Value *EmitNontemporalLoad(CodeGenFunction &CGF, const CallExpr *E) { |
251 | 35 | Value *Address = CGF.EmitScalarExpr(E->getArg(0)); |
252 | | |
253 | 35 | LValue LV = CGF.MakeNaturalAlignAddrLValue(Address, E->getType()); |
254 | 35 | LV.setNontemporal(true); |
255 | 35 | return CGF.EmitLoadOfScalar(LV, E->getExprLoc()); |
256 | 35 | } |
257 | | |
258 | | static RValue EmitBinaryAtomic(CodeGenFunction &CGF, |
259 | | llvm::AtomicRMWInst::BinOp Kind, |
260 | 134 | const CallExpr *E) { |
261 | 134 | return RValue::get(MakeBinaryAtomicValue(CGF, Kind, E)); |
262 | 134 | } |
263 | | |
264 | | /// Utility to insert an atomic instruction based on Intrinsic::ID and
265 | | /// the expression node, where the return value is the result of the |
266 | | /// operation. |
267 | | static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF, |
268 | | llvm::AtomicRMWInst::BinOp Kind, |
269 | | const CallExpr *E, |
270 | | Instruction::BinaryOps Op, |
271 | 64 | bool Invert = false) { |
272 | 64 | QualType T = E->getType(); |
273 | 64 | assert(E->getArg(0)->getType()->isPointerType()); |
274 | 64 | assert(CGF.getContext().hasSameUnqualifiedType(T, |
275 | 64 | E->getArg(0)->getType()->getPointeeType())); |
276 | 64 | assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType())); |
277 | | |
278 | 64 | llvm::Value *DestPtr = CheckAtomicAlignment(CGF, E); |
279 | | |
280 | 64 | llvm::IntegerType *IntType = llvm::IntegerType::get( |
281 | 64 | CGF.getLLVMContext(), CGF.getContext().getTypeSize(T)); |
282 | | |
283 | 64 | llvm::Value *Args[2]; |
284 | 64 | Args[1] = CGF.EmitScalarExpr(E->getArg(1)); |
285 | 64 | llvm::Type *ValueType = Args[1]->getType(); |
286 | 64 | Args[1] = EmitToInt(CGF, Args[1], T, IntType); |
287 | 64 | Args[0] = DestPtr; |
288 | | |
289 | 64 | llvm::Value *Result = CGF.Builder.CreateAtomicRMW( |
290 | 64 | Kind, Args[0], Args[1], llvm::AtomicOrdering::SequentiallyConsistent); |
291 | 64 | Result = CGF.Builder.CreateBinOp(Op, Result, Args[1]); |
292 | 64 | if (Invert) |
293 | 12 | Result = |
294 | 12 | CGF.Builder.CreateBinOp(llvm::Instruction::Xor, Result, |
295 | 12 | llvm::ConstantInt::getAllOnesValue(IntType)); |
296 | 64 | Result = EmitFromInt(CGF, Result, T, ValueType); |
297 | 64 | return RValue::get(Result); |
298 | 64 | } |
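The post-operation variant recomputes the new value from the atomicrmw result; Invert serves the nand family, which additionally flips the combined bits. A hedged sketch (mine):

    int demo(volatile int *p) {
      /* atomicrmw add yields the old value; EmitBinaryAtomicPost then emits
         "add %old, 5" so the builtin returns the post-operation value. */
      return __sync_add_and_fetch(p, 5);
    }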
299 | | |
300 | | /// Utility to insert an atomic cmpxchg instruction. |
301 | | /// |
302 | | /// @param CGF The current codegen function. |
303 | | /// @param E Builtin call expression to convert to cmpxchg. |
304 | | /// arg0 - address to operate on |
305 | | /// arg1 - value to compare with |
306 | | /// arg2 - new value |
307 | | /// @param ReturnBool Specifies whether to return success flag of |
308 | | /// cmpxchg result or the old value. |
309 | | /// |
310 | | /// @returns result of cmpxchg, according to ReturnBool |
311 | | /// |
312 | | /// Note: to lower Microsoft's _InterlockedCompareExchange* intrinsics,
313 | | /// invoke EmitAtomicCmpXchgForMSIntrin instead.
314 | | static Value *MakeAtomicCmpXchgValue(CodeGenFunction &CGF, const CallExpr *E, |
315 | 29 | bool ReturnBool) { |
316 | 29 | QualType T = ReturnBool ? E->getArg(1)->getType() : E->getType();
317 | 29 | llvm::Value *DestPtr = CheckAtomicAlignment(CGF, E); |
318 | | |
319 | 29 | llvm::IntegerType *IntType = llvm::IntegerType::get( |
320 | 29 | CGF.getLLVMContext(), CGF.getContext().getTypeSize(T)); |
321 | | |
322 | 29 | Value *Args[3]; |
323 | 29 | Args[0] = DestPtr; |
324 | 29 | Args[1] = CGF.EmitScalarExpr(E->getArg(1)); |
325 | 29 | llvm::Type *ValueType = Args[1]->getType(); |
326 | 29 | Args[1] = EmitToInt(CGF, Args[1], T, IntType); |
327 | 29 | Args[2] = EmitToInt(CGF, CGF.EmitScalarExpr(E->getArg(2)), T, IntType); |
328 | | |
329 | 29 | Value *Pair = CGF.Builder.CreateAtomicCmpXchg( |
330 | 29 | Args[0], Args[1], Args[2], llvm::AtomicOrdering::SequentiallyConsistent, |
331 | 29 | llvm::AtomicOrdering::SequentiallyConsistent); |
332 | 29 | if (ReturnBool) |
333 | | // Extract boolean success flag and zext it to int. |
334 | 14 | return CGF.Builder.CreateZExt(CGF.Builder.CreateExtractValue(Pair, 1), |
335 | 14 | CGF.ConvertType(E->getType())); |
336 | 15 | else |
337 | | // Extract old value and emit it using the same type as compare value. |
338 | 15 | return EmitFromInt(CGF, CGF.Builder.CreateExtractValue(Pair, 0), T, |
339 | 15 | ValueType); |
340 | 29 | } |
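Both GCC-style compare-and-swap builtins funnel through this helper; a small illustration (mine):

    int val(volatile int *p) {
      /* ReturnBool == false: field 0 of the cmpxchg pair (the old value),
         converted back through EmitFromInt. */
      return __sync_val_compare_and_swap(p, 1, 2);
    }
    int ok(volatile int *p) {
      /* ReturnBool == true: the i1 success flag (field 1), zero-extended
         to the call's result type. */
      return __sync_bool_compare_and_swap(p, 1, 2);
    }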
341 | | |
342 | | /// This function should be invoked to emit atomic cmpxchg for Microsoft's |
343 | | /// _InterlockedCompareExchange* intrinsics which have the following signature: |
344 | | /// T _InterlockedCompareExchange(T volatile *Destination, |
345 | | /// T Exchange, |
346 | | /// T Comparand); |
347 | | /// |
348 | | /// Whereas the llvm 'cmpxchg' instruction has the following syntax: |
349 | | /// cmpxchg *Destination, Comparand, Exchange. |
350 | | /// So we need to swap Comparand and Exchange when invoking |
351 | | /// CreateAtomicCmpXchg. That is why we cannot reuse the utility function
352 | | /// MakeAtomicCmpXchgValue above, which expects the arguments in the
353 | | /// already-swapped order.
354 | | |
355 | | static |
356 | | Value *EmitAtomicCmpXchgForMSIntrin(CodeGenFunction &CGF, const CallExpr *E, |
357 | 68 | AtomicOrdering SuccessOrdering = AtomicOrdering::SequentiallyConsistent) { |
358 | 68 | assert(E->getArg(0)->getType()->isPointerType()); |
359 | 68 | assert(CGF.getContext().hasSameUnqualifiedType( |
360 | 68 | E->getType(), E->getArg(0)->getType()->getPointeeType())); |
361 | 68 | assert(CGF.getContext().hasSameUnqualifiedType(E->getType(), |
362 | 68 | E->getArg(1)->getType())); |
363 | 68 | assert(CGF.getContext().hasSameUnqualifiedType(E->getType(), |
364 | 68 | E->getArg(2)->getType())); |
365 | | |
366 | 68 | auto *Destination = CGF.EmitScalarExpr(E->getArg(0)); |
367 | 68 | auto *Comparand = CGF.EmitScalarExpr(E->getArg(2)); |
368 | 68 | auto *Exchange = CGF.EmitScalarExpr(E->getArg(1)); |
369 | | |
370 | | // For Release ordering, the failure ordering should be Monotonic. |
371 | 68 | auto FailureOrdering = SuccessOrdering == AtomicOrdering::Release ? |
372 | 14 | AtomicOrdering::Monotonic : |
373 | 68 | SuccessOrdering;
374 | | |
375 | | // The atomic instruction is marked volatile for consistency with MSVC. This |
376 | | // blocks the few atomics optimizations that LLVM has. If we want to optimize |
377 | | // _Interlocked* operations in the future, we will have to remove the volatile |
378 | | // marker. |
379 | 68 | auto *Result = CGF.Builder.CreateAtomicCmpXchg( |
380 | 68 | Destination, Comparand, Exchange, |
381 | 68 | SuccessOrdering, FailureOrdering); |
382 | 68 | Result->setVolatile(true); |
383 | 68 | return CGF.Builder.CreateExtractValue(Result, 0); |
384 | 68 | } |
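To make the operand swap concrete, an illustrative caller (assuming an MSVC-compatible target):

    long demo(long volatile *dst, long desired, long expected) {
      /* MSVC order is (Destination, Exchange, Comparand), while cmpxchg
         wants (ptr, compare, new); hence Comparand is emitted from arg 2
         and Exchange from arg 1 above. Returns the old value. */
      return _InterlockedCompareExchange(dst, desired, expected);
    }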
385 | | |
386 | | // 64-bit Microsoft platforms support 128-bit cmpxchg operations. They are
387 | | // prototyped like this: |
388 | | // |
389 | | // unsigned char _InterlockedCompareExchange128...( |
390 | | // __int64 volatile * _Destination, |
391 | | // __int64 _ExchangeHigh, |
392 | | // __int64 _ExchangeLow, |
393 | | // __int64 * _ComparandResult); |
394 | | static Value *EmitAtomicCmpXchg128ForMSIntrin(CodeGenFunction &CGF, |
395 | | const CallExpr *E, |
396 | 5 | AtomicOrdering SuccessOrdering) { |
397 | 5 | assert(E->getNumArgs() == 4); |
398 | 5 | llvm::Value *Destination = CGF.EmitScalarExpr(E->getArg(0)); |
399 | 5 | llvm::Value *ExchangeHigh = CGF.EmitScalarExpr(E->getArg(1)); |
400 | 5 | llvm::Value *ExchangeLow = CGF.EmitScalarExpr(E->getArg(2)); |
401 | 5 | llvm::Value *ComparandPtr = CGF.EmitScalarExpr(E->getArg(3)); |
402 | | |
403 | 5 | assert(Destination->getType()->isPointerTy()); |
404 | 5 | assert(!ExchangeHigh->getType()->isPointerTy()); |
405 | 5 | assert(!ExchangeLow->getType()->isPointerTy()); |
406 | 5 | assert(ComparandPtr->getType()->isPointerTy()); |
407 | | |
408 | | // For Release ordering, the failure ordering should be Monotonic. |
409 | 5 | auto FailureOrdering = SuccessOrdering == AtomicOrdering::Release |
410 | 5 | ? AtomicOrdering::Monotonic
411 | 5 | : SuccessOrdering;
412 | | |
413 | | // Convert to i128 pointers and values. |
414 | 5 | llvm::Type *Int128Ty = llvm::IntegerType::get(CGF.getLLVMContext(), 128); |
415 | 5 | Address ComparandResult(ComparandPtr, Int128Ty, |
416 | 5 | CGF.getContext().toCharUnitsFromBits(128)); |
417 | | |
418 | | // (((i128)hi) << 64) | ((i128)lo) |
419 | 5 | ExchangeHigh = CGF.Builder.CreateZExt(ExchangeHigh, Int128Ty); |
420 | 5 | ExchangeLow = CGF.Builder.CreateZExt(ExchangeLow, Int128Ty); |
421 | 5 | ExchangeHigh = |
422 | 5 | CGF.Builder.CreateShl(ExchangeHigh, llvm::ConstantInt::get(Int128Ty, 64)); |
423 | 5 | llvm::Value *Exchange = CGF.Builder.CreateOr(ExchangeHigh, ExchangeLow); |
424 | | |
425 | | // Load the comparand for the instruction. |
426 | 5 | llvm::Value *Comparand = CGF.Builder.CreateLoad(ComparandResult); |
427 | | |
428 | 5 | auto *CXI = CGF.Builder.CreateAtomicCmpXchg(Destination, Comparand, Exchange, |
429 | 5 | SuccessOrdering, FailureOrdering); |
430 | | |
431 | | // The atomic instruction is marked volatile for consistency with MSVC. This |
432 | | // blocks the few atomics optimizations that LLVM has. If we want to optimize |
433 | | // _Interlocked* operations in the future, we will have to remove the volatile |
434 | | // marker. |
435 | 5 | CXI->setVolatile(true); |
436 | | |
437 | | // Store the result as an outparameter. |
438 | 5 | CGF.Builder.CreateStore(CGF.Builder.CreateExtractValue(CXI, 0), |
439 | 5 | ComparandResult); |
440 | | |
441 | | // Get the success boolean and zero extend it to i8. |
442 | 5 | Value *Success = CGF.Builder.CreateExtractValue(CXI, 1); |
443 | 5 | return CGF.Builder.CreateZExt(Success, CGF.Int8Ty); |
444 | 5 | } |
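A corresponding caller sketch (assuming a 64-bit MSVC-compatible target where __int64 is available):

    unsigned char demo(__int64 volatile *dst, __int64 hi, __int64 lo,
                       __int64 *cmp) {
      /* The halves are packed as (((i128)hi) << 64) | (i128)lo, the old
         value is written back through *cmp, and the cmpxchg success bit is
         returned zero-extended to unsigned char. */
      return _InterlockedCompareExchange128(dst, hi, lo, cmp);
    }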
445 | | |
446 | | static Value *EmitAtomicIncrementValue(CodeGenFunction &CGF, const CallExpr *E, |
447 | 58 | AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) { |
448 | 58 | assert(E->getArg(0)->getType()->isPointerType()); |
449 | | |
450 | 58 | auto *IntTy = CGF.ConvertType(E->getType()); |
451 | 58 | auto *Result = CGF.Builder.CreateAtomicRMW( |
452 | 58 | AtomicRMWInst::Add, |
453 | 58 | CGF.EmitScalarExpr(E->getArg(0)), |
454 | 58 | ConstantInt::get(IntTy, 1), |
455 | 58 | Ordering); |
456 | 58 | return CGF.Builder.CreateAdd(Result, ConstantInt::get(IntTy, 1)); |
457 | 58 | } |
458 | | |
459 | | static Value *EmitAtomicDecrementValue(CodeGenFunction &CGF, const CallExpr *E, |
460 | 58 | AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) { |
461 | 58 | assert(E->getArg(0)->getType()->isPointerType()); |
462 | | |
463 | 58 | auto *IntTy = CGF.ConvertType(E->getType()); |
464 | 58 | auto *Result = CGF.Builder.CreateAtomicRMW( |
465 | 58 | AtomicRMWInst::Sub, |
466 | 58 | CGF.EmitScalarExpr(E->getArg(0)), |
467 | 58 | ConstantInt::get(IntTy, 1), |
468 | 58 | Ordering); |
469 | 58 | return CGF.Builder.CreateSub(Result, ConstantInt::get(IntTy, 1)); |
470 | 58 | } |
471 | | |
472 | | // Build a plain volatile load. |
473 | 16 | static Value *EmitISOVolatileLoad(CodeGenFunction &CGF, const CallExpr *E) { |
474 | 16 | Value *Ptr = CGF.EmitScalarExpr(E->getArg(0)); |
475 | 16 | QualType ElTy = E->getArg(0)->getType()->getPointeeType(); |
476 | 16 | CharUnits LoadSize = CGF.getContext().getTypeSizeInChars(ElTy); |
477 | 16 | llvm::Type *ITy = |
478 | 16 | llvm::IntegerType::get(CGF.getLLVMContext(), LoadSize.getQuantity() * 8); |
479 | 16 | llvm::LoadInst *Load = CGF.Builder.CreateAlignedLoad(ITy, Ptr, LoadSize); |
480 | 16 | Load->setVolatile(true); |
481 | 16 | return Load; |
482 | 16 | } |
483 | | |
484 | | // Build a plain volatile store. |
485 | 16 | static Value *EmitISOVolatileStore(CodeGenFunction &CGF, const CallExpr *E) { |
486 | 16 | Value *Ptr = CGF.EmitScalarExpr(E->getArg(0)); |
487 | 16 | Value *Value = CGF.EmitScalarExpr(E->getArg(1)); |
488 | 16 | QualType ElTy = E->getArg(0)->getType()->getPointeeType(); |
489 | 16 | CharUnits StoreSize = CGF.getContext().getTypeSizeInChars(ElTy); |
490 | 16 | llvm::StoreInst *Store = |
491 | 16 | CGF.Builder.CreateAlignedStore(Value, Ptr, StoreSize); |
492 | 16 | Store->setVolatile(true); |
493 | 16 | return Store; |
494 | 16 | } |
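These two helpers back the MSVC __iso_volatile_* builtins on ARM targets; a minimal sketch (assuming that mode):

    int demo(const volatile int *src, volatile int *dst) {
      /* Each lowers to a plain volatile load/store of the pointee width
         (i32 here) with no atomic ordering attached. */
      int v = __iso_volatile_load32(src);
      __iso_volatile_store32(dst, v);
      return v;
    }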
495 | | |
496 | | // Emit a simple mangled intrinsic that has 1 argument and a return type |
497 | | // matching the argument type. Depending on mode, this may be a constrained |
498 | | // floating-point intrinsic. |
499 | | static Value *emitUnaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, |
500 | | const CallExpr *E, unsigned IntrinsicID, |
501 | 810 | unsigned ConstrainedIntrinsicID) { |
502 | 810 | llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); |
503 | | |
504 | 810 | CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E); |
505 | 810 | if (CGF.Builder.getIsFPConstrained()) { |
506 | 101 | Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType()); |
507 | 101 | return CGF.Builder.CreateConstrainedFPCall(F, { Src0 }); |
508 | 709 | } else { |
509 | 709 | Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); |
510 | 709 | return CGF.Builder.CreateCall(F, Src0); |
511 | 709 | } |
512 | 810 | } |
513 | | |
514 | | // Emit an intrinsic that has 2 operands of the same type as its result. |
515 | | // Depending on mode, this may be a constrained floating-point intrinsic. |
516 | | static Value *emitBinaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, |
517 | | const CallExpr *E, unsigned IntrinsicID, |
518 | 188 | unsigned ConstrainedIntrinsicID) { |
519 | 188 | llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); |
520 | 188 | llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1)); |
521 | | |
522 | 188 | if (CGF.Builder.getIsFPConstrained()) { |
523 | 21 | CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E); |
524 | 21 | Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType()); |
525 | 21 | return CGF.Builder.CreateConstrainedFPCall(F, { Src0, Src1 }); |
526 | 167 | } else { |
527 | 167 | Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); |
528 | 167 | return CGF.Builder.CreateCall(F, { Src0, Src1 }); |
529 | 167 | } |
530 | 188 | } |
531 | | |
532 | | // Emit a binary intrinsic whose second argument's type is also mangled into the intrinsic name.
533 | | static Value *emitBinaryExpMaybeConstrainedFPBuiltin( |
534 | | CodeGenFunction &CGF, const CallExpr *E, llvm::Intrinsic::ID IntrinsicID, |
535 | 18 | llvm::Intrinsic::ID ConstrainedIntrinsicID) { |
536 | 18 | llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); |
537 | 18 | llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1)); |
538 | | |
539 | 18 | if (CGF.Builder.getIsFPConstrained()) { |
540 | 4 | CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E); |
541 | 4 | Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, |
542 | 4 | {Src0->getType(), Src1->getType()}); |
543 | 4 | return CGF.Builder.CreateConstrainedFPCall(F, {Src0, Src1}); |
544 | 4 | } |
545 | | |
546 | 14 | Function *F = |
547 | 14 | CGF.CGM.getIntrinsic(IntrinsicID, {Src0->getType(), Src1->getType()}); |
548 | 14 | return CGF.Builder.CreateCall(F, {Src0, Src1}); |
549 | 18 | } |
550 | | |
551 | | // Emit an intrinsic that has 3 operands of the same type as its result. |
552 | | // Depending on mode, this may be a constrained floating-point intrinsic. |
553 | | static Value *emitTernaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, |
554 | | const CallExpr *E, unsigned IntrinsicID, |
555 | 79 | unsigned ConstrainedIntrinsicID) { |
556 | 79 | llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); |
557 | 79 | llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1)); |
558 | 79 | llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2)); |
559 | | |
560 | 79 | if (CGF.Builder.getIsFPConstrained()) { |
561 | 7 | CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E); |
562 | 7 | Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType()); |
563 | 7 | return CGF.Builder.CreateConstrainedFPCall(F, { Src0, Src1, Src2 }); |
564 | 72 | } else { |
565 | 72 | Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); |
566 | 72 | return CGF.Builder.CreateCall(F, { Src0, Src1, Src2 }); |
567 | 72 | } |
568 | 79 | } |
569 | | |
570 | | // Emit an intrinsic where all operands are of the same type as the result. |
571 | | // Depending on mode, this may be a constrained floating-point intrinsic. |
572 | | static Value *emitCallMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, |
573 | | unsigned IntrinsicID, |
574 | | unsigned ConstrainedIntrinsicID, |
575 | | llvm::Type *Ty, |
576 | 124 | ArrayRef<Value *> Args) { |
577 | 124 | Function *F; |
578 | 124 | if (CGF.Builder.getIsFPConstrained()) |
579 | 38 | F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Ty); |
580 | 86 | else |
581 | 86 | F = CGF.CGM.getIntrinsic(IntrinsicID, Ty); |
582 | | |
583 | 124 | if (CGF.Builder.getIsFPConstrained()) |
584 | 38 | return CGF.Builder.CreateConstrainedFPCall(F, Args); |
585 | 86 | else |
586 | 86 | return CGF.Builder.CreateCall(F, Args); |
587 | 124 | } |
588 | | |
589 | | // Emit a simple mangled intrinsic that has 1 argument and a return type |
590 | | // matching the argument type. |
591 | | static Value *emitUnaryBuiltin(CodeGenFunction &CGF, const CallExpr *E, |
592 | | unsigned IntrinsicID, |
593 | 713 | llvm::StringRef Name = "") { |
594 | 713 | llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); |
595 | | |
596 | 713 | Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); |
597 | 713 | return CGF.Builder.CreateCall(F, Src0, Name); |
598 | 713 | } |
599 | | |
600 | | // Emit an intrinsic that has 2 operands of the same type as its result. |
601 | | static Value *emitBinaryBuiltin(CodeGenFunction &CGF, |
602 | | const CallExpr *E, |
603 | 110 | unsigned IntrinsicID) { |
604 | 110 | llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); |
605 | 110 | llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1)); |
606 | | |
607 | 110 | Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); |
608 | 110 | return CGF.Builder.CreateCall(F, { Src0, Src1 }); |
609 | 110 | } |
610 | | |
611 | | // Emit an intrinsic that has 3 operands of the same type as its result. |
612 | | static Value *emitTernaryBuiltin(CodeGenFunction &CGF, |
613 | | const CallExpr *E, |
614 | 29 | unsigned IntrinsicID) { |
615 | 29 | llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); |
616 | 29 | llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1)); |
617 | 29 | llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2)); |
618 | | |
619 | 29 | Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); |
620 | 29 | return CGF.Builder.CreateCall(F, { Src0, Src1, Src2 }); |
621 | 29 | } |
622 | | |
623 | | // Emit an intrinsic that has 1 float or double operand, and 1 integer. |
624 | | static Value *emitFPIntBuiltin(CodeGenFunction &CGF, |
625 | | const CallExpr *E, |
626 | 9 | unsigned IntrinsicID) { |
627 | 9 | llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); |
628 | 9 | llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1)); |
629 | | |
630 | 9 | Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); |
631 | 9 | return CGF.Builder.CreateCall(F, {Src0, Src1}); |
632 | 9 | } |
633 | | |
634 | | // Emit an intrinsic that has overloaded integer result and fp operand. |
635 | | static Value * |
636 | | emitMaybeConstrainedFPToIntRoundBuiltin(CodeGenFunction &CGF, const CallExpr *E, |
637 | | unsigned IntrinsicID, |
638 | 96 | unsigned ConstrainedIntrinsicID) { |
639 | 96 | llvm::Type *ResultType = CGF.ConvertType(E->getType()); |
640 | 96 | llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); |
641 | | |
642 | 96 | if (CGF.Builder.getIsFPConstrained()) { |
643 | 28 | CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E); |
644 | 28 | Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, |
645 | 28 | {ResultType, Src0->getType()}); |
646 | 28 | return CGF.Builder.CreateConstrainedFPCall(F, {Src0}); |
647 | 68 | } else { |
648 | 68 | Function *F = |
649 | 68 | CGF.CGM.getIntrinsic(IntrinsicID, {ResultType, Src0->getType()}); |
650 | 68 | return CGF.Builder.CreateCall(F, Src0); |
651 | 68 | } |
652 | 96 | } |
653 | | |
654 | | static Value *emitFrexpBuiltin(CodeGenFunction &CGF, const CallExpr *E, |
655 | 39 | llvm::Intrinsic::ID IntrinsicID) { |
656 | 39 | llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); |
657 | 39 | llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1)); |
658 | | |
659 | 39 | QualType IntPtrTy = E->getArg(1)->getType()->getPointeeType(); |
660 | 39 | llvm::Type *IntTy = CGF.ConvertType(IntPtrTy); |
661 | 39 | llvm::Function *F = |
662 | 39 | CGF.CGM.getIntrinsic(IntrinsicID, {Src0->getType(), IntTy}); |
663 | 39 | llvm::Value *Call = CGF.Builder.CreateCall(F, Src0); |
664 | | |
665 | 39 | llvm::Value *Exp = CGF.Builder.CreateExtractValue(Call, 1); |
666 | 39 | LValue LV = CGF.MakeNaturalAlignAddrLValue(Src1, IntPtrTy); |
667 | 39 | CGF.EmitStoreOfScalar(Exp, LV); |
668 | | |
669 | 39 | return CGF.Builder.CreateExtractValue(Call, 0); |
670 | 39 | } |
671 | | |
672 | | /// EmitFAbs - Emit a call to @llvm.fabs(). |
673 | 10 | static Value *EmitFAbs(CodeGenFunction &CGF, Value *V) { |
674 | 10 | Function *F = CGF.CGM.getIntrinsic(Intrinsic::fabs, V->getType()); |
675 | 10 | llvm::CallInst *Call = CGF.Builder.CreateCall(F, V); |
676 | 10 | Call->setDoesNotAccessMemory(); |
677 | 10 | return Call; |
678 | 10 | } |
679 | | |
680 | | /// Emit the computation of the sign bit for a floating point value. Returns |
681 | | /// the i1 sign bit value. |
682 | 39 | static Value *EmitSignBit(CodeGenFunction &CGF, Value *V) { |
683 | 39 | LLVMContext &C = CGF.CGM.getLLVMContext(); |
684 | | |
685 | 39 | llvm::Type *Ty = V->getType(); |
686 | 39 | int Width = Ty->getPrimitiveSizeInBits(); |
687 | 39 | llvm::Type *IntTy = llvm::IntegerType::get(C, Width); |
688 | 39 | V = CGF.Builder.CreateBitCast(V, IntTy); |
689 | 39 | if (Ty->isPPC_FP128Ty()) { |
690 | | // We want the sign bit of the higher-order double. The bitcast we just |
691 | | // did works as if the double-double was stored to memory and then |
692 | | // read as an i128. The "store" will put the higher-order double in the |
693 | | // lower address in both little- and big-Endian modes, but the "load" |
694 | | // will treat those bits as a different part of the i128: the low bits in |
695 | | // little-Endian, the high bits in big-Endian. Therefore, on big-Endian |
696 | | // we need to shift the high bits down to the low before truncating. |
697 | 14 | Width >>= 1; |
698 | 14 | if (CGF.getTarget().isBigEndian()) { |
699 | 9 | Value *ShiftCst = llvm::ConstantInt::get(IntTy, Width); |
700 | 9 | V = CGF.Builder.CreateLShr(V, ShiftCst); |
701 | 9 | } |
702 | | // We are truncating value in order to extract the higher-order |
703 | | // double, which we will be using to extract the sign from. |
704 | 14 | IntTy = llvm::IntegerType::get(C, Width); |
705 | 14 | V = CGF.Builder.CreateTrunc(V, IntTy); |
706 | 14 | } |
707 | 39 | Value *Zero = llvm::Constant::getNullValue(IntTy); |
708 | 39 | return CGF.Builder.CreateICmpSLT(V, Zero); |
709 | 39 | } |
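A sketch (mine) of the caller this serves, exercising the ppc_fp128 case described in the comment:

    int demo(long double x) {
      /* Where long double is IBM double-double (ppc_fp128), the value is
         bitcast to i128 and narrowed to the high-order double (shifted
         first on big-endian) before the signed compare against zero. */
      return __builtin_signbit(x);
    }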
710 | | |
711 | | static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *FD, |
712 | 13.6k | const CallExpr *E, llvm::Constant *calleeValue) { |
713 | 13.6k | CGCallee callee = CGCallee::forDirect(calleeValue, GlobalDecl(FD)); |
714 | 13.6k | return CGF.EmitCall(E->getCallee()->getType(), callee, E, ReturnValueSlot()); |
715 | 13.6k | } |
716 | | |
717 | | /// Emit a call to llvm.{sadd,uadd,ssub,usub,smul,umul}.with.overflow.* |
718 | | /// depending on IntrinsicID. |
719 | | /// |
720 | | /// \arg CGF The current codegen function. |
721 | | /// \arg IntrinsicID The ID for the Intrinsic we wish to generate. |
722 | | /// \arg X The first argument to the llvm.*.with.overflow.*. |
723 | | /// \arg Y The second argument to the llvm.*.with.overflow.*. |
724 | | /// \arg Carry The carry returned by the llvm.*.with.overflow.*. |
725 | | /// \returns The result (i.e. sum/product) returned by the intrinsic. |
726 | | static llvm::Value *EmitOverflowIntrinsic(CodeGenFunction &CGF, |
727 | | const llvm::Intrinsic::ID IntrinsicID, |
728 | | llvm::Value *X, llvm::Value *Y, |
729 | 220 | llvm::Value *&Carry) { |
730 | | // Make sure we have integers of the same width. |
731 | 220 | assert(X->getType() == Y->getType() && |
732 | 220 | "Arguments must be the same type. (Did you forget to make sure both " |
733 | 220 | "arguments have the same integer width?)"); |
734 | | |
735 | 220 | Function *Callee = CGF.CGM.getIntrinsic(IntrinsicID, X->getType()); |
736 | 220 | llvm::Value *Tmp = CGF.Builder.CreateCall(Callee, {X, Y}); |
737 | 220 | Carry = CGF.Builder.CreateExtractValue(Tmp, 1); |
738 | 220 | return CGF.Builder.CreateExtractValue(Tmp, 0); |
739 | 220 | } |
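For a same-type signed addition the checked builtins reduce to a single with.overflow intrinsic; a hedged sketch (mine):

    _Bool demo(int x, int y, int *sum) {
      /* Emits call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %x, i32 %y);
         Carry receives field 1 and the stored sum is field 0. */
      return __builtin_add_overflow(x, y, sum);
    }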
740 | | |
741 | | static Value *emitRangedBuiltin(CodeGenFunction &CGF, |
742 | | unsigned IntrinsicID, |
743 | 6 | int low, int high) { |
744 | 6 | llvm::MDBuilder MDHelper(CGF.getLLVMContext()); |
745 | 6 | llvm::MDNode *RNode = MDHelper.createRange(APInt(32, low), APInt(32, high)); |
746 | 6 | Function *F = CGF.CGM.getIntrinsic(IntrinsicID, {}); |
747 | 6 | llvm::Instruction *Call = CGF.Builder.CreateCall(F); |
748 | 6 | Call->setMetadata(llvm::LLVMContext::MD_range, RNode); |
749 | 6 | Call->setMetadata(llvm::LLVMContext::MD_noundef, |
750 | 6 | llvm::MDNode::get(CGF.getLLVMContext(), std::nullopt)); |
751 | 6 | return Call; |
752 | 6 | } |
753 | | |
754 | | namespace { |
755 | | struct WidthAndSignedness { |
756 | | unsigned Width; |
757 | | bool Signed; |
758 | | }; |
759 | | } |
760 | | |
761 | | static WidthAndSignedness |
762 | | getIntegerWidthAndSignedness(const clang::ASTContext &context, |
763 | 315 | const clang::QualType Type) { |
764 | 315 | assert(Type->isIntegerType() && "Given type is not an integer."); |
765 | 315 | unsigned Width = Type->isBooleanType() ? 1
766 | 315 | : Type->isBitIntType() ? context.getIntWidth(Type)
767 | 297 | : context.getTypeInfo(Type).Width;
768 | 315 | bool Signed = Type->isSignedIntegerType(); |
769 | 315 | return {Width, Signed}; |
770 | 315 | } |
771 | | |
772 | | // Given one or more integer types, this function produces an integer type that |
773 | | // encompasses them: any value in one of the given types could be expressed in |
774 | | // the encompassing type. |
775 | | static struct WidthAndSignedness |
776 | 69 | EncompassingIntegerType(ArrayRef<struct WidthAndSignedness> Types) { |
777 | 69 | assert(Types.size() > 0 && "Empty list of types."); |
778 | | |
779 | | // If any of the given types is signed, we must return a signed type. |
780 | 69 | bool Signed = false; |
781 | 207 | for (const auto &Type : Types) { |
782 | 207 | Signed |= Type.Signed; |
783 | 207 | } |
784 | | |
785 | | // The encompassing type must have a width greater than or equal to the width |
786 | | // of the specified types. Additionally, if the encompassing type is signed, |
787 | | // its width must be strictly greater than the width of any unsigned types |
788 | | // given. |
789 | 69 | unsigned Width = 0; |
790 | 207 | for (const auto &Type : Types) { |
791 | 207 | unsigned MinWidth = Type.Width + (Signed && !Type.Signed);
792 | 207 | if (Width < MinWidth) { |
793 | 79 | Width = MinWidth; |
794 | 79 | } |
795 | 207 | } |
796 | | |
797 | 69 | return {Width, Signed}; |
798 | 69 | } |
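A worked example (mine) of the width rule: for Types = { {32, unsigned}, {32, signed} }, Signed becomes true, the unsigned member then needs MinWidth = 32 + 1, and the encompassing type is signed with Width = 33. The generic overflow builtins use this to pick their intermediate type:

    _Bool f(unsigned a, int b, int *res) {
      /* Mixed signedness: the sum is computed in the encompassing signed
         33-bit type, then range-checked when truncated into *res. */
      return __builtin_add_overflow(a, b, res);
    }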
799 | | |
800 | 519 | Value *CodeGenFunction::EmitVAStartEnd(Value *ArgValue, bool IsStart) { |
801 | 519 | Intrinsic::ID inst = IsStart ? Intrinsic::vastart : Intrinsic::vaend;
802 | 519 | return Builder.CreateCall(CGM.getIntrinsic(inst), ArgValue); |
803 | 519 | } |
804 | | |
805 | | /// Checks if using the result of __builtin_object_size(p, @p From) in place of |
806 | | /// __builtin_object_size(p, @p To) is correct |
807 | 38 | static bool areBOSTypesCompatible(int From, int To) { |
808 | | // Note: Our __builtin_object_size implementation currently treats Type=0 and |
809 | | // Type=2 identically. Encoding this implementation detail here may make |
810 | | // improving __builtin_object_size difficult in the future, so it's omitted. |
811 | 38 | return From == To || (From == 0 && To == 1) || (From == 3 && To == 2);
812 | 38 | } |
813 | | |
814 | | static llvm::Value * |
815 | 57 | getDefaultBuiltinObjectSizeResult(unsigned Type, llvm::IntegerType *ResType) { |
816 | 57 | return ConstantInt::get(ResType, (Type & 2) ? 0 : -1, /*isSigned=*/true);
817 | 57 | } |
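For reference, the defaults this encodes (example mine): types 0 and 1 are upper-bound queries, so "unknown" is (size_t)-1, while types 2 and 3 are lower-bound queries, so "unknown" is 0:

    #include <stddef.h>
    size_t upper(void *p) { return __builtin_object_size(p, 0); } /* -1 if unknown */
    size_t lower(void *p) { return __builtin_object_size(p, 2); } /* 0 if unknown */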
818 | | |
819 | | llvm::Value * |
820 | | CodeGenFunction::evaluateOrEmitBuiltinObjectSize(const Expr *E, unsigned Type, |
821 | | llvm::IntegerType *ResType, |
822 | | llvm::Value *EmittedE, |
823 | 107 | bool IsDynamic) { |
824 | 107 | uint64_t ObjectSize; |
825 | 107 | if (!E->tryEvaluateObjectSize(ObjectSize, getContext(), Type)) |
826 | 55 | return emitBuiltinObjectSize(E, Type, ResType, EmittedE, IsDynamic); |
827 | 52 | return ConstantInt::get(ResType, ObjectSize, /*isSigned=*/true); |
828 | 107 | } |
829 | | |
830 | | /// Returns a Value corresponding to the size of the given expression. |
831 | | /// This Value may be either of the following: |
832 | | /// - A llvm::Argument (if E is a param with the pass_object_size attribute on |
833 | | /// it) |
834 | | /// - A call to the @llvm.objectsize intrinsic |
835 | | /// |
836 | | /// EmittedE is the result of emitting `E` as a scalar expr. If it's non-null |
837 | | /// and we wouldn't otherwise try to reference a pass_object_size parameter, |
838 | | /// we'll call @llvm.objectsize on EmittedE, rather than emitting E. |
839 | | llvm::Value * |
840 | | CodeGenFunction::emitBuiltinObjectSize(const Expr *E, unsigned Type, |
841 | | llvm::IntegerType *ResType, |
842 | 349 | llvm::Value *EmittedE, bool IsDynamic) { |
843 | | // We need to reference an argument if the pointer is a parameter with the |
844 | | // pass_object_size attribute. |
845 | 349 | if (auto *D = dyn_cast<DeclRefExpr>(E->IgnoreParenImpCasts())) { |
846 | 149 | auto *Param = dyn_cast<ParmVarDecl>(D->getDecl()); |
847 | 149 | auto *PS = D->getDecl()->getAttr<PassObjectSizeAttr>(); |
848 | 149 | if (Param != nullptr && PS != nullptr86 && |
849 | 149 | areBOSTypesCompatible(PS->getType(), Type)38 ) { |
850 | 28 | auto Iter = SizeArguments.find(Param); |
851 | 28 | assert(Iter != SizeArguments.end()); |
852 | | |
853 | 28 | const ImplicitParamDecl *D = Iter->second; |
854 | 28 | auto DIter = LocalDeclMap.find(D); |
855 | 28 | assert(DIter != LocalDeclMap.end()); |
856 | | |
857 | 28 | return EmitLoadOfScalar(DIter->second, /*Volatile=*/false, |
858 | 28 | getContext().getSizeType(), E->getBeginLoc()); |
859 | 28 | } |
860 | 149 | } |
861 | | |
862 | | // LLVM can't handle Type=3 appropriately, and __builtin_object_size shouldn't |
863 | | // evaluate E for side-effects. In either case, we shouldn't lower to |
864 | | // @llvm.objectsize. |
865 | 321 | if (Type == 3 || (286 !EmittedE286 && E->HasSideEffects(getContext())246 )) |
866 | 35 | return getDefaultBuiltinObjectSizeResult(Type, ResType); |
867 | | |
868 | 286 | if (IsDynamic) { |
869 | | // The code generated here calculates the size of a struct with a flexible |
870 | | // array member that uses the counted_by attribute. There are three instances
871 | | // we handle: |
872 | | // |
873 | | // struct s { |
874 | | // unsigned long flags; |
875 | | // int count; |
876 | | // int array[] __attribute__((counted_by(count))); |
877 | | // } |
878 | | // |
879 | | // 1) bdos of the flexible array itself: |
880 | | // |
881 | | // __builtin_dynamic_object_size(p->array, 1) == |
882 | | // p->count * sizeof(*p->array) |
883 | | // |
884 | | // 2) bdos of a pointer into the flexible array: |
885 | | // |
886 | | // __builtin_dynamic_object_size(&p->array[42], 1) == |
887 | | // (p->count - 42) * sizeof(*p->array) |
888 | | // |
889 | | // 3) bdos of the whole struct, including the flexible array:
890 | | // |
891 | | // __builtin_dynamic_object_size(p, 1) == |
892 | | // max(sizeof(struct s), |
893 | | // offsetof(struct s, array) + p->count * sizeof(*p->array)) |
894 | | // |
895 | 140 | const Expr *Base = E->IgnoreParenImpCasts(); |
896 | 140 | const Expr *Idx = nullptr; |
897 | 140 | if (const auto *UO = dyn_cast<UnaryOperator>(Base); |
898 | 140 | UO && UO->getOpcode() == UO_AddrOf) {
899 | 24 | if (const auto *ASE = |
900 | 24 | dyn_cast<ArraySubscriptExpr>(UO->getSubExpr()->IgnoreParens())) { |
901 | 19 | Base = ASE->getBase(); |
902 | 19 | Idx = ASE->getIdx()->IgnoreParenImpCasts(); |
903 | | |
904 | 19 | if (const auto *IL = dyn_cast<IntegerLiteral>(Idx); |
905 | 19 | IL && !IL->getValue().getSExtValue())
906 | 4 | Idx = nullptr; |
907 | 19 | } |
908 | 24 | } |
909 | | |
910 | 140 | if (const ValueDecl *CountedByFD = FindCountedByField(Base)) { |
911 | 22 | bool IsSigned = CountedByFD->getType()->isSignedIntegerType(); |
912 | 22 | const RecordDecl *OuterRD = |
913 | 22 | CountedByFD->getDeclContext()->getOuterLexicalRecordContext(); |
914 | 22 | ASTContext &Ctx = getContext(); |
915 | | |
916 | | // Load the counted_by field. |
917 | 22 | const Expr *CountedByExpr = BuildCountedByFieldExpr(Base, CountedByFD); |
918 | 22 | Value *CountedByInst = EmitAnyExprToTemp(CountedByExpr).getScalarVal(); |
919 | 22 | llvm::Type *CountedByTy = CountedByInst->getType(); |
920 | | |
921 | 22 | if (Idx) { |
922 | | // There's an index into the array. Remove it from the count. |
923 | 6 | bool IdxSigned = Idx->getType()->isSignedIntegerType(); |
924 | 6 | Value *IdxInst = EmitAnyExprToTemp(Idx).getScalarVal(); |
925 | 6 | IdxInst = IdxSigned ? Builder.CreateSExtOrTrunc(IdxInst, CountedByTy) |
926 | 6 | : Builder.CreateZExtOrTrunc(IdxInst, CountedByTy);
927 | | |
928 | | // If the index is negative, don't subtract it from the counted_by |
929 | | // value. The pointer is pointing to something before the FAM. |
930 | 6 | IdxInst = Builder.CreateNeg(IdxInst, "", !IdxSigned, IdxSigned); |
931 | 6 | CountedByInst = |
932 | 6 | Builder.CreateAdd(CountedByInst, IdxInst, "", !IsSigned, IsSigned); |
933 | 6 | } |
934 | | |
935 | | // Get the size of the flexible array member's base type. |
936 | 22 | const ValueDecl *FAMDecl = nullptr; |
937 | 22 | if (const auto *ME = dyn_cast<MemberExpr>(Base)) { |
938 | 8 | const LangOptions::StrictFlexArraysLevelKind StrictFlexArraysLevel = |
939 | 8 | getLangOpts().getStrictFlexArraysLevel(); |
940 | 8 | if (const ValueDecl *MD = ME->getMemberDecl(); |
941 | 8 | MD && Decl::isFlexibleArrayMemberLike( |
942 | 8 | Ctx, MD, MD->getType(), StrictFlexArraysLevel, |
943 | 8 | /*IgnoreTemplateOrMacroSubstitution=*/true)) |
944 | | // Base is referencing the FAM itself. |
945 | 8 | FAMDecl = MD; |
946 | 8 | } |
947 | | |
948 | 22 | if (!FAMDecl) |
949 | 14 | FAMDecl = FindFlexibleArrayMemberField(Ctx, OuterRD); |
950 | | |
951 | 22 | assert(FAMDecl && "Can't find the flexible array member field"); |
952 | | |
953 | 22 | const ArrayType *ArrayTy = Ctx.getAsArrayType(FAMDecl->getType()); |
954 | 22 | CharUnits Size = Ctx.getTypeSizeInChars(ArrayTy->getElementType()); |
955 | 22 | llvm::Constant *ElemSize = |
956 | 22 | llvm::ConstantInt::get(CountedByTy, Size.getQuantity(), IsSigned); |
957 | | |
958 | | // Calculate how large the flexible array member is in bytes. |
959 | 22 | Value *FAMSize = |
960 | 22 | Builder.CreateMul(CountedByInst, ElemSize, "", !IsSigned, IsSigned); |
961 | 22 | FAMSize = IsSigned ? Builder.CreateSExtOrTrunc(FAMSize, ResType)
962 | 22 | : Builder.CreateZExtOrTrunc(FAMSize, ResType);
963 | 22 | Value *Res = FAMSize; |
964 | | |
965 | 22 | if (const auto *DRE = dyn_cast<DeclRefExpr>(Base)) { |
966 | | // The whole struct is specified in the __bdos.
967 | 8 | const ASTRecordLayout &Layout = Ctx.getASTRecordLayout(OuterRD); |
968 | | |
969 | | // Get the offset of the FAM. |
970 | 8 | CharUnits Offset = Ctx.toCharUnitsFromBits(Ctx.getFieldOffset(FAMDecl)); |
971 | 8 | llvm::Constant *FAMOffset = |
972 | 8 | ConstantInt::get(ResType, Offset.getQuantity(), IsSigned); |
973 | | |
974 | | // max(sizeof(struct s), |
975 | | // offsetof(struct s, array) + p->count * sizeof(*p->array)) |
976 | 8 | Value *OffsetAndFAMSize = |
977 | 8 | Builder.CreateAdd(FAMOffset, Res, "", !IsSigned, IsSigned); |
978 | | |
979 | | // Get the full size of the struct. |
980 | 8 | llvm::Constant *SizeofStruct = |
981 | 8 | ConstantInt::get(ResType, Layout.getSize().getQuantity(), IsSigned); |
982 | | |
983 | 8 | Res = IsSigned |
984 | 8 | ? Builder.CreateBinaryIntrinsic( |
985 | 6 | llvm::Intrinsic::smax, OffsetAndFAMSize, SizeofStruct) |
986 | 8 | : Builder.CreateBinaryIntrinsic( |
987 | 2 | llvm::Intrinsic::umax, OffsetAndFAMSize, SizeofStruct); |
988 | 14 | } else if (const auto *ME = dyn_cast<MemberExpr>(Base)) { |
989 | | // Pointing to a place before the FAM. Add the difference to the FAM's |
990 | | // size. |
991 | 8 | if (const ValueDecl *MD = ME->getMemberDecl(); MD != FAMDecl) { |
992 | 0 | CharUnits Offset = Ctx.toCharUnitsFromBits(Ctx.getFieldOffset(MD)); |
993 | 0 | CharUnits FAMOffset = |
994 | 0 | Ctx.toCharUnitsFromBits(Ctx.getFieldOffset(FAMDecl)); |
995 | |
996 | 0 | Res = Builder.CreateAdd( |
997 | 0 | Res, ConstantInt::get(ResType, FAMOffset.getQuantity() - |
998 | 0 | Offset.getQuantity())); |
999 | 0 | } |
1000 | 8 | } |
1001 | | |
1002 | | // A negative 'FAMSize' means that the index was greater than the count,
1003 | | // or that the count field was improperly set. Return -1 (for types 0 and
1004 | | // 1) or 0 (for types 2 and 3).
1005 | 22 | return Builder.CreateSelect( |
1006 | 22 | Builder.CreateIsNeg(FAMSize), |
1007 | 22 | getDefaultBuiltinObjectSizeResult(Type, ResType), Res); |
1008 | 22 | } |
1009 | 140 | } |
1010 | | |
1011 | 264 | Value *Ptr = EmittedE ? EmittedE : EmitScalarExpr(E);
1012 | 264 | assert(Ptr->getType()->isPointerTy() && |
1013 | 264 | "Non-pointer passed to __builtin_object_size?"); |
1014 | | |
1015 | 264 | Function *F = |
1016 | 264 | CGM.getIntrinsic(Intrinsic::objectsize, {ResType, Ptr->getType()}); |
1017 | | |
1018 | | // LLVM only supports 0 and 2, make sure that we pass along that as a boolean. |
1019 | 264 | Value *Min = Builder.getInt1((Type & 2) != 0); |
1020 | | // For GCC compatibility, __builtin_object_size treat NULL as unknown size. |
1021 | 264 | Value *NullIsUnknown = Builder.getTrue(); |
1022 | 264 | Value *Dynamic = Builder.getInt1(IsDynamic); |
1023 | 264 | return Builder.CreateCall(F, {Ptr, Min, NullIsUnknown, Dynamic}); |
1024 | 264 | } |
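Tying the counted_by cases above back to source, a compilable sketch (assuming a Clang recent enough to accept counted_by):

    #include <stddef.h>
    struct s {
      unsigned long flags;
      int count;
      int array[] __attribute__((counted_by(count)));
    };
    size_t demo(struct s *p) {
      /* Case 1 above: p->count * sizeof(*p->array), with a negative result
         replaced by the type's default via the final CreateSelect. */
      return __builtin_dynamic_object_size(p->array, 1);
    }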
1025 | | |
1026 | | namespace { |
1027 | | /// A struct to generically describe a bit test intrinsic. |
1028 | | struct BitTest { |
1029 | | enum ActionKind : uint8_t { TestOnly, Complement, Reset, Set }; |
1030 | | enum InterlockingKind : uint8_t { |
1031 | | Unlocked, |
1032 | | Sequential, |
1033 | | Acquire, |
1034 | | Release, |
1035 | | NoFence |
1036 | | }; |
1037 | | |
1038 | | ActionKind Action; |
1039 | | InterlockingKind Interlocking; |
1040 | | bool Is64Bit; |
1041 | | |
1042 | | static BitTest decodeBitTestBuiltin(unsigned BuiltinID); |
1043 | | }; |
1044 | | } // namespace |
1045 | | |
1046 | 51 | BitTest BitTest::decodeBitTestBuiltin(unsigned BuiltinID) { |
1047 | 51 | switch (BuiltinID) { |
1048 | | // Main portable variants. |
1049 | 3 | case Builtin::BI_bittest: |
1050 | 3 | return {TestOnly, Unlocked, false}; |
1051 | 3 | case Builtin::BI_bittestandcomplement: |
1052 | 3 | return {Complement, Unlocked, false}; |
1053 | 3 | case Builtin::BI_bittestandreset: |
1054 | 3 | return {Reset, Unlocked, false}; |
1055 | 3 | case Builtin::BI_bittestandset: |
1056 | 3 | return {Set, Unlocked, false}; |
1057 | 3 | case Builtin::BI_interlockedbittestandreset: |
1058 | 3 | return {Reset, Sequential, false}; |
1059 | 6 | case Builtin::BI_interlockedbittestandset: |
1060 | 6 | return {Set, Sequential, false}; |
1061 | | |
1062 | | // X86-specific 64-bit variants. |
1063 | 3 | case Builtin::BI_bittest64: |
1064 | 3 | return {TestOnly, Unlocked, true}; |
1065 | 3 | case Builtin::BI_bittestandcomplement64: |
1066 | 3 | return {Complement, Unlocked, true}; |
1067 | 3 | case Builtin::BI_bittestandreset64: |
1068 | 3 | return {Reset, Unlocked, true}; |
1069 | 3 | case Builtin::BI_bittestandset64: |
1070 | 3 | return {Set, Unlocked, true}; |
1071 | 3 | case Builtin::BI_interlockedbittestandreset64: |
1072 | 3 | return {Reset, Sequential, true}; |
1073 | 3 | case Builtin::BI_interlockedbittestandset64: |
1074 | 3 | return {Set, Sequential, true}; |
1075 | | |
1076 | | // ARM/AArch64-specific ordering variants. |
1077 | 2 | case Builtin::BI_interlockedbittestandset_acq: |
1078 | 2 | return {Set, Acquire, false}; |
1079 | 2 | case Builtin::BI_interlockedbittestandset_rel: |
1080 | 2 | return {Set, Release, false}; |
1081 | 2 | case Builtin::BI_interlockedbittestandset_nf: |
1082 | 2 | return {Set, NoFence, false}; |
1083 | 2 | case Builtin::BI_interlockedbittestandreset_acq: |
1084 | 2 | return {Reset, Acquire, false}; |
1085 | 2 | case Builtin::BI_interlockedbittestandreset_rel: |
1086 | 2 | return {Reset, Release, false}; |
1087 | 2 | case Builtin::BI_interlockedbittestandreset_nf: |
1088 | 2 | return {Reset, NoFence, false}; |
1089 | 51 | } |
1090 | 0 | llvm_unreachable("expected only bittest intrinsics"); |
1091 | 0 | } |
1092 | | |
1093 | 13 | static char bitActionToX86BTCode(BitTest::ActionKind A) { |
1094 | 13 | switch (A) { |
1095 | 2 | case BitTest::TestOnly: return '\0'; |
1096 | 2 | case BitTest::Complement: return 'c'; |
1097 | 4 | case BitTest::Reset: return 'r'; |
1098 | 5 | case BitTest::Set: return 's'; |
1099 | 13 | } |
1100 | 0 | llvm_unreachable("invalid action"); |
1101 | 0 | } |
1102 | | |
1103 | | static llvm::Value *EmitX86BitTestIntrinsic(CodeGenFunction &CGF, |
1104 | | BitTest BT, |
1105 | | const CallExpr *E, Value *BitBase, |
1106 | 13 | Value *BitPos) { |
1107 | 13 | char Action = bitActionToX86BTCode(BT.Action); |
1108 | 13 | char SizeSuffix = BT.Is64Bit ? 'q' : 'l';
1109 | | |
1110 | | // Build the assembly. |
1111 | 13 | SmallString<64> Asm; |
1112 | 13 | raw_svector_ostream AsmOS(Asm); |
1113 | 13 | if (BT.Interlocking != BitTest::Unlocked) |
1114 | 5 | AsmOS << "lock "; |
1115 | 13 | AsmOS << "bt"; |
1116 | 13 | if (Action) |
1117 | 11 | AsmOS << Action; |
1118 | 13 | AsmOS << SizeSuffix << " $2, ($1)"; |
1119 | | |
1120 | | // Build the constraints. FIXME: We should support immediates when possible. |
1121 | 13 | std::string Constraints = "={@ccc},r,r,~{cc},~{memory}"; |
1122 | 13 | std::string_view MachineClobbers = CGF.getTarget().getClobbers(); |
1123 | 13 | if (!MachineClobbers.empty()) { |
1124 | 13 | Constraints += ','; |
1125 | 13 | Constraints += MachineClobbers; |
1126 | 13 | } |
1127 | 13 | llvm::IntegerType *IntType = llvm::IntegerType::get( |
1128 | 13 | CGF.getLLVMContext(), |
1129 | 13 | CGF.getContext().getTypeSize(E->getArg(1)->getType())); |
1130 | 13 | llvm::FunctionType *FTy = |
1131 | 13 | llvm::FunctionType::get(CGF.Int8Ty, {CGF.UnqualPtrTy, IntType}, false); |
1132 | | |
1133 | 13 | llvm::InlineAsm *IA = |
1134 | 13 | llvm::InlineAsm::get(FTy, Asm, Constraints, /*hasSideEffects=*/true); |
1135 | 13 | return CGF.Builder.CreateCall(IA, {BitBase, BitPos}); |
1136 | 13 | } |
1137 | | |
1138 | | static llvm::AtomicOrdering |
1139 | 38 | getBitTestAtomicOrdering(BitTest::InterlockingKind I) { |
1140 | 38 | switch (I) { |
1141 | 16 | case BitTest::Unlocked: return llvm::AtomicOrdering::NotAtomic; |
1142 | 10 | case BitTest::Sequential: return llvm::AtomicOrdering::SequentiallyConsistent; |
1143 | 4 | case BitTest::Acquire: return llvm::AtomicOrdering::Acquire; |
1144 | 4 | case BitTest::Release: return llvm::AtomicOrdering::Release; |
1145 | 4 | case BitTest::NoFence: return llvm::AtomicOrdering::Monotonic; |
1146 | 38 | } |
1147 | 0 | llvm_unreachable("invalid interlocking"); |
1148 | 0 | } |
1149 | | |
1150 | | /// Emit a _bittest* intrinsic. These intrinsics take a pointer to an array of |
1151 | | /// bits and a bit position and read and optionally modify the bit at that |
1152 | | /// position. The position index can be arbitrarily large, i.e. it can be larger |
1153 | | /// than 31 or 63, so we need an indexed load in the general case. |
1154 | | static llvm::Value *EmitBitTestIntrinsic(CodeGenFunction &CGF, |
1155 | | unsigned BuiltinID, |
1156 | 51 | const CallExpr *E) { |
1157 | 51 | Value *BitBase = CGF.EmitScalarExpr(E->getArg(0)); |
1158 | 51 | Value *BitPos = CGF.EmitScalarExpr(E->getArg(1)); |
1159 | | |
1160 | 51 | BitTest BT = BitTest::decodeBitTestBuiltin(BuiltinID); |
1161 | | |
1162 | | // X86 has special BT, BTC, BTR, and BTS instructions that handle the array |
1163 | | // indexing operation internally. Use them if possible. |
1164 | 51 | if (CGF.getTarget().getTriple().isX86()) |
1165 | 13 | return EmitX86BitTestIntrinsic(CGF, BT, E, BitBase, BitPos); |
1166 | | |
1167 | | // Otherwise, use generic code to load one byte and test the bit. Use all but |
1168 | | // the bottom three bits as the array index, and the bottom three bits to form |
1169 | | // a mask. |
1170 | | // Bit = BitBaseI8[BitPos >> 3] & (1 << (BitPos & 0x7)) != 0; |
1171 | 38 | Value *ByteIndex = CGF.Builder.CreateAShr( |
1172 | 38 | BitPos, llvm::ConstantInt::get(BitPos->getType(), 3), "bittest.byteidx"); |
1173 | 38 | Value *BitBaseI8 = CGF.Builder.CreatePointerCast(BitBase, CGF.Int8PtrTy); |
1174 | 38 | Address ByteAddr(CGF.Builder.CreateInBoundsGEP(CGF.Int8Ty, BitBaseI8, |
1175 | 38 | ByteIndex, "bittest.byteaddr"), |
1176 | 38 | CGF.Int8Ty, CharUnits::One()); |
1177 | 38 | Value *PosLow = |
1178 | 38 | CGF.Builder.CreateAnd(CGF.Builder.CreateTrunc(BitPos, CGF.Int8Ty), |
1179 | 38 | llvm::ConstantInt::get(CGF.Int8Ty, 0x7)); |
1180 | | |
1181 | | // The updating instructions will need a mask. |
1182 | 38 | Value *Mask = nullptr; |
1183 | 38 | if (BT.Action != BitTest::TestOnly) { |
1184 | 34 | Mask = CGF.Builder.CreateShl(llvm::ConstantInt::get(CGF.Int8Ty, 1), PosLow, |
1185 | 34 | "bittest.mask"); |
1186 | 34 | } |
1187 | | |
1188 | | // Check the action and ordering of the interlocked intrinsics. |
1189 | 38 | llvm::AtomicOrdering Ordering = getBitTestAtomicOrdering(BT.Interlocking); |
1190 | | |
1191 | 38 | Value *OldByte = nullptr; |
1192 | 38 | if (Ordering != llvm::AtomicOrdering::NotAtomic) { |
1193 | | // Emit a combined atomicrmw load/store operation for the interlocked |
1194 | | // intrinsics. |
1195 | 22 | llvm::AtomicRMWInst::BinOp RMWOp = llvm::AtomicRMWInst::Or; |
1196 | 22 | if (BT.Action == BitTest::Reset) { |
1197 | 10 | Mask = CGF.Builder.CreateNot(Mask); |
1198 | 10 | RMWOp = llvm::AtomicRMWInst::And; |
1199 | 10 | } |
1200 | 22 | OldByte = CGF.Builder.CreateAtomicRMW(RMWOp, ByteAddr.getPointer(), Mask, |
1201 | 22 | Ordering); |
1202 | 22 | } else { |
1203 | | // Emit a plain load for the non-interlocked intrinsics. |
1204 | 16 | OldByte = CGF.Builder.CreateLoad(ByteAddr, "bittest.byte"); |
1205 | 16 | Value *NewByte = nullptr; |
1206 | 16 | switch (BT.Action) { |
1207 | 4 | case BitTest::TestOnly: |
1208 | | // Don't store anything. |
1209 | 4 | break; |
1210 | 4 | case BitTest::Complement: |
1211 | 4 | NewByte = CGF.Builder.CreateXor(OldByte, Mask); |
1212 | 4 | break; |
1213 | 4 | case BitTest::Reset: |
1214 | 4 | NewByte = CGF.Builder.CreateAnd(OldByte, CGF.Builder.CreateNot(Mask)); |
1215 | 4 | break; |
1216 | 4 | case BitTest::Set: |
1217 | 4 | NewByte = CGF.Builder.CreateOr(OldByte, Mask); |
1218 | 4 | break; |
1219 | 16 | } |
1220 | 16 | if (NewByte) |
1221 | 12 | CGF.Builder.CreateStore(NewByte, ByteAddr); |
1222 | 16 | } |
1223 | | |
1224 | | // However we loaded the old byte, either by plain load or atomicrmw, shift |
1225 | | // the bit into the low position and mask it to 0 or 1. |
1226 | 38 | Value *ShiftedByte = CGF.Builder.CreateLShr(OldByte, PosLow, "bittest.shr"); |
1227 | 38 | return CGF.Builder.CreateAnd( |
1228 | 38 | ShiftedByte, llvm::ConstantInt::get(CGF.Int8Ty, 1), "bittest.res"); |
1229 | 38 | } |
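     | | // A worked instance of the generic path: for BitPos = 41, the byte index
     | | // is 41 >> 3 = 5 and the low bits are 41 & 7 = 1, so the code loads
     | | // BitBaseI8[5], applies the 8-bit mask 1 << 1 for the updating forms, and
     | | // returns (OldByte >> 1) & 1.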
1230 | | |
1231 | | static llvm::Value *emitPPCLoadReserveIntrinsic(CodeGenFunction &CGF, |
1232 | | unsigned BuiltinID, |
1233 | 0 | const CallExpr *E) { |
1234 | 0 | Value *Addr = CGF.EmitScalarExpr(E->getArg(0)); |
1235 | |
1236 | 0 | SmallString<64> Asm; |
1237 | 0 | raw_svector_ostream AsmOS(Asm); |
1238 | 0 | llvm::IntegerType *RetType = CGF.Int32Ty; |
1239 | |
1240 | 0 | switch (BuiltinID) { |
1241 | 0 | case clang::PPC::BI__builtin_ppc_ldarx: |
1242 | 0 | AsmOS << "ldarx "; |
1243 | 0 | RetType = CGF.Int64Ty; |
1244 | 0 | break; |
1245 | 0 | case clang::PPC::BI__builtin_ppc_lwarx: |
1246 | 0 | AsmOS << "lwarx "; |
1247 | 0 | RetType = CGF.Int32Ty; |
1248 | 0 | break; |
1249 | 0 | case clang::PPC::BI__builtin_ppc_lharx: |
1250 | 0 | AsmOS << "lharx "; |
1251 | 0 | RetType = CGF.Int16Ty; |
1252 | 0 | break; |
1253 | 0 | case clang::PPC::BI__builtin_ppc_lbarx: |
1254 | 0 | AsmOS << "lbarx "; |
1255 | 0 | RetType = CGF.Int8Ty; |
1256 | 0 | break; |
1257 | 0 | default: |
1258 | 0 | llvm_unreachable("Expected only PowerPC load reserve intrinsics"); |
1259 | 0 | } |
1260 | | |
1261 | 0 | AsmOS << "$0, ${1:y}"; |
1262 | |
1263 | 0 | std::string Constraints = "=r,*Z,~{memory}"; |
1264 | 0 | std::string_view MachineClobbers = CGF.getTarget().getClobbers(); |
1265 | 0 | if (!MachineClobbers.empty()) { |
1266 | 0 | Constraints += ','; |
1267 | 0 | Constraints += MachineClobbers; |
1268 | 0 | } |
1269 | |
1270 | 0 | llvm::Type *PtrType = CGF.UnqualPtrTy; |
1271 | 0 | llvm::FunctionType *FTy = llvm::FunctionType::get(RetType, {PtrType}, false); |
1272 | |
1273 | 0 | llvm::InlineAsm *IA = |
1274 | 0 | llvm::InlineAsm::get(FTy, Asm, Constraints, /*hasSideEffects=*/true); |
1275 | 0 | llvm::CallInst *CI = CGF.Builder.CreateCall(IA, {Addr}); |
1276 | 0 | CI->addParamAttr( |
1277 | 0 | 0, Attribute::get(CGF.getLLVMContext(), Attribute::ElementType, RetType)); |
1278 | 0 | return CI; |
1279 | 0 | } |
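     | | // Sketch of the emitted call: __builtin_ppc_lwarx(p) becomes inline asm
     | | // "lwarx $0, ${1:y}" with constraints "=r,*Z,~{memory}", returns i32, and
     | | // carries an elementtype(i32) attribute on the pointer operand.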
1280 | | |
1281 | | namespace { |
1282 | | enum class MSVCSetJmpKind { |
1283 | | _setjmpex, |
1284 | | _setjmp3, |
1285 | | _setjmp |
1286 | | }; |
1287 | | } |
1288 | | |
1289 | | /// MSVC handles setjmp a bit differently on different platforms. On every |
1290 | | /// architecture except 32-bit x86, the frame address is passed. On x86, extra |
1291 | | /// parameters can be passed as variadic arguments, but we always pass none. |
1292 | | static RValue EmitMSVCRTSetJmp(CodeGenFunction &CGF, MSVCSetJmpKind SJKind, |
1293 | 12 | const CallExpr *E) { |
1294 | 12 | llvm::Value *Arg1 = nullptr; |
1295 | 12 | llvm::Type *Arg1Ty = nullptr; |
1296 | 12 | StringRef Name; |
1297 | 12 | bool IsVarArg = false; |
1298 | 12 | if (SJKind == MSVCSetJmpKind::_setjmp3) { |
1299 | 2 | Name = "_setjmp3"; |
1300 | 2 | Arg1Ty = CGF.Int32Ty; |
1301 | 2 | Arg1 = llvm::ConstantInt::get(CGF.IntTy, 0); |
1302 | 2 | IsVarArg = true; |
1303 | 10 | } else { |
1304 | 10 | Name = SJKind == MSVCSetJmpKind::_setjmp ? "_setjmp" : "_setjmpex";
1305 | 10 | Arg1Ty = CGF.Int8PtrTy; |
1306 | 10 | if (CGF.getTarget().getTriple().getArch() == llvm::Triple::aarch64) { |
1307 | 4 | Arg1 = CGF.Builder.CreateCall( |
1308 | 4 | CGF.CGM.getIntrinsic(Intrinsic::sponentry, CGF.AllocaInt8PtrTy)); |
1309 | 4 | } else |
1310 | 6 | Arg1 = CGF.Builder.CreateCall( |
1311 | 6 | CGF.CGM.getIntrinsic(Intrinsic::frameaddress, CGF.AllocaInt8PtrTy), |
1312 | 6 | llvm::ConstantInt::get(CGF.Int32Ty, 0)); |
1313 | 10 | } |
1314 | | |
1315 | | // Mark the call site and declaration with ReturnsTwice. |
1316 | 12 | llvm::Type *ArgTypes[2] = {CGF.Int8PtrTy, Arg1Ty}; |
1317 | 12 | llvm::AttributeList ReturnsTwiceAttr = llvm::AttributeList::get( |
1318 | 12 | CGF.getLLVMContext(), llvm::AttributeList::FunctionIndex, |
1319 | 12 | llvm::Attribute::ReturnsTwice); |
1320 | 12 | llvm::FunctionCallee SetJmpFn = CGF.CGM.CreateRuntimeFunction( |
1321 | 12 | llvm::FunctionType::get(CGF.IntTy, ArgTypes, IsVarArg), Name, |
1322 | 12 | ReturnsTwiceAttr, /*Local=*/true); |
1323 | | |
1324 | 12 | llvm::Value *Buf = CGF.Builder.CreateBitOrPointerCast( |
1325 | 12 | CGF.EmitScalarExpr(E->getArg(0)), CGF.Int8PtrTy); |
1326 | 12 | llvm::Value *Args[] = {Buf, Arg1}; |
1327 | 12 | llvm::CallBase *CB = CGF.EmitRuntimeCallOrInvoke(SetJmpFn, Args); |
1328 | 12 | CB->setAttributes(ReturnsTwiceAttr); |
1329 | 12 | return RValue::get(CB); |
1330 | 12 | } |
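     | | // Rough lowering sketch: the _setjmp3 kind emits the variadic call
     | | // _setjmp3(buf, 0); the other kinds pass a frame address, taken from
     | | // @llvm.sponentry on AArch64 and @llvm.frameaddress(0) elsewhere, and the
     | | // call site is marked returns_twice in every case.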
1331 | | |
1332 | | // Many MSVC builtins are shared across x64, ARM, and AArch64; to avoid
1333 | | // repeating code, we handle them here.
1334 | | enum class CodeGenFunction::MSVCIntrin { |
1335 | | _BitScanForward, |
1336 | | _BitScanReverse, |
1337 | | _InterlockedAnd, |
1338 | | _InterlockedDecrement, |
1339 | | _InterlockedExchange, |
1340 | | _InterlockedExchangeAdd, |
1341 | | _InterlockedExchangeSub, |
1342 | | _InterlockedIncrement, |
1343 | | _InterlockedOr, |
1344 | | _InterlockedXor, |
1345 | | _InterlockedExchangeAdd_acq, |
1346 | | _InterlockedExchangeAdd_rel, |
1347 | | _InterlockedExchangeAdd_nf, |
1348 | | _InterlockedExchange_acq, |
1349 | | _InterlockedExchange_rel, |
1350 | | _InterlockedExchange_nf, |
1351 | | _InterlockedCompareExchange_acq, |
1352 | | _InterlockedCompareExchange_rel, |
1353 | | _InterlockedCompareExchange_nf, |
1354 | | _InterlockedCompareExchange128, |
1355 | | _InterlockedCompareExchange128_acq, |
1356 | | _InterlockedCompareExchange128_rel, |
1357 | | _InterlockedCompareExchange128_nf, |
1358 | | _InterlockedOr_acq, |
1359 | | _InterlockedOr_rel, |
1360 | | _InterlockedOr_nf, |
1361 | | _InterlockedXor_acq, |
1362 | | _InterlockedXor_rel, |
1363 | | _InterlockedXor_nf, |
1364 | | _InterlockedAnd_acq, |
1365 | | _InterlockedAnd_rel, |
1366 | | _InterlockedAnd_nf, |
1367 | | _InterlockedIncrement_acq, |
1368 | | _InterlockedIncrement_rel, |
1369 | | _InterlockedIncrement_nf, |
1370 | | _InterlockedDecrement_acq, |
1371 | | _InterlockedDecrement_rel, |
1372 | | _InterlockedDecrement_nf, |
1373 | | __fastfail, |
1374 | | }; |
1375 | | |
1376 | | static std::optional<CodeGenFunction::MSVCIntrin> |
1377 | 6.56k | translateArmToMsvcIntrin(unsigned BuiltinID) { |
1378 | 6.56k | using MSVCIntrin = CodeGenFunction::MSVCIntrin; |
1379 | 6.56k | switch (BuiltinID) { |
1380 | 6.40k | default: |
1381 | 6.40k | return std::nullopt; |
1382 | 3 | case clang::ARM::BI_BitScanForward: |
1383 | 4 | case clang::ARM::BI_BitScanForward64: |
1384 | 4 | return MSVCIntrin::_BitScanForward; |
1385 | 3 | case clang::ARM::BI_BitScanReverse: |
1386 | 4 | case clang::ARM::BI_BitScanReverse64: |
1387 | 4 | return MSVCIntrin::_BitScanReverse; |
1388 | 1 | case clang::ARM::BI_InterlockedAnd64: |
1389 | 1 | return MSVCIntrin::_InterlockedAnd; |
1390 | 1 | case clang::ARM::BI_InterlockedExchange64: |
1391 | 1 | return MSVCIntrin::_InterlockedExchange; |
1392 | 1 | case clang::ARM::BI_InterlockedExchangeAdd64: |
1393 | 1 | return MSVCIntrin::_InterlockedExchangeAdd; |
1394 | 1 | case clang::ARM::BI_InterlockedExchangeSub64: |
1395 | 1 | return MSVCIntrin::_InterlockedExchangeSub; |
1396 | 1 | case clang::ARM::BI_InterlockedOr64: |
1397 | 1 | return MSVCIntrin::_InterlockedOr; |
1398 | 1 | case clang::ARM::BI_InterlockedXor64: |
1399 | 1 | return MSVCIntrin::_InterlockedXor; |
1400 | 1 | case clang::ARM::BI_InterlockedDecrement64: |
1401 | 1 | return MSVCIntrin::_InterlockedDecrement; |
1402 | 1 | case clang::ARM::BI_InterlockedIncrement64: |
1403 | 1 | return MSVCIntrin::_InterlockedIncrement; |
1404 | 1 | case clang::ARM::BI_InterlockedExchangeAdd8_acq: |
1405 | 2 | case clang::ARM::BI_InterlockedExchangeAdd16_acq: |
1406 | 5 | case clang::ARM::BI_InterlockedExchangeAdd_acq: |
1407 | 6 | case clang::ARM::BI_InterlockedExchangeAdd64_acq: |
1408 | 6 | return MSVCIntrin::_InterlockedExchangeAdd_acq; |
1409 | 1 | case clang::ARM::BI_InterlockedExchangeAdd8_rel: |
1410 | 2 | case clang::ARM::BI_InterlockedExchangeAdd16_rel: |
1411 | 5 | case clang::ARM::BI_InterlockedExchangeAdd_rel: |
1412 | 6 | case clang::ARM::BI_InterlockedExchangeAdd64_rel: |
1413 | 6 | return MSVCIntrin::_InterlockedExchangeAdd_rel; |
1414 | 1 | case clang::ARM::BI_InterlockedExchangeAdd8_nf: |
1415 | 2 | case clang::ARM::BI_InterlockedExchangeAdd16_nf: |
1416 | 5 | case clang::ARM::BI_InterlockedExchangeAdd_nf: |
1417 | 6 | case clang::ARM::BI_InterlockedExchangeAdd64_nf: |
1418 | 6 | return MSVCIntrin::_InterlockedExchangeAdd_nf; |
1419 | 1 | case clang::ARM::BI_InterlockedExchange8_acq: |
1420 | 2 | case clang::ARM::BI_InterlockedExchange16_acq: |
1421 | 5 | case clang::ARM::BI_InterlockedExchange_acq: |
1422 | 6 | case clang::ARM::BI_InterlockedExchange64_acq: |
1423 | 6 | return MSVCIntrin::_InterlockedExchange_acq; |
1424 | 1 | case clang::ARM::BI_InterlockedExchange8_rel: |
1425 | 2 | case clang::ARM::BI_InterlockedExchange16_rel: |
1426 | 5 | case clang::ARM::BI_InterlockedExchange_rel: |
1427 | 6 | case clang::ARM::BI_InterlockedExchange64_rel: |
1428 | 6 | return MSVCIntrin::_InterlockedExchange_rel; |
1429 | 1 | case clang::ARM::BI_InterlockedExchange8_nf: |
1430 | 2 | case clang::ARM::BI_InterlockedExchange16_nf: |
1431 | 5 | case clang::ARM::BI_InterlockedExchange_nf: |
1432 | 6 | case clang::ARM::BI_InterlockedExchange64_nf: |
1433 | 6 | return MSVCIntrin::_InterlockedExchange_nf; |
1434 | 1 | case clang::ARM::BI_InterlockedCompareExchange8_acq: |
1435 | 2 | case clang::ARM::BI_InterlockedCompareExchange16_acq: |
1436 | 5 | case clang::ARM::BI_InterlockedCompareExchange_acq: |
1437 | 6 | case clang::ARM::BI_InterlockedCompareExchange64_acq: |
1438 | 6 | return MSVCIntrin::_InterlockedCompareExchange_acq; |
1439 | 1 | case clang::ARM::BI_InterlockedCompareExchange8_rel: |
1440 | 2 | case clang::ARM::BI_InterlockedCompareExchange16_rel: |
1441 | 5 | case clang::ARM::BI_InterlockedCompareExchange_rel: |
1442 | 6 | case clang::ARM::BI_InterlockedCompareExchange64_rel: |
1443 | 6 | return MSVCIntrin::_InterlockedCompareExchange_rel; |
1444 | 1 | case clang::ARM::BI_InterlockedCompareExchange8_nf: |
1445 | 2 | case clang::ARM::BI_InterlockedCompareExchange16_nf: |
1446 | 5 | case clang::ARM::BI_InterlockedCompareExchange_nf: |
1447 | 6 | case clang::ARM::BI_InterlockedCompareExchange64_nf: |
1448 | 6 | return MSVCIntrin::_InterlockedCompareExchange_nf; |
1449 | 1 | case clang::ARM::BI_InterlockedOr8_acq: |
1450 | 2 | case clang::ARM::BI_InterlockedOr16_acq: |
1451 | 5 | case clang::ARM::BI_InterlockedOr_acq: |
1452 | 6 | case clang::ARM::BI_InterlockedOr64_acq: |
1453 | 6 | return MSVCIntrin::_InterlockedOr_acq; |
1454 | 1 | case clang::ARM::BI_InterlockedOr8_rel: |
1455 | 2 | case clang::ARM::BI_InterlockedOr16_rel: |
1456 | 5 | case clang::ARM::BI_InterlockedOr_rel: |
1457 | 6 | case clang::ARM::BI_InterlockedOr64_rel: |
1458 | 6 | return MSVCIntrin::_InterlockedOr_rel; |
1459 | 1 | case clang::ARM::BI_InterlockedOr8_nf: |
1460 | 2 | case clang::ARM::BI_InterlockedOr16_nf: |
1461 | 5 | case clang::ARM::BI_InterlockedOr_nf: |
1462 | 6 | case clang::ARM::BI_InterlockedOr64_nf: |
1463 | 6 | return MSVCIntrin::_InterlockedOr_nf; |
1464 | 1 | case clang::ARM::BI_InterlockedXor8_acq: |
1465 | 2 | case clang::ARM::BI_InterlockedXor16_acq: |
1466 | 5 | case clang::ARM::BI_InterlockedXor_acq: |
1467 | 6 | case clang::ARM::BI_InterlockedXor64_acq: |
1468 | 6 | return MSVCIntrin::_InterlockedXor_acq; |
1469 | 1 | case clang::ARM::BI_InterlockedXor8_rel: |
1470 | 2 | case clang::ARM::BI_InterlockedXor16_rel: |
1471 | 5 | case clang::ARM::BI_InterlockedXor_rel: |
1472 | 6 | case clang::ARM::BI_InterlockedXor64_rel: |
1473 | 6 | return MSVCIntrin::_InterlockedXor_rel; |
1474 | 1 | case clang::ARM::BI_InterlockedXor8_nf: |
1475 | 2 | case clang::ARM::BI_InterlockedXor16_nf: |
1476 | 5 | case clang::ARM::BI_InterlockedXor_nf: |
1477 | 6 | case clang::ARM::BI_InterlockedXor64_nf: |
1478 | 6 | return MSVCIntrin::_InterlockedXor_nf; |
1479 | 1 | case clang::ARM::BI_InterlockedAnd8_acq: |
1480 | 2 | case clang::ARM::BI_InterlockedAnd16_acq: |
1481 | 5 | case clang::ARM::BI_InterlockedAnd_acq: |
1482 | 6 | case clang::ARM::BI_InterlockedAnd64_acq: |
1483 | 6 | return MSVCIntrin::_InterlockedAnd_acq; |
1484 | 1 | case clang::ARM::BI_InterlockedAnd8_rel: |
1485 | 2 | case clang::ARM::BI_InterlockedAnd16_rel: |
1486 | 5 | case clang::ARM::BI_InterlockedAnd_rel: |
1487 | 6 | case clang::ARM::BI_InterlockedAnd64_rel: |
1488 | 6 | return MSVCIntrin::_InterlockedAnd_rel; |
1489 | 1 | case clang::ARM::BI_InterlockedAnd8_nf: |
1490 | 2 | case clang::ARM::BI_InterlockedAnd16_nf: |
1491 | 5 | case clang::ARM::BI_InterlockedAnd_nf: |
1492 | 6 | case clang::ARM::BI_InterlockedAnd64_nf: |
1493 | 6 | return MSVCIntrin::_InterlockedAnd_nf; |
1494 | 1 | case clang::ARM::BI_InterlockedIncrement16_acq: |
1495 | 4 | case clang::ARM::BI_InterlockedIncrement_acq: |
1496 | 5 | case clang::ARM::BI_InterlockedIncrement64_acq: |
1497 | 5 | return MSVCIntrin::_InterlockedIncrement_acq; |
1498 | 1 | case clang::ARM::BI_InterlockedIncrement16_rel: |
1499 | 4 | case clang::ARM::BI_InterlockedIncrement_rel: |
1500 | 5 | case clang::ARM::BI_InterlockedIncrement64_rel: |
1501 | 5 | return MSVCIntrin::_InterlockedIncrement_rel; |
1502 | 1 | case clang::ARM::BI_InterlockedIncrement16_nf: |
1503 | 4 | case clang::ARM::BI_InterlockedIncrement_nf: |
1504 | 5 | case clang::ARM::BI_InterlockedIncrement64_nf: |
1505 | 5 | return MSVCIntrin::_InterlockedIncrement_nf; |
1506 | 1 | case clang::ARM::BI_InterlockedDecrement16_acq: |
1507 | 4 | case clang::ARM::BI_InterlockedDecrement_acq: |
1508 | 5 | case clang::ARM::BI_InterlockedDecrement64_acq: |
1509 | 5 | return MSVCIntrin::_InterlockedDecrement_acq; |
1510 | 1 | case clang::ARM::BI_InterlockedDecrement16_rel: |
1511 | 4 | case clang::ARM::BI_InterlockedDecrement_rel: |
1512 | 5 | case clang::ARM::BI_InterlockedDecrement64_rel: |
1513 | 5 | return MSVCIntrin::_InterlockedDecrement_rel; |
1514 | 1 | case clang::ARM::BI_InterlockedDecrement16_nf: |
1515 | 4 | case clang::ARM::BI_InterlockedDecrement_nf: |
1516 | 5 | case clang::ARM::BI_InterlockedDecrement64_nf: |
1517 | 5 | return MSVCIntrin::_InterlockedDecrement_nf; |
1518 | 6.56k | } |
1519 | 0 | llvm_unreachable("must return from switch"); |
1520 | 0 | } |
1521 | | |
1522 | | static std::optional<CodeGenFunction::MSVCIntrin> |
1523 | 3.97k | translateAarch64ToMsvcIntrin(unsigned BuiltinID) { |
1524 | 3.97k | using MSVCIntrin = CodeGenFunction::MSVCIntrin; |
1525 | 3.97k | switch (BuiltinID) { |
1526 | 3.76k | default: |
1527 | 3.76k | return std::nullopt; |
1528 | 5 | case clang::AArch64::BI_BitScanForward: |
1529 | 6 | case clang::AArch64::BI_BitScanForward64: |
1530 | 6 | return MSVCIntrin::_BitScanForward; |
1531 | 5 | case clang::AArch64::BI_BitScanReverse: |
1532 | 6 | case clang::AArch64::BI_BitScanReverse64: |
1533 | 6 | return MSVCIntrin::_BitScanReverse; |
1534 | 1 | case clang::AArch64::BI_InterlockedAnd64: |
1535 | 1 | return MSVCIntrin::_InterlockedAnd; |
1536 | 1 | case clang::AArch64::BI_InterlockedExchange64: |
1537 | 1 | return MSVCIntrin::_InterlockedExchange; |
1538 | 1 | case clang::AArch64::BI_InterlockedExchangeAdd64: |
1539 | 1 | return MSVCIntrin::_InterlockedExchangeAdd; |
1540 | 1 | case clang::AArch64::BI_InterlockedExchangeSub64: |
1541 | 1 | return MSVCIntrin::_InterlockedExchangeSub; |
1542 | 1 | case clang::AArch64::BI_InterlockedOr64: |
1543 | 1 | return MSVCIntrin::_InterlockedOr; |
1544 | 1 | case clang::AArch64::BI_InterlockedXor64: |
1545 | 1 | return MSVCIntrin::_InterlockedXor; |
1546 | 1 | case clang::AArch64::BI_InterlockedDecrement64: |
1547 | 1 | return MSVCIntrin::_InterlockedDecrement; |
1548 | 1 | case clang::AArch64::BI_InterlockedIncrement64: |
1549 | 1 | return MSVCIntrin::_InterlockedIncrement; |
1550 | 1 | case clang::AArch64::BI_InterlockedExchangeAdd8_acq: |
1551 | 2 | case clang::AArch64::BI_InterlockedExchangeAdd16_acq: |
1552 | 7 | case clang::AArch64::BI_InterlockedExchangeAdd_acq: |
1553 | 8 | case clang::AArch64::BI_InterlockedExchangeAdd64_acq: |
1554 | 8 | return MSVCIntrin::_InterlockedExchangeAdd_acq; |
1555 | 1 | case clang::AArch64::BI_InterlockedExchangeAdd8_rel: |
1556 | 2 | case clang::AArch64::BI_InterlockedExchangeAdd16_rel: |
1557 | 7 | case clang::AArch64::BI_InterlockedExchangeAdd_rel: |
1558 | 8 | case clang::AArch64::BI_InterlockedExchangeAdd64_rel: |
1559 | 8 | return MSVCIntrin::_InterlockedExchangeAdd_rel; |
1560 | 1 | case clang::AArch64::BI_InterlockedExchangeAdd8_nf: |
1561 | 2 | case clang::AArch64::BI_InterlockedExchangeAdd16_nf: |
1562 | 7 | case clang::AArch64::BI_InterlockedExchangeAdd_nf: |
1563 | 8 | case clang::AArch64::BI_InterlockedExchangeAdd64_nf: |
1564 | 8 | return MSVCIntrin::_InterlockedExchangeAdd_nf; |
1565 | 1 | case clang::AArch64::BI_InterlockedExchange8_acq: |
1566 | 2 | case clang::AArch64::BI_InterlockedExchange16_acq: |
1567 | 7 | case clang::AArch64::BI_InterlockedExchange_acq: |
1568 | 8 | case clang::AArch64::BI_InterlockedExchange64_acq: |
1569 | 8 | return MSVCIntrin::_InterlockedExchange_acq; |
1570 | 1 | case clang::AArch64::BI_InterlockedExchange8_rel: |
1571 | 2 | case clang::AArch64::BI_InterlockedExchange16_rel: |
1572 | 7 | case clang::AArch64::BI_InterlockedExchange_rel: |
1573 | 8 | case clang::AArch64::BI_InterlockedExchange64_rel: |
1574 | 8 | return MSVCIntrin::_InterlockedExchange_rel; |
1575 | 1 | case clang::AArch64::BI_InterlockedExchange8_nf: |
1576 | 2 | case clang::AArch64::BI_InterlockedExchange16_nf: |
1577 | 7 | case clang::AArch64::BI_InterlockedExchange_nf: |
1578 | 8 | case clang::AArch64::BI_InterlockedExchange64_nf: |
1579 | 8 | return MSVCIntrin::_InterlockedExchange_nf; |
1580 | 1 | case clang::AArch64::BI_InterlockedCompareExchange8_acq: |
1581 | 2 | case clang::AArch64::BI_InterlockedCompareExchange16_acq: |
1582 | 7 | case clang::AArch64::BI_InterlockedCompareExchange_acq: |
1583 | 8 | case clang::AArch64::BI_InterlockedCompareExchange64_acq: |
1584 | 8 | return MSVCIntrin::_InterlockedCompareExchange_acq; |
1585 | 1 | case clang::AArch64::BI_InterlockedCompareExchange8_rel: |
1586 | 2 | case clang::AArch64::BI_InterlockedCompareExchange16_rel: |
1587 | 7 | case clang::AArch64::BI_InterlockedCompareExchange_rel: |
1588 | 8 | case clang::AArch64::BI_InterlockedCompareExchange64_rel: |
1589 | 8 | return MSVCIntrin::_InterlockedCompareExchange_rel; |
1590 | 1 | case clang::AArch64::BI_InterlockedCompareExchange8_nf: |
1591 | 2 | case clang::AArch64::BI_InterlockedCompareExchange16_nf: |
1592 | 7 | case clang::AArch64::BI_InterlockedCompareExchange_nf: |
1593 | 8 | case clang::AArch64::BI_InterlockedCompareExchange64_nf: |
1594 | 8 | return MSVCIntrin::_InterlockedCompareExchange_nf; |
1595 | 1 | case clang::AArch64::BI_InterlockedCompareExchange128: |
1596 | 1 | return MSVCIntrin::_InterlockedCompareExchange128; |
1597 | 1 | case clang::AArch64::BI_InterlockedCompareExchange128_acq: |
1598 | 1 | return MSVCIntrin::_InterlockedCompareExchange128_acq; |
1599 | 1 | case clang::AArch64::BI_InterlockedCompareExchange128_nf: |
1600 | 1 | return MSVCIntrin::_InterlockedCompareExchange128_nf; |
1601 | 1 | case clang::AArch64::BI_InterlockedCompareExchange128_rel: |
1602 | 1 | return MSVCIntrin::_InterlockedCompareExchange128_rel; |
1603 | 1 | case clang::AArch64::BI_InterlockedOr8_acq: |
1604 | 2 | case clang::AArch64::BI_InterlockedOr16_acq: |
1605 | 7 | case clang::AArch64::BI_InterlockedOr_acq: |
1606 | 8 | case clang::AArch64::BI_InterlockedOr64_acq: |
1607 | 8 | return MSVCIntrin::_InterlockedOr_acq; |
1608 | 1 | case clang::AArch64::BI_InterlockedOr8_rel: |
1609 | 2 | case clang::AArch64::BI_InterlockedOr16_rel: |
1610 | 7 | case clang::AArch64::BI_InterlockedOr_rel: |
1611 | 8 | case clang::AArch64::BI_InterlockedOr64_rel: |
1612 | 8 | return MSVCIntrin::_InterlockedOr_rel; |
1613 | 1 | case clang::AArch64::BI_InterlockedOr8_nf: |
1614 | 2 | case clang::AArch64::BI_InterlockedOr16_nf: |
1615 | 7 | case clang::AArch64::BI_InterlockedOr_nf: |
1616 | 8 | case clang::AArch64::BI_InterlockedOr64_nf: |
1617 | 8 | return MSVCIntrin::_InterlockedOr_nf; |
1618 | 1 | case clang::AArch64::BI_InterlockedXor8_acq: |
1619 | 2 | case clang::AArch64::BI_InterlockedXor16_acq: |
1620 | 7 | case clang::AArch64::BI_InterlockedXor_acq: |
1621 | 8 | case clang::AArch64::BI_InterlockedXor64_acq: |
1622 | 8 | return MSVCIntrin::_InterlockedXor_acq; |
1623 | 1 | case clang::AArch64::BI_InterlockedXor8_rel: |
1624 | 2 | case clang::AArch64::BI_InterlockedXor16_rel: |
1625 | 7 | case clang::AArch64::BI_InterlockedXor_rel: |
1626 | 8 | case clang::AArch64::BI_InterlockedXor64_rel: |
1627 | 8 | return MSVCIntrin::_InterlockedXor_rel; |
1628 | 1 | case clang::AArch64::BI_InterlockedXor8_nf: |
1629 | 2 | case clang::AArch64::BI_InterlockedXor16_nf: |
1630 | 7 | case clang::AArch64::BI_InterlockedXor_nf: |
1631 | 8 | case clang::AArch64::BI_InterlockedXor64_nf: |
1632 | 8 | return MSVCIntrin::_InterlockedXor_nf; |
1633 | 1 | case clang::AArch64::BI_InterlockedAnd8_acq: |
1634 | 2 | case clang::AArch64::BI_InterlockedAnd16_acq: |
1635 | 7 | case clang::AArch64::BI_InterlockedAnd_acq: |
1636 | 8 | case clang::AArch64::BI_InterlockedAnd64_acq: |
1637 | 8 | return MSVCIntrin::_InterlockedAnd_acq; |
1638 | 1 | case clang::AArch64::BI_InterlockedAnd8_rel: |
1639 | 2 | case clang::AArch64::BI_InterlockedAnd16_rel: |
1640 | 7 | case clang::AArch64::BI_InterlockedAnd_rel: |
1641 | 8 | case clang::AArch64::BI_InterlockedAnd64_rel: |
1642 | 8 | return MSVCIntrin::_InterlockedAnd_rel; |
1643 | 1 | case clang::AArch64::BI_InterlockedAnd8_nf: |
1644 | 2 | case clang::AArch64::BI_InterlockedAnd16_nf: |
1645 | 7 | case clang::AArch64::BI_InterlockedAnd_nf: |
1646 | 8 | case clang::AArch64::BI_InterlockedAnd64_nf: |
1647 | 8 | return MSVCIntrin::_InterlockedAnd_nf; |
1648 | 1 | case clang::AArch64::BI_InterlockedIncrement16_acq: |
1649 | 6 | case clang::AArch64::BI_InterlockedIncrement_acq: |
1650 | 7 | case clang::AArch64::BI_InterlockedIncrement64_acq: |
1651 | 7 | return MSVCIntrin::_InterlockedIncrement_acq; |
1652 | 1 | case clang::AArch64::BI_InterlockedIncrement16_rel: |
1653 | 6 | case clang::AArch64::BI_InterlockedIncrement_rel: |
1654 | 7 | case clang::AArch64::BI_InterlockedIncrement64_rel: |
1655 | 7 | return MSVCIntrin::_InterlockedIncrement_rel; |
1656 | 1 | case clang::AArch64::BI_InterlockedIncrement16_nf: |
1657 | 6 | case clang::AArch64::BI_InterlockedIncrement_nf: |
1658 | 7 | case clang::AArch64::BI_InterlockedIncrement64_nf: |
1659 | 7 | return MSVCIntrin::_InterlockedIncrement_nf; |
1660 | 1 | case clang::AArch64::BI_InterlockedDecrement16_acq: |
1661 | 6 | case clang::AArch64::BI_InterlockedDecrement_acq: |
1662 | 7 | case clang::AArch64::BI_InterlockedDecrement64_acq: |
1663 | 7 | return MSVCIntrin::_InterlockedDecrement_acq; |
1664 | 1 | case clang::AArch64::BI_InterlockedDecrement16_rel: |
1665 | 6 | case clang::AArch64::BI_InterlockedDecrement_rel: |
1666 | 7 | case clang::AArch64::BI_InterlockedDecrement64_rel: |
1667 | 7 | return MSVCIntrin::_InterlockedDecrement_rel; |
1668 | 1 | case clang::AArch64::BI_InterlockedDecrement16_nf: |
1669 | 6 | case clang::AArch64::BI_InterlockedDecrement_nf: |
1670 | 7 | case clang::AArch64::BI_InterlockedDecrement64_nf: |
1671 | 7 | return MSVCIntrin::_InterlockedDecrement_nf; |
1672 | 3.97k | } |
1673 | 0 | llvm_unreachable("must return from switch"); |
1674 | 0 | } |
1675 | | |
1676 | | static std::optional<CodeGenFunction::MSVCIntrin> |
1677 | 9.19k | translateX86ToMsvcIntrin(unsigned BuiltinID) { |
1678 | 9.19k | using MSVCIntrin = CodeGenFunction::MSVCIntrin; |
1679 | 9.19k | switch (BuiltinID) { |
1680 | 9.15k | default: |
1681 | 9.15k | return std::nullopt; |
1682 | 6 | case clang::X86::BI_BitScanForward: |
1683 | 11 | case clang::X86::BI_BitScanForward64: |
1684 | 11 | return MSVCIntrin::_BitScanForward; |
1685 | 6 | case clang::X86::BI_BitScanReverse: |
1686 | 11 | case clang::X86::BI_BitScanReverse64: |
1687 | 11 | return MSVCIntrin::_BitScanReverse; |
1688 | 2 | case clang::X86::BI_InterlockedAnd64: |
1689 | 2 | return MSVCIntrin::_InterlockedAnd; |
1690 | 1 | case clang::X86::BI_InterlockedCompareExchange128: |
1691 | 1 | return MSVCIntrin::_InterlockedCompareExchange128; |
1692 | 2 | case clang::X86::BI_InterlockedExchange64: |
1693 | 2 | return MSVCIntrin::_InterlockedExchange; |
1694 | 2 | case clang::X86::BI_InterlockedExchangeAdd64: |
1695 | 2 | return MSVCIntrin::_InterlockedExchangeAdd; |
1696 | 2 | case clang::X86::BI_InterlockedExchangeSub64: |
1697 | 2 | return MSVCIntrin::_InterlockedExchangeSub; |
1698 | 2 | case clang::X86::BI_InterlockedOr64: |
1699 | 2 | return MSVCIntrin::_InterlockedOr; |
1700 | 2 | case clang::X86::BI_InterlockedXor64: |
1701 | 2 | return MSVCIntrin::_InterlockedXor; |
1702 | 2 | case clang::X86::BI_InterlockedDecrement64: |
1703 | 2 | return MSVCIntrin::_InterlockedDecrement; |
1704 | 2 | case clang::X86::BI_InterlockedIncrement64: |
1705 | 2 | return MSVCIntrin::_InterlockedIncrement; |
1706 | 9.19k | } |
1707 | 0 | llvm_unreachable("must return from switch"); |
1708 | 0 | } |
1709 | | |
1710 | | // Emit an MSVC intrinsic. Assumes that arguments have *not* been evaluated. |
1711 | | Value *CodeGenFunction::EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID, |
1712 | 579 | const CallExpr *E) { |
1713 | 579 | switch (BuiltinID) { |
1714 | 21 | case MSVCIntrin::_BitScanForward: |
1715 | 42 | case MSVCIntrin::_BitScanReverse: { |
1716 | 42 | Address IndexAddress(EmitPointerWithAlignment(E->getArg(0))); |
1717 | 42 | Value *ArgValue = EmitScalarExpr(E->getArg(1)); |
1718 | | |
1719 | 42 | llvm::Type *ArgType = ArgValue->getType(); |
1720 | 42 | llvm::Type *IndexType = IndexAddress.getElementType(); |
1721 | 42 | llvm::Type *ResultType = ConvertType(E->getType()); |
1722 | | |
1723 | 42 | Value *ArgZero = llvm::Constant::getNullValue(ArgType); |
1724 | 42 | Value *ResZero = llvm::Constant::getNullValue(ResultType); |
1725 | 42 | Value *ResOne = llvm::ConstantInt::get(ResultType, 1); |
1726 | | |
1727 | 42 | BasicBlock *Begin = Builder.GetInsertBlock(); |
1728 | 42 | BasicBlock *End = createBasicBlock("bitscan_end", this->CurFn); |
1729 | 42 | Builder.SetInsertPoint(End); |
1730 | 42 | PHINode *Result = Builder.CreatePHI(ResultType, 2, "bitscan_result"); |
1731 | | |
1732 | 42 | Builder.SetInsertPoint(Begin); |
1733 | 42 | Value *IsZero = Builder.CreateICmpEQ(ArgValue, ArgZero); |
1734 | 42 | BasicBlock *NotZero = createBasicBlock("bitscan_not_zero", this->CurFn); |
1735 | 42 | Builder.CreateCondBr(IsZero, End, NotZero); |
1736 | 42 | Result->addIncoming(ResZero, Begin); |
1737 | | |
1738 | 42 | Builder.SetInsertPoint(NotZero); |
1739 | | |
1740 | 42 | if (BuiltinID == MSVCIntrin::_BitScanForward) { |
1741 | 21 | Function *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType); |
1742 | 21 | Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()}); |
1743 | 21 | ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false); |
1744 | 21 | Builder.CreateStore(ZeroCount, IndexAddress, false); |
1745 | 21 | } else { |
1746 | 21 | unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth(); |
1747 | 21 | Value *ArgTypeLastIndex = llvm::ConstantInt::get(IndexType, ArgWidth - 1); |
1748 | | |
1749 | 21 | Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType); |
1750 | 21 | Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()}); |
1751 | 21 | ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false); |
1752 | 21 | Value *Index = Builder.CreateNSWSub(ArgTypeLastIndex, ZeroCount); |
1753 | 21 | Builder.CreateStore(Index, IndexAddress, false); |
1754 | 21 | } |
1755 | 42 | Builder.CreateBr(End); |
1756 | 42 | Result->addIncoming(ResOne, NotZero); |
1757 | | |
1758 | 42 | Builder.SetInsertPoint(End); |
1759 | 42 | return Result; |
1760 | 21 | } |
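     | | // Net effect of the block above: _BitScanForward stores cttz(Mask) through
     | | // Index and returns 1 when Mask != 0 (0 otherwise); _BitScanReverse stores
     | | // (BitWidth - 1) - ctlz(Mask) instead.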
1761 | 26 | case MSVCIntrin::_InterlockedAnd: |
1762 | 26 | return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E); |
1763 | 30 | case MSVCIntrin::_InterlockedExchange: |
1764 | 30 | return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E); |
1765 | 26 | case MSVCIntrin::_InterlockedExchangeAdd: |
1766 | 26 | return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E); |
1767 | 26 | case MSVCIntrin::_InterlockedExchangeSub: |
1768 | 26 | return MakeBinaryAtomicValue(*this, AtomicRMWInst::Sub, E); |
1769 | 26 | case MSVCIntrin::_InterlockedOr: |
1770 | 26 | return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E); |
1771 | 26 | case MSVCIntrin::_InterlockedXor: |
1772 | 26 | return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E); |
1773 | 14 | case MSVCIntrin::_InterlockedExchangeAdd_acq: |
1774 | 14 | return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E, |
1775 | 14 | AtomicOrdering::Acquire); |
1776 | 14 | case MSVCIntrin::_InterlockedExchangeAdd_rel: |
1777 | 14 | return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E, |
1778 | 14 | AtomicOrdering::Release); |
1779 | 14 | case MSVCIntrin::_InterlockedExchangeAdd_nf: |
1780 | 14 | return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E, |
1781 | 14 | AtomicOrdering::Monotonic); |
1782 | 14 | case MSVCIntrin::_InterlockedExchange_acq: |
1783 | 14 | return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E, |
1784 | 14 | AtomicOrdering::Acquire); |
1785 | 14 | case MSVCIntrin::_InterlockedExchange_rel: |
1786 | 14 | return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E, |
1787 | 14 | AtomicOrdering::Release); |
1788 | 14 | case MSVCIntrin::_InterlockedExchange_nf: |
1789 | 14 | return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E, |
1790 | 14 | AtomicOrdering::Monotonic); |
1791 | 14 | case MSVCIntrin::_InterlockedCompareExchange_acq: |
1792 | 14 | return EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Acquire); |
1793 | 14 | case MSVCIntrin::_InterlockedCompareExchange_rel: |
1794 | 14 | return EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Release); |
1795 | 14 | case MSVCIntrin::_InterlockedCompareExchange_nf: |
1796 | 14 | return EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Monotonic); |
1797 | 2 | case MSVCIntrin::_InterlockedCompareExchange128: |
1798 | 2 | return EmitAtomicCmpXchg128ForMSIntrin( |
1799 | 2 | *this, E, AtomicOrdering::SequentiallyConsistent); |
1800 | 1 | case MSVCIntrin::_InterlockedCompareExchange128_acq: |
1801 | 1 | return EmitAtomicCmpXchg128ForMSIntrin(*this, E, AtomicOrdering::Acquire); |
1802 | 1 | case MSVCIntrin::_InterlockedCompareExchange128_rel: |
1803 | 1 | return EmitAtomicCmpXchg128ForMSIntrin(*this, E, AtomicOrdering::Release); |
1804 | 1 | case MSVCIntrin::_InterlockedCompareExchange128_nf: |
1805 | 1 | return EmitAtomicCmpXchg128ForMSIntrin(*this, E, AtomicOrdering::Monotonic); |
1806 | 14 | case MSVCIntrin::_InterlockedOr_acq: |
1807 | 14 | return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E, |
1808 | 14 | AtomicOrdering::Acquire); |
1809 | 14 | case MSVCIntrin::_InterlockedOr_rel: |
1810 | 14 | return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E, |
1811 | 14 | AtomicOrdering::Release); |
1812 | 14 | case MSVCIntrin::_InterlockedOr_nf: |
1813 | 14 | return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E, |
1814 | 14 | AtomicOrdering::Monotonic); |
1815 | 14 | case MSVCIntrin::_InterlockedXor_acq: |
1816 | 14 | return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E, |
1817 | 14 | AtomicOrdering::Acquire); |
1818 | 14 | case MSVCIntrin::_InterlockedXor_rel: |
1819 | 14 | return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E, |
1820 | 14 | AtomicOrdering::Release); |
1821 | 14 | case MSVCIntrin::_InterlockedXor_nf: |
1822 | 14 | return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E, |
1823 | 14 | AtomicOrdering::Monotonic); |
1824 | 14 | case MSVCIntrin::_InterlockedAnd_acq: |
1825 | 14 | return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E, |
1826 | 14 | AtomicOrdering::Acquire); |
1827 | 14 | case MSVCIntrin::_InterlockedAnd_rel: |
1828 | 14 | return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E, |
1829 | 14 | AtomicOrdering::Release); |
1830 | 14 | case MSVCIntrin::_InterlockedAnd_nf: |
1831 | 14 | return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E, |
1832 | 14 | AtomicOrdering::Monotonic); |
1833 | 12 | case MSVCIntrin::_InterlockedIncrement_acq: |
1834 | 12 | return EmitAtomicIncrementValue(*this, E, AtomicOrdering::Acquire); |
1835 | 12 | case MSVCIntrin::_InterlockedIncrement_rel: |
1836 | 12 | return EmitAtomicIncrementValue(*this, E, AtomicOrdering::Release); |
1837 | 12 | case MSVCIntrin::_InterlockedIncrement_nf: |
1838 | 12 | return EmitAtomicIncrementValue(*this, E, AtomicOrdering::Monotonic); |
1839 | 12 | case MSVCIntrin::_InterlockedDecrement_acq: |
1840 | 12 | return EmitAtomicDecrementValue(*this, E, AtomicOrdering::Acquire); |
1841 | 12 | case MSVCIntrin::_InterlockedDecrement_rel: |
1842 | 12 | return EmitAtomicDecrementValue(*this, E, AtomicOrdering::Release); |
1843 | 12 | case MSVCIntrin::_InterlockedDecrement_nf: |
1844 | 12 | return EmitAtomicDecrementValue(*this, E, AtomicOrdering::Monotonic); |
1845 | | |
1846 | 22 | case MSVCIntrin::_InterlockedDecrement: |
1847 | 22 | return EmitAtomicDecrementValue(*this, E); |
1848 | 22 | case MSVCIntrin::_InterlockedIncrement: |
1849 | 22 | return EmitAtomicIncrementValue(*this, E); |
1850 | | |
1851 | 4 | case MSVCIntrin::__fastfail: { |
1852 | | // Request immediate process termination from the kernel. The instruction |
1853 | | // sequences to do this are documented on MSDN: |
1854 | | // https://msdn.microsoft.com/en-us/library/dn774154.aspx |
1855 | 4 | llvm::Triple::ArchType ISA = getTarget().getTriple().getArch(); |
1856 | 4 | StringRef Asm, Constraints; |
1857 | 4 | switch (ISA) { |
1858 | 0 | default: |
1859 | 0 | ErrorUnsupported(E, "__fastfail call for this architecture"); |
1860 | 0 | break; |
1861 | 1 | case llvm::Triple::x86: |
1862 | 2 | case llvm::Triple::x86_64: |
1863 | 2 | Asm = "int $$0x29"; |
1864 | 2 | Constraints = "{cx}"; |
1865 | 2 | break; |
1866 | 1 | case llvm::Triple::thumb: |
1867 | 1 | Asm = "udf #251"; |
1868 | 1 | Constraints = "{r0}"; |
1869 | 1 | break; |
1870 | 1 | case llvm::Triple::aarch64: |
1871 | 1 | Asm = "brk #0xF003"; |
1872 | 1 | Constraints = "{w0}"; |
1873 | 4 | } |
1874 | 4 | llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, {Int32Ty}, false); |
1875 | 4 | llvm::InlineAsm *IA = |
1876 | 4 | llvm::InlineAsm::get(FTy, Asm, Constraints, /*hasSideEffects=*/true); |
1877 | 4 | llvm::AttributeList NoReturnAttr = llvm::AttributeList::get( |
1878 | 4 | getLLVMContext(), llvm::AttributeList::FunctionIndex, |
1879 | 4 | llvm::Attribute::NoReturn); |
1880 | 4 | llvm::CallInst *CI = Builder.CreateCall(IA, EmitScalarExpr(E->getArg(0))); |
1881 | 4 | CI->setAttributes(NoReturnAttr); |
1882 | 4 | return CI; |
1883 | 4 | } |
1884 | 579 | } |
1885 | 0 | llvm_unreachable("Incorrect MSVC intrinsic!"); |
1886 | 0 | } |
1887 | | |
1888 | | namespace { |
1889 | | // ARC cleanup for __builtin_os_log_format |
1890 | | struct CallObjCArcUse final : EHScopeStack::Cleanup { |
1891 | 4 | CallObjCArcUse(llvm::Value *object) : object(object) {} |
1892 | | llvm::Value *object; |
1893 | | |
1894 | 4 | void Emit(CodeGenFunction &CGF, Flags flags) override { |
1895 | 4 | CGF.EmitARCIntrinsicUse(object); |
1896 | 4 | } |
1897 | | }; |
1898 | | } |
1899 | | |
1900 | | Value *CodeGenFunction::EmitCheckedArgForBuiltin(const Expr *E, |
1901 | 83 | BuiltinCheckKind Kind) { |
1902 | 83 | assert((Kind == BCK_CLZPassedZero || Kind == BCK_CTZPassedZero) |
1903 | 83 | && "Unsupported builtin check kind"); |
1904 | | |
1905 | 83 | Value *ArgValue = EmitScalarExpr(E); |
1906 | 83 | if (!SanOpts.has(SanitizerKind::Builtin)) |
1907 | 71 | return ArgValue; |
1908 | | |
1909 | 12 | SanitizerScope SanScope(this); |
1910 | 12 | Value *Cond = Builder.CreateICmpNE( |
1911 | 12 | ArgValue, llvm::Constant::getNullValue(ArgValue->getType())); |
1912 | 12 | EmitCheck(std::make_pair(Cond, SanitizerKind::Builtin), |
1913 | 12 | SanitizerHandler::InvalidBuiltin, |
1914 | 12 | {EmitCheckSourceLocation(E->getExprLoc()), |
1915 | 12 | llvm::ConstantInt::get(Builder.getInt8Ty(), Kind)}, |
1916 | 12 | std::nullopt); |
1917 | 12 | return ArgValue; |
1918 | 83 | } |
1919 | | |
1920 | 22 | static Value *EmitAbs(CodeGenFunction &CGF, Value *ArgValue, bool HasNSW) { |
1921 | 22 | return CGF.Builder.CreateBinaryIntrinsic( |
1922 | 22 | Intrinsic::abs, ArgValue, |
1923 | 22 | ConstantInt::get(CGF.Builder.getInt1Ty(), HasNSW)); |
1924 | 22 | } |
1925 | | |
1926 | | static Value *EmitOverflowCheckedAbs(CodeGenFunction &CGF, const CallExpr *E, |
1927 | 4 | bool SanitizeOverflow) { |
1928 | 4 | Value *ArgValue = CGF.EmitScalarExpr(E->getArg(0)); |
1929 | | |
1930 | | // Try to eliminate overflow check. |
1931 | 4 | if (const auto *VCI = dyn_cast<llvm::ConstantInt>(ArgValue)) { |
1932 | 0 | if (!VCI->isMinSignedValue()) |
1933 | 0 | return EmitAbs(CGF, ArgValue, true); |
1934 | 0 | } |
1935 | | |
1936 | 4 | CodeGenFunction::SanitizerScope SanScope(&CGF); |
1937 | | |
1938 | 4 | Constant *Zero = Constant::getNullValue(ArgValue->getType()); |
1939 | 4 | Value *ResultAndOverflow = CGF.Builder.CreateBinaryIntrinsic( |
1940 | 4 | Intrinsic::ssub_with_overflow, Zero, ArgValue); |
1941 | 4 | Value *Result = CGF.Builder.CreateExtractValue(ResultAndOverflow, 0); |
1942 | 4 | Value *NotOverflow = CGF.Builder.CreateNot( |
1943 | 4 | CGF.Builder.CreateExtractValue(ResultAndOverflow, 1)); |
1944 | | |
1945 | | // TODO: support -ftrapv-handler. |
1946 | 4 | if (SanitizeOverflow) { |
1947 | 2 | CGF.EmitCheck({{NotOverflow, SanitizerKind::SignedIntegerOverflow}}, |
1948 | 2 | SanitizerHandler::NegateOverflow, |
1949 | 2 | {CGF.EmitCheckSourceLocation(E->getArg(0)->getExprLoc()), |
1950 | 2 | CGF.EmitCheckTypeDescriptor(E->getType())}, |
1951 | 2 | {ArgValue}); |
1952 | 2 | } else |
1953 | 2 | CGF.EmitTrapCheck(NotOverflow, SanitizerHandler::SubOverflow); |
1954 | | |
1955 | 4 | Value *CmpResult = CGF.Builder.CreateICmpSLT(ArgValue, Zero, "abscond"); |
1956 | 4 | return CGF.Builder.CreateSelect(CmpResult, Result, ArgValue, "abs"); |
1957 | 4 | } |
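     | | // e.g. for the minimum signed value, 0 - INT_MIN overflows, so this path
     | | // either reports NegateOverflow (under -fsanitize=signed-integer-overflow)
     | | // or emits a SubOverflow trap; every other input selects between x and 0-x.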
1958 | | |
1959 | | /// Get the argument type for arguments to os_log_helper. |
1960 | 140 | static CanQualType getOSLogArgType(ASTContext &C, int Size) { |
1961 | 140 | QualType UnsignedTy = C.getIntTypeForBitwidth(Size * 8, /*Signed=*/false); |
1962 | 140 | return C.getCanonicalType(UnsignedTy); |
1963 | 140 | } |
1964 | | |
1965 | | llvm::Function *CodeGenFunction::generateBuiltinOSLogHelperFunction( |
1966 | | const analyze_os_log::OSLogBufferLayout &Layout, |
1967 | 59 | CharUnits BufferAlignment) { |
1968 | 59 | ASTContext &Ctx = getContext(); |
1969 | | |
1970 | 59 | llvm::SmallString<64> Name; |
1971 | 59 | { |
1972 | 59 | raw_svector_ostream OS(Name); |
1973 | 59 | OS << "__os_log_helper"; |
1974 | 59 | OS << "_" << BufferAlignment.getQuantity(); |
1975 | 59 | OS << "_" << int(Layout.getSummaryByte()); |
1976 | 59 | OS << "_" << int(Layout.getNumArgsByte()); |
1977 | 59 | for (const auto &Item : Layout.Items) |
1978 | 86 | OS << "_" << int(Item.getSizeByte()) << "_" |
1979 | 86 | << int(Item.getDescriptorByte()); |
1980 | 59 | } |
1981 | | |
1982 | 59 | if (llvm::Function *F = CGM.getModule().getFunction(Name)) |
1983 | 23 | return F; |
1984 | | |
1985 | 36 | llvm::SmallVector<QualType, 4> ArgTys; |
1986 | 36 | FunctionArgList Args; |
1987 | 36 | Args.push_back(ImplicitParamDecl::Create( |
1988 | 36 | Ctx, nullptr, SourceLocation(), &Ctx.Idents.get("buffer"), Ctx.VoidPtrTy, |
1989 | 36 | ImplicitParamKind::Other)); |
1990 | 36 | ArgTys.emplace_back(Ctx.VoidPtrTy); |
1991 | | |
1992 | 94 | for (unsigned int I = 0, E = Layout.Items.size(); I < E; ++I) {
1993 | 58 | char Size = Layout.Items[I].getSizeByte(); |
1994 | 58 | if (!Size) |
1995 | 2 | continue; |
1996 | | |
1997 | 56 | QualType ArgTy = getOSLogArgType(Ctx, Size); |
1998 | 56 | Args.push_back(ImplicitParamDecl::Create( |
1999 | 56 | Ctx, nullptr, SourceLocation(), |
2000 | 56 | &Ctx.Idents.get(std::string("arg") + llvm::to_string(I)), ArgTy, |
2001 | 56 | ImplicitParamKind::Other)); |
2002 | 56 | ArgTys.emplace_back(ArgTy); |
2003 | 56 | } |
2004 | | |
2005 | 36 | QualType ReturnTy = Ctx.VoidTy; |
2006 | | |
2007 | | // The helper function has linkonce_odr linkage to enable the linker to merge |
2008 | | // identical functions. To ensure the merging always happens, 'noinline' is |
2009 | | // attached to the function when compiling with -Oz. |
2010 | 36 | const CGFunctionInfo &FI = |
2011 | 36 | CGM.getTypes().arrangeBuiltinFunctionDeclaration(ReturnTy, Args); |
2012 | 36 | llvm::FunctionType *FuncTy = CGM.getTypes().GetFunctionType(FI); |
2013 | 36 | llvm::Function *Fn = llvm::Function::Create( |
2014 | 36 | FuncTy, llvm::GlobalValue::LinkOnceODRLinkage, Name, &CGM.getModule()); |
2015 | 36 | Fn->setVisibility(llvm::GlobalValue::HiddenVisibility); |
2016 | 36 | CGM.SetLLVMFunctionAttributes(GlobalDecl(), FI, Fn, /*IsThunk=*/false); |
2017 | 36 | CGM.SetLLVMFunctionAttributesForDefinition(nullptr, Fn); |
2018 | 36 | Fn->setDoesNotThrow(); |
2019 | | |
2020 | | // Attach 'noinline' at -Oz. |
2021 | 36 | if (CGM.getCodeGenOpts().OptimizeSize == 2) |
2022 | 0 | Fn->addFnAttr(llvm::Attribute::NoInline); |
2023 | | |
2024 | 36 | auto NL = ApplyDebugLocation::CreateEmpty(*this); |
2025 | 36 | StartFunction(GlobalDecl(), ReturnTy, Fn, FI, Args); |
2026 | | |
2027 | | // Create a scope with an artificial location for the body of this function. |
2028 | 36 | auto AL = ApplyDebugLocation::CreateArtificial(*this); |
2029 | | |
2030 | 36 | CharUnits Offset; |
2031 | 36 | Address BufAddr = |
2032 | 36 | Address(Builder.CreateLoad(GetAddrOfLocalVar(Args[0]), "buf"), Int8Ty, |
2033 | 36 | BufferAlignment); |
2034 | 36 | Builder.CreateStore(Builder.getInt8(Layout.getSummaryByte()), |
2035 | 36 | Builder.CreateConstByteGEP(BufAddr, Offset++, "summary")); |
2036 | 36 | Builder.CreateStore(Builder.getInt8(Layout.getNumArgsByte()), |
2037 | 36 | Builder.CreateConstByteGEP(BufAddr, Offset++, "numArgs")); |
2038 | | |
2039 | 36 | unsigned I = 1; |
2040 | 58 | for (const auto &Item : Layout.Items) { |
2041 | 58 | Builder.CreateStore( |
2042 | 58 | Builder.getInt8(Item.getDescriptorByte()), |
2043 | 58 | Builder.CreateConstByteGEP(BufAddr, Offset++, "argDescriptor")); |
2044 | 58 | Builder.CreateStore( |
2045 | 58 | Builder.getInt8(Item.getSizeByte()), |
2046 | 58 | Builder.CreateConstByteGEP(BufAddr, Offset++, "argSize")); |
2047 | | |
2048 | 58 | CharUnits Size = Item.size(); |
2049 | 58 | if (!Size.getQuantity()) |
2050 | 2 | continue; |
2051 | | |
2052 | 56 | Address Arg = GetAddrOfLocalVar(Args[I]); |
2053 | 56 | Address Addr = Builder.CreateConstByteGEP(BufAddr, Offset, "argData"); |
2054 | 56 | Addr = Addr.withElementType(Arg.getElementType()); |
2055 | 56 | Builder.CreateStore(Builder.CreateLoad(Arg), Addr); |
2056 | 56 | Offset += Size; |
2057 | 56 | ++I; |
2058 | 56 | } |
2059 | | |
2060 | 36 | FinishFunction(); |
2061 | | |
2062 | 36 | return Fn; |
2063 | 59 | } |
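     | | // The mangling above encodes the full layout, roughly
     | | // "__os_log_helper_<align>_<summary>_<numArgs>(_<size>_<descriptor>)*", and
     | | // the body writes [summary][numArgs] then [descriptor][size][payload] per
     | | // item, so call sites with identical layouts share one linkonce_odr helper.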
2064 | | |
2065 | 59 | RValue CodeGenFunction::emitBuiltinOSLogFormat(const CallExpr &E) { |
2066 | 59 | assert(E.getNumArgs() >= 2 && |
2067 | 59 | "__builtin_os_log_format takes at least 2 arguments"); |
2068 | 59 | ASTContext &Ctx = getContext(); |
2069 | 59 | analyze_os_log::OSLogBufferLayout Layout; |
2070 | 59 | analyze_os_log::computeOSLogBufferLayout(Ctx, &E, Layout); |
2071 | 59 | Address BufAddr = EmitPointerWithAlignment(E.getArg(0)); |
2072 | 59 | llvm::SmallVector<llvm::Value *, 4> RetainableOperands; |
2073 | | |
2074 | | // Ignore argument 1, the format string. It is not currently used. |
2075 | 59 | CallArgList Args; |
2076 | 59 | Args.add(RValue::get(BufAddr.getPointer()), Ctx.VoidPtrTy); |
2077 | | |
2078 | 86 | for (const auto &Item : Layout.Items) { |
2079 | 86 | int Size = Item.getSizeByte(); |
2080 | 86 | if (!Size) |
2081 | 2 | continue; |
2082 | | |
2083 | 84 | llvm::Value *ArgVal; |
2084 | | |
2085 | 84 | if (Item.getKind() == analyze_os_log::OSLogBufferItem::MaskKind) { |
2086 | 4 | uint64_t Val = 0; |
2087 | 16 | for (unsigned I = 0, E = Item.getMaskType().size(); I < E; ++I)
2088 | 12 | Val |= ((uint64_t)Item.getMaskType()[I]) << I * 8; |
2089 | 4 | ArgVal = llvm::Constant::getIntegerValue(Int64Ty, llvm::APInt(64, Val)); |
2090 | 80 | } else if (const Expr *TheExpr = Item.getExpr()) { |
2091 | 78 | ArgVal = EmitScalarExpr(TheExpr, /*Ignore*/ false); |
2092 | | |
2093 | | // If a temporary object that requires destruction after the full |
2094 | | // expression is passed, push a lifetime-extended cleanup to extend its |
2095 | | // lifetime to the end of the enclosing block scope. |
2096 | 78 | auto LifetimeExtendObject = [&](const Expr *E) { |
2097 | 14 | E = E->IgnoreParenCasts(); |
2098 | | // Extend lifetimes of objects returned by function calls and message |
2099 | | // sends. |
2100 | | |
2101 | | // FIXME: We should do this in other cases in which temporaries are |
2102 | | // created including arguments of non-ARC types (e.g., C++ |
2103 | | // temporaries). |
2104 | 14 | if (isa<CallExpr>(E) || isa<ObjCMessageExpr>(E))
2105 | 8 | return true; |
2106 | 6 | return false; |
2107 | 14 | }; |
2108 | | |
2109 | 78 | if (TheExpr->getType()->isObjCRetainableType() && |
2110 | 78 | getLangOpts().ObjCAutoRefCount && LifetimeExtendObject(TheExpr)) {
2111 | 8 | assert(getEvaluationKind(TheExpr->getType()) == TEK_Scalar && |
2112 | 8 | "Only scalar can be a ObjC retainable type"); |
2113 | 8 | if (!isa<Constant>(ArgVal)) { |
2114 | 8 | CleanupKind Cleanup = getARCCleanupKind(); |
2115 | 8 | QualType Ty = TheExpr->getType(); |
2116 | 8 | Address Alloca = Address::invalid(); |
2117 | 8 | Address Addr = CreateMemTemp(Ty, "os.log.arg", &Alloca); |
2118 | 8 | ArgVal = EmitARCRetain(Ty, ArgVal); |
2119 | 8 | Builder.CreateStore(ArgVal, Addr); |
2120 | 8 | pushLifetimeExtendedDestroy(Cleanup, Alloca, Ty, |
2121 | 8 | CodeGenFunction::destroyARCStrongPrecise, |
2122 | 8 | Cleanup & EHCleanup); |
2123 | | |
2124 | | // Push a clang.arc.use call to ensure ARC optimizer knows that the |
2125 | | // argument has to be alive. |
2126 | 8 | if (CGM.getCodeGenOpts().OptimizationLevel != 0) |
2127 | 4 | pushCleanupAfterFullExpr<CallObjCArcUse>(Cleanup, ArgVal); |
2128 | 8 | } |
2129 | 8 | } |
2130 | 78 | } else { |
2131 | 2 | ArgVal = Builder.getInt32(Item.getConstValue().getQuantity()); |
2132 | 2 | } |
2133 | | |
2134 | 84 | unsigned ArgValSize = |
2135 | 84 | CGM.getDataLayout().getTypeSizeInBits(ArgVal->getType()); |
2136 | 84 | llvm::IntegerType *IntTy = llvm::Type::getIntNTy(getLLVMContext(), |
2137 | 84 | ArgValSize); |
2138 | 84 | ArgVal = Builder.CreateBitOrPointerCast(ArgVal, IntTy); |
2139 | 84 | CanQualType ArgTy = getOSLogArgType(Ctx, Size); |
2140 | | // If ArgVal has type x86_fp80, zero-extend ArgVal. |
2141 | 84 | ArgVal = Builder.CreateZExtOrBitCast(ArgVal, ConvertType(ArgTy)); |
2142 | 84 | Args.add(RValue::get(ArgVal), ArgTy); |
2143 | 84 | } |
2144 | | |
2145 | 59 | const CGFunctionInfo &FI = |
2146 | 59 | CGM.getTypes().arrangeBuiltinFunctionCall(Ctx.VoidTy, Args); |
2147 | 59 | llvm::Function *F = CodeGenFunction(CGM).generateBuiltinOSLogHelperFunction( |
2148 | 59 | Layout, BufAddr.getAlignment()); |
2149 | 59 | EmitCall(FI, CGCallee::forDirect(F), ReturnValueSlot(), Args); |
2150 | 59 | return RValue::get(BufAddr.getPointer()); |
2151 | 59 | } |
2152 | | |
2153 | | static bool isSpecialUnsignedMultiplySignedResult( |
2154 | | unsigned BuiltinID, WidthAndSignedness Op1Info, WidthAndSignedness Op2Info, |
2155 | 87 | WidthAndSignedness ResultInfo) { |
2156 | 87 | return BuiltinID == Builtin::BI__builtin_mul_overflow && |
2157 | 87 | Op1Info.Width == Op2Info.Width && Op2Info.Width == ResultInfo.Width &&
2158 | 87 | !Op1Info.Signed && !Op2Info.Signed && ResultInfo.Signed;
2159 | 87 | } |
2160 | | |
2161 | | static RValue EmitCheckedUnsignedMultiplySignedResult( |
2162 | | CodeGenFunction &CGF, const clang::Expr *Op1, WidthAndSignedness Op1Info, |
2163 | | const clang::Expr *Op2, WidthAndSignedness Op2Info, |
2164 | | const clang::Expr *ResultArg, QualType ResultQTy, |
2165 | 9 | WidthAndSignedness ResultInfo) { |
2166 | 9 | assert(isSpecialUnsignedMultiplySignedResult( |
2167 | 9 | Builtin::BI__builtin_mul_overflow, Op1Info, Op2Info, ResultInfo) && |
2168 | 9 | "Cannot specialize this multiply"); |
2169 | | |
2170 | 9 | llvm::Value *V1 = CGF.EmitScalarExpr(Op1); |
2171 | 9 | llvm::Value *V2 = CGF.EmitScalarExpr(Op2); |
2172 | | |
2173 | 9 | llvm::Value *HasOverflow; |
2174 | 9 | llvm::Value *Result = EmitOverflowIntrinsic( |
2175 | 9 | CGF, llvm::Intrinsic::umul_with_overflow, V1, V2, HasOverflow); |
2176 | | |
2177 | | // The intrinsic call will detect overflow when the value is > UINT_MAX;
2178 | | // however, since the original builtin had a signed result, we need to report
2179 | | // an overflow when the result is greater than INT_MAX.
2180 | 9 | auto IntMax = llvm::APInt::getSignedMaxValue(ResultInfo.Width); |
2181 | 9 | llvm::Value *IntMaxValue = llvm::ConstantInt::get(Result->getType(), IntMax); |
2182 | | |
2183 | 9 | llvm::Value *IntMaxOverflow = CGF.Builder.CreateICmpUGT(Result, IntMaxValue); |
2184 | 9 | HasOverflow = CGF.Builder.CreateOr(HasOverflow, IntMaxOverflow); |
2185 | | |
2186 | 9 | bool isVolatile = |
2187 | 9 | ResultArg->getType()->getPointeeType().isVolatileQualified(); |
2188 | 9 | Address ResultPtr = CGF.EmitPointerWithAlignment(ResultArg); |
2189 | 9 | CGF.Builder.CreateStore(CGF.EmitToMemory(Result, ResultQTy), ResultPtr, |
2190 | 9 | isVolatile); |
2191 | 9 | return RValue::get(HasOverflow); |
2192 | 9 | } |
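     | | // e.g. __builtin_mul_overflow(unsigned, unsigned, int *): overflow is
     | | // reported both when umul.with.overflow wraps and when the unsigned
     | | // product exceeds INT_MAX, because the destination is signed.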
2193 | | |
2194 | | /// Determine if a binop is a checked mixed-sign multiply we can specialize. |
2195 | | static bool isSpecialMixedSignMultiply(unsigned BuiltinID, |
2196 | | WidthAndSignedness Op1Info, |
2197 | | WidthAndSignedness Op2Info, |
2198 | 132 | WidthAndSignedness ResultInfo) { |
2199 | 132 | return BuiltinID == Builtin::BI__builtin_mul_overflow && |
2200 | 132 | std::max(Op1Info.Width, Op2Info.Width) >= ResultInfo.Width &&
2201 | 132 | Op1Info.Signed != Op2Info.Signed;
2202 | 132 | } |
2203 | | |
2204 | | /// Emit a checked mixed-sign multiply. This is a cheaper specialization of |
2205 | | /// the generic checked-binop irgen. |
2206 | | static RValue |
2207 | | EmitCheckedMixedSignMultiply(CodeGenFunction &CGF, const clang::Expr *Op1, |
2208 | | WidthAndSignedness Op1Info, const clang::Expr *Op2, |
2209 | | WidthAndSignedness Op2Info, |
2210 | | const clang::Expr *ResultArg, QualType ResultQTy, |
2211 | 27 | WidthAndSignedness ResultInfo) { |
2212 | 27 | assert(isSpecialMixedSignMultiply(Builtin::BI__builtin_mul_overflow, Op1Info, |
2213 | 27 | Op2Info, ResultInfo) && |
2214 | 27 | "Not a mixed-sign multipliction we can specialize"); |
2215 | | |
2216 | | // Emit the signed and unsigned operands. |
2217 | 27 | const clang::Expr *SignedOp = Op1Info.Signed ? Op1 : Op2;
2218 | 27 | const clang::Expr *UnsignedOp = Op1Info.Signed ? Op2 : Op1;
2219 | 27 | llvm::Value *Signed = CGF.EmitScalarExpr(SignedOp); |
2220 | 27 | llvm::Value *Unsigned = CGF.EmitScalarExpr(UnsignedOp); |
2221 | 27 | unsigned SignedOpWidth = Op1Info.Signed ? Op1Info.Width : Op2Info.Width;
2222 | 27 | unsigned UnsignedOpWidth = Op1Info.Signed ? Op2Info.Width : Op1Info.Width;
2223 | | |
2224 | | // One of the operands may be smaller than the other. If so, [s|z]ext it. |
2225 | 27 | if (SignedOpWidth < UnsignedOpWidth) |
2226 | 3 | Signed = CGF.Builder.CreateSExt(Signed, Unsigned->getType(), "op.sext"); |
2227 | 27 | if (UnsignedOpWidth < SignedOpWidth) |
2228 | 3 | Unsigned = CGF.Builder.CreateZExt(Unsigned, Signed->getType(), "op.zext"); |
2229 | | |
2230 | 27 | llvm::Type *OpTy = Signed->getType(); |
2231 | 27 | llvm::Value *Zero = llvm::Constant::getNullValue(OpTy); |
2232 | 27 | Address ResultPtr = CGF.EmitPointerWithAlignment(ResultArg); |
2233 | 27 | llvm::Type *ResTy = ResultPtr.getElementType(); |
2234 | 27 | unsigned OpWidth = std::max(Op1Info.Width, Op2Info.Width); |
2235 | | |
2236 | | // Take the absolute value of the signed operand. |
2237 | 27 | llvm::Value *IsNegative = CGF.Builder.CreateICmpSLT(Signed, Zero); |
2238 | 27 | llvm::Value *AbsOfNegative = CGF.Builder.CreateSub(Zero, Signed); |
2239 | 27 | llvm::Value *AbsSigned = |
2240 | 27 | CGF.Builder.CreateSelect(IsNegative, AbsOfNegative, Signed); |
2241 | | |
2242 | | // Perform a checked unsigned multiplication. |
2243 | 27 | llvm::Value *UnsignedOverflow; |
2244 | 27 | llvm::Value *UnsignedResult = |
2245 | 27 | EmitOverflowIntrinsic(CGF, llvm::Intrinsic::umul_with_overflow, AbsSigned, |
2246 | 27 | Unsigned, UnsignedOverflow); |
2247 | | |
2248 | 27 | llvm::Value *Overflow, *Result; |
2249 | 27 | if (ResultInfo.Signed) { |
2250 | | // Signed overflow occurs if the result is greater than INT_MAX or less
2251 | | // than INT_MIN, i.e. when |Result| > (INT_MAX + IsNegative).
2252 | 21 | auto IntMax = |
2253 | 21 | llvm::APInt::getSignedMaxValue(ResultInfo.Width).zext(OpWidth); |
2254 | 21 | llvm::Value *MaxResult = |
2255 | 21 | CGF.Builder.CreateAdd(llvm::ConstantInt::get(OpTy, IntMax), |
2256 | 21 | CGF.Builder.CreateZExt(IsNegative, OpTy)); |
2257 | 21 | llvm::Value *SignedOverflow = |
2258 | 21 | CGF.Builder.CreateICmpUGT(UnsignedResult, MaxResult); |
2259 | 21 | Overflow = CGF.Builder.CreateOr(UnsignedOverflow, SignedOverflow); |
2260 | | |
2261 | | // Prepare the signed result (possibly by negating it). |
2262 | 21 | llvm::Value *NegativeResult = CGF.Builder.CreateNeg(UnsignedResult); |
2263 | 21 | llvm::Value *SignedResult = |
2264 | 21 | CGF.Builder.CreateSelect(IsNegative, NegativeResult, UnsignedResult); |
2265 | 21 | Result = CGF.Builder.CreateTrunc(SignedResult, ResTy); |
2266 | 21 | } else { |
2267 | | // Unsigned overflow occurs if the result is < 0 or greater than UINT_MAX. |
2268 | 6 | llvm::Value *Underflow = CGF.Builder.CreateAnd( |
2269 | 6 | IsNegative, CGF.Builder.CreateIsNotNull(UnsignedResult)); |
2270 | 6 | Overflow = CGF.Builder.CreateOr(UnsignedOverflow, Underflow); |
2271 | 6 | if (ResultInfo.Width < OpWidth) { |
2272 | 3 | auto IntMax = |
2273 | 3 | llvm::APInt::getMaxValue(ResultInfo.Width).zext(OpWidth); |
2274 | 3 | llvm::Value *TruncOverflow = CGF.Builder.CreateICmpUGT( |
2275 | 3 | UnsignedResult, llvm::ConstantInt::get(OpTy, IntMax)); |
2276 | 3 | Overflow = CGF.Builder.CreateOr(Overflow, TruncOverflow); |
2277 | 3 | } |
2278 | | |
2279 | | // Negate the product if it would be negative in infinite precision. |
2280 | 6 | Result = CGF.Builder.CreateSelect( |
2281 | 6 | IsNegative, CGF.Builder.CreateNeg(UnsignedResult), UnsignedResult); |
2282 | | |
2283 | 6 | Result = CGF.Builder.CreateTrunc(Result, ResTy); |
2284 | 6 | } |
2285 | 27 | assert(Overflow && Result && "Missing overflow or result"); |
2286 | | |
2287 | 27 | bool isVolatile = |
2288 | 27 | ResultArg->getType()->getPointeeType().isVolatileQualified(); |
2289 | 27 | CGF.Builder.CreateStore(CGF.EmitToMemory(Result, ResultQTy), ResultPtr, |
2290 | 27 | isVolatile); |
2291 | 27 | return RValue::get(Overflow); |
2292 | 27 | } |
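
A quick illustration of the mixed-sign path this function specializes, as seen through __builtin_mul_overflow with one signed and one unsigned operand. A minimal sketch, assuming 32-bit int and the documented GCC/Clang builtin semantics (not Clang source):

#include <cassert>

int main() {
  unsigned res;
  // -7 * 3u = -21 is not representable unsigned: overflow is reported and
  // res holds the negated-then-truncated product, 0xFFFFFFEB.
  assert(__builtin_mul_overflow(-7, 3u, &res) && res == 0xFFFFFFEBu);

  long long wide;
  // The same product fits a signed 64-bit result: no overflow, wide == -21.
  assert(!__builtin_mul_overflow(-7, 3u, &wide) && wide == -21);
  return 0;
}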
2293 | | |
2294 | | static bool |
2295 | | TypeRequiresBuiltinLaunderImp(const ASTContext &Ctx, QualType Ty, |
2296 | 44 | llvm::SmallPtrSetImpl<const Decl *> &Seen) { |
2297 | 44 | if (const auto *Arr = Ctx.getAsArrayType(Ty)) |
2298 | 6 | Ty = Ctx.getBaseElementType(Arr); |
2299 | | |
2300 | 44 | const auto *Record = Ty->getAsCXXRecordDecl(); |
2301 | 44 | if (!Record) |
2302 | 12 | return false; |
2303 | | |
2304 | | // We've already checked this type, or are in the process of checking it. |
2305 | 32 | if (!Seen.insert(Record).second) |
2306 | 0 | return false; |
2307 | | |
2308 | 32 | assert(Record->hasDefinition() && |
2309 | 32 | "Incomplete types should already be diagnosed"); |
2310 | | |
2311 | 32 | if (Record->isDynamicClass()) |
2312 | 11 | return true; |
2313 | | |
2314 | 21 | for (FieldDecl *F : Record->fields()) { |
2315 | 20 | if (TypeRequiresBuiltinLaunderImp(Ctx, F->getType(), Seen)) |
2316 | 5 | return true; |
2317 | 20 | } |
2318 | 16 | return false; |
2319 | 21 | } |
2320 | | |
2321 | | /// Determine if the specified type requires laundering by checking if it is a |
2322 | | /// dynamic class type or contains a subobject which is a dynamic class type. |
2323 | 52 | static bool TypeRequiresBuiltinLaunder(CodeGenModule &CGM, QualType Ty) { |
2324 | 52 | if (!CGM.getCodeGenOpts().StrictVTablePointers) |
2325 | 28 | return false; |
2326 | 24 | llvm::SmallPtrSet<const Decl *, 16> Seen; |
2327 | 24 | return TypeRequiresBuiltinLaunderImp(CGM.getContext(), Ty, Seen); |
2328 | 52 | } |
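
For context on when this check fires: under -fstrict-vtable-pointers, vptr loads are treated as invariant, so reusing storage that holds an object of dynamic class type needs __builtin_launder (the builtin behind std::launder) to force a fresh vptr load. A minimal sketch, not taken from the Clang sources:

#include <new>

struct Dynamic { virtual int f() { return 1; } }; // isDynamicClass() == true

int reuse(Dynamic *p) {
  p->~Dynamic();
  ::new (static_cast<void *>(p)) Dynamic(); // same storage, new object
  // Without laundering, the optimizer may reuse the stale vptr here.
  return __builtin_launder(p)->f();
}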
2329 | | |
2330 | 180 | RValue CodeGenFunction::emitRotate(const CallExpr *E, bool IsRotateRight) { |
2331 | 180 | llvm::Value *Src = EmitScalarExpr(E->getArg(0)); |
2332 | 180 | llvm::Value *ShiftAmt = EmitScalarExpr(E->getArg(1)); |
2333 | | |
2334 | | // The builtin's shift arg may have a different type than the source arg and |
2335 | | // result, but the LLVM intrinsic uses the same type for all values. |
2336 | 180 | llvm::Type *Ty = Src->getType(); |
2337 | 180 | ShiftAmt = Builder.CreateIntCast(ShiftAmt, Ty, false); |
2338 | | |
2339 | | // Rotate is a special case of LLVM funnel shift - the first two arguments are the same.
2340 | 180 | unsigned IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
2341 | 180 | Function *F = CGM.getIntrinsic(IID, Ty); |
2342 | 180 | return RValue::get(Builder.CreateCall(F, { Src, Src, ShiftAmt })); |
2343 | 180 | } |
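
The lowering rests on the identity rotl(x, s) == fshl(x, x, s): with both funnel-shift inputs equal, the bits shifted out at the top re-enter at the bottom. A small stand-alone sketch of that identity for 32-bit operands (illustrative, not Clang code):

constexpr unsigned rotl32(unsigned x, unsigned s) {
  s &= 31;                                  // fshl masks the amount mod the bit width
  return (x << s) | (x >> ((32 - s) & 31)); // == fshl(x, x, s) on i32
}
static_assert(rotl32(0x80000001u, 1) == 0x00000003u, "high bit wraps around");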
2344 | | |
2345 | | // Map long-double math builtins to their f128 versions.
2346 | 96 | static unsigned mutateLongDoubleBuiltin(unsigned BuiltinID) { |
2347 | 96 | switch (BuiltinID) { |
2348 | 0 | #define MUTATE_LDBL(func) \ |
2349 | 59 | case Builtin::BI__builtin_##func##l: \ |
2350 | 59 | return Builtin::BI__builtin_##func##f128; |
2351 | 1 | MUTATE_LDBL(sqrt)
2352 | 1 | MUTATE_LDBL(cbrt)
2353 | 1 | MUTATE_LDBL(fabs)
2354 | 1 | MUTATE_LDBL(log)
2355 | 1 | MUTATE_LDBL(log2)
2356 | 1 | MUTATE_LDBL(log10)
2357 | 1 | MUTATE_LDBL(log1p)
2358 | 1 | MUTATE_LDBL(logb)
2359 | 1 | MUTATE_LDBL(exp)
2360 | 1 | MUTATE_LDBL(exp2)
2361 | 1 | MUTATE_LDBL(expm1)
2362 | 1 | MUTATE_LDBL(fdim)
2363 | 1 | MUTATE_LDBL(hypot)
2364 | 1 | MUTATE_LDBL(ilogb)
2365 | 1 | MUTATE_LDBL(pow)
2366 | 1 | MUTATE_LDBL(fmin)
2367 | 1 | MUTATE_LDBL(fmax)
2368 | 1 | MUTATE_LDBL(ceil)
2369 | 1 | MUTATE_LDBL(trunc)
2370 | 1 | MUTATE_LDBL(rint)
2371 | 1 | MUTATE_LDBL(nearbyint)
2372 | 1 | MUTATE_LDBL(round)
2373 | 1 | MUTATE_LDBL(floor)
2374 | 1 | MUTATE_LDBL(lround)
2375 | 1 | MUTATE_LDBL(llround)
2376 | 1 | MUTATE_LDBL(lrint)
2377 | 1 | MUTATE_LDBL(llrint)
2378 | 1 | MUTATE_LDBL(fmod)
2379 | 1 | MUTATE_LDBL(modf)
2380 | 1 | MUTATE_LDBL(nan)
2381 | 1 | MUTATE_LDBL(nans)
2382 | 0 | MUTATE_LDBL(inf)
2383 | 1 | MUTATE_LDBL(fma)
2384 | 1 | MUTATE_LDBL(sin)
2385 | 1 | MUTATE_LDBL(cos)
2386 | 1 | MUTATE_LDBL(tan)
2387 | 1 | MUTATE_LDBL(sinh)
2388 | 1 | MUTATE_LDBL(cosh)
2389 | 1 | MUTATE_LDBL(tanh)
2390 | 1 | MUTATE_LDBL(asin)
2391 | 1 | MUTATE_LDBL(acos)
2392 | 1 | MUTATE_LDBL(atan)
2393 | 1 | MUTATE_LDBL(asinh)
2394 | 1 | MUTATE_LDBL(acosh)
2395 | 1 | MUTATE_LDBL(atanh)
2396 | 1 | MUTATE_LDBL(atan2)
2397 | 1 | MUTATE_LDBL(erf)
2398 | 1 | MUTATE_LDBL(erfc)
2399 | 1 | MUTATE_LDBL(ldexp)
2400 | 1 | MUTATE_LDBL(frexp)
2401 | 0 | MUTATE_LDBL(huge_val)
2402 | 1 | MUTATE_LDBL(copysign)
2403 | 1 | MUTATE_LDBL(nextafter)
2404 | 2 | MUTATE_LDBL(nexttoward)
2405 | 1 | MUTATE_LDBL(remainder)
2406 | 1 | MUTATE_LDBL(remquo)
2407 | 1 | MUTATE_LDBL(scalbln)
2408 | 1 | MUTATE_LDBL(scalbn)
2409 | 1 | MUTATE_LDBL(tgamma)
2410 | 1 | MUTATE_LDBL(lgamma)
2411 | 0 | #undef MUTATE_LDBL |
2412 | 37 | default: |
2413 | 37 | return BuiltinID; |
2414 | 96 | } |
2415 | 96 | } |
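
Concretely: on a powerpc64le target where long double is IEEE quad (reached, for instance, via the -mabi=ieeelongdouble driver flag; the flag name is an assumption about the driver, not something this file checks), a call such as

long double root(long double x) {
  return __builtin_sqrtl(x); // remapped to __builtin_sqrtf128 before lowering
}

is rewritten by the table above so that the rest of CodeGen sees the f128 builtin.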
2416 | | |
2417 | | static Value *tryUseTestFPKind(CodeGenFunction &CGF, unsigned BuiltinID, |
2418 | 82 | Value *V) { |
2419 | 82 | if (CGF.Builder.getIsFPConstrained() && |
2420 | 82 | CGF.Builder.getDefaultConstrainedExcept() != fp::ebIgnore) {
2421 | 20 | if (Value *Result = |
2422 | 20 | CGF.getTargetHooks().testFPKind(V, BuiltinID, CGF.Builder, CGF.CGM)) |
2423 | 0 | return Result; |
2424 | 20 | } |
2425 | 82 | return nullptr; |
2426 | 82 | } |
2427 | | |
2428 | | static RValue EmitHipStdParUnsupportedBuiltin(CodeGenFunction *CGF, |
2429 | 1 | const FunctionDecl *FD) { |
2430 | 1 | auto Name = FD->getNameAsString() + "__hipstdpar_unsupported"; |
2431 | 1 | auto FnTy = CGF->CGM.getTypes().GetFunctionType(FD); |
2432 | 1 | auto UBF = CGF->CGM.getModule().getOrInsertFunction(Name, FnTy); |
2433 | | |
2434 | 1 | SmallVector<Value *, 16> Args; |
2435 | 1 | for (auto &&FormalTy : FnTy->params()) |
2436 | 0 | Args.push_back(llvm::PoisonValue::get(FormalTy)); |
2437 | | |
2438 | 1 | return RValue::get(CGF->Builder.CreateCall(UBF, Args)); |
2439 | 1 | } |
2440 | | |
2441 | | RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, |
2442 | | const CallExpr *E, |
2443 | 114k | ReturnValueSlot ReturnValue) { |
2444 | 114k | const FunctionDecl *FD = GD.getDecl()->getAsFunction(); |
2445 | | // See if we can constant fold this builtin. If so, don't emit it at all. |
2446 | | // TODO: Extend this handling to all builtin calls that we can constant-fold. |
2447 | 114k | Expr::EvalResult Result; |
2448 | 114k | if (E->isPRValue() && E->EvaluateAsRValue(Result, CGM.getContext()) &&
2449 | 114k | !Result.hasSideEffects()) {
2450 | 1.01k | if (Result.Val.isInt()) |
2451 | 601 | return RValue::get(llvm::ConstantInt::get(getLLVMContext(), |
2452 | 601 | Result.Val.getInt())); |
2453 | 413 | if (Result.Val.isFloat()) |
2454 | 206 | return RValue::get(llvm::ConstantFP::get(getLLVMContext(), |
2455 | 206 | Result.Val.getFloat())); |
2456 | 413 | } |
2457 | | |
2458 | | // If the current long-double semantics is IEEE 128-bit, replace math builtins
2459 | | // of long-double with their f128 equivalents.
2460 | | // TODO: This mutation should also be applied to targets other than PPC,
2461 | | // once the backend supports IEEE 128-bit style libcalls.
2462 | 113k | if (getTarget().getTriple().isPPC64() && |
2463 | 113k | &getTarget().getLongDoubleFormat() == &llvm::APFloat::IEEEquad())
2464 | 96 | BuiltinID = mutateLongDoubleBuiltin(BuiltinID); |
2465 | | |
2466 | | // If the builtin has been declared explicitly with an assembler label, |
2467 | | // disable the specialized emitting below. Ideally we should communicate the |
2468 | | // rename in IR, or at least avoid generating the intrinsic calls that are |
2469 | | // likely to get lowered to the renamed library functions. |
2470 | 113k | const unsigned BuiltinIDIfNoAsmLabel = |
2471 | 113k | FD->hasAttr<AsmLabelAttr>() ? 0 : BuiltinID;
2472 | | |
2473 | 113k | std::optional<bool> ErrnoOverriden; |
2474 | | // ErrnoOverriden is true if math-errno is overridden via the
2475 | | // '#pragma float_control(precise, on)'. This pragma disables fast-math, |
2476 | | // which implies math-errno. |
2477 | 113k | if (E->hasStoredFPFeatures()) { |
2478 | 635 | FPOptionsOverride OP = E->getFPFeatures(); |
2479 | 635 | if (OP.hasMathErrnoOverride()) |
2480 | 13 | ErrnoOverriden = OP.getMathErrnoOverride(); |
2481 | 635 | } |
2482 | | // True if '__attribute__((optnone))' is used. This attribute overrides
2483 | | // fast-math, which implies math-errno.
2484 | 113k | bool OptNone = CurFuncDecl && CurFuncDecl->hasAttr<OptimizeNoneAttr>();
2485 | | |
2486 | | // True if we are compiling at -O2 and errno has been disabled |
2487 | | // using the '#pragma float_control(precise, off)', and |
2488 | | // attribute opt-none hasn't been seen. |
2489 | 113k | bool ErrnoOverridenToFalseWithOpt = |
2490 | 113k | ErrnoOverriden.has_value() && !ErrnoOverriden.value() && !OptNone &&
2491 | 113k | CGM.getCodeGenOpts().OptimizationLevel != 0;
2492 | | |
2493 | | // There are LLVM math intrinsics/instructions corresponding to math library |
2494 | | // functions except the LLVM op will never set errno while the math library |
2495 | | // might. Also, math builtins have the same semantics as their math library |
2496 | | // twins. Thus, we can transform math library and builtin calls to their |
2497 | | // LLVM counterparts if the call is marked 'const' (known to never set errno). |
2498 | | // In case FP exceptions are enabled, the experimental versions of the |
2499 | | // intrinsics model those. |
2500 | 113k | bool ConstAlways = |
2501 | 113k | getContext().BuiltinInfo.isConst(BuiltinID); |
2502 | | |
2503 | | // There's a special case with the fma builtins where they are always const |
2504 | | // if the target environment is GNU, or the target OS is Windows and we're
2505 | | // targeting the MSVCRT.dll environment.
2506 | | // FIXME: This list can become outdated. Need to find a way to get it some
2507 | | // other way. |
2508 | 113k | switch (BuiltinID) { |
2509 | 17 | case Builtin::BI__builtin_fma: |
2510 | 39 | case Builtin::BI__builtin_fmaf: |
2511 | 49 | case Builtin::BI__builtin_fmal: |
2512 | 61 | case Builtin::BIfma: |
2513 | 72 | case Builtin::BIfmaf: |
2514 | 82 | case Builtin::BIfmal: { |
2515 | 82 | auto &Trip = CGM.getTriple(); |
2516 | 82 | if (Trip.isGNUEnvironment() || Trip.isOSMSVCRT())
2517 | 12 | ConstAlways = true; |
2518 | 82 | break; |
2519 | 72 | } |
2520 | 113k | default: |
2521 | 113k | break; |
2522 | 113k | } |
2523 | | |
2524 | 113k | bool ConstWithoutErrnoAndExceptions = |
2525 | 113k | getContext().BuiltinInfo.isConstWithoutErrnoAndExceptions(BuiltinID); |
2526 | 113k | bool ConstWithoutExceptions = |
2527 | 113k | getContext().BuiltinInfo.isConstWithoutExceptions(BuiltinID); |
2528 | | |
2529 | | // ConstAttr is enabled in fast-math mode. In fast-math mode, math-errno is |
2530 | | // disabled. |
2531 | | // Math intrinsics are generated only when math-errno is disabled. Any pragmas |
2532 | | // or attributes that affect math-errno should prevent or allow math |
2533 | | // intrinsics to be generated. Intrinsics are generated:
2534 | | // 1- In fast math mode, unless math-errno is overridden
2535 | | // via '#pragma float_control(precise, on)', or via an
2536 | | // '__attribute__((optnone))'.
2537 | | // 2- If math-errno was enabled on the command line but overridden
2538 | | // to false via '#pragma float_control(precise, off)' and
2539 | | // '__attribute__((optnone))' hasn't been used.
2540 | | // 3- If we are compiling with optimization and errno has been disabled
2541 | | // via '#pragma float_control(precise, off)', and
2542 | | // '__attribute__((optnone))' hasn't been used.
2543 | | |
2544 | 113k | bool ConstWithoutErrnoOrExceptions = |
2545 | 113k | ConstWithoutErrnoAndExceptions || ConstWithoutExceptions;
2546 | 113k | bool GenerateIntrinsics =
2547 | 113k | (ConstAlways && !OptNone) ||
2548 | 113k | (!getLangOpts().MathErrno &&
2549 | 75.8k | !(ErrnoOverriden.has_value() && ErrnoOverriden.value()) && !OptNone);
2550 | 113k | if (!GenerateIntrinsics) {
2551 | 1.69k | GenerateIntrinsics =
2552 | 1.69k | ConstWithoutErrnoOrExceptions && !ConstWithoutErrnoAndExceptions;
2553 | 1.69k | if (!GenerateIntrinsics)
2554 | 1.68k | GenerateIntrinsics =
2555 | 1.68k | ConstWithoutErrnoOrExceptions &&
2556 | 1.68k | (!getLangOpts().MathErrno &&
2557 | 1.45k | !(ErrnoOverriden.has_value() && ErrnoOverriden.value()) && !OptNone);
2558 | 1.69k | if (!GenerateIntrinsics)
2559 | 1.68k | GenerateIntrinsics =
2560 | 1.68k | ConstWithoutErrnoOrExceptions && ErrnoOverridenToFalseWithOpt;
2561 | 1.69k | } |
2562 | 113k | if (GenerateIntrinsics) { |
2563 | 111k | switch (BuiltinIDIfNoAsmLabel) { |
2564 | 7 | case Builtin::BIceil: |
2565 | 14 | case Builtin::BIceilf: |
2566 | 21 | case Builtin::BIceill: |
2567 | 33 | case Builtin::BI__builtin_ceil: |
2568 | 49 | case Builtin::BI__builtin_ceilf: |
2569 | 50 | case Builtin::BI__builtin_ceilf16: |
2570 | 62 | case Builtin::BI__builtin_ceill: |
2571 | 70 | case Builtin::BI__builtin_ceilf128: |
2572 | 70 | return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, |
2573 | 70 | Intrinsic::ceil, |
2574 | 70 | Intrinsic::experimental_constrained_ceil)); |
2575 | | |
2576 | 8 | case Builtin::BIcopysign: |
2577 | 15 | case Builtin::BIcopysignf: |
2578 | 22 | case Builtin::BIcopysignl: |
2579 | 34 | case Builtin::BI__builtin_copysign: |
2580 | 45 | case Builtin::BI__builtin_copysignf: |
2581 | 46 | case Builtin::BI__builtin_copysignf16: |
2582 | 57 | case Builtin::BI__builtin_copysignl: |
2583 | 64 | case Builtin::BI__builtin_copysignf128: |
2584 | 64 | return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::copysign)); |
2585 | | |
2586 | 6 | case Builtin::BIcos: |
2587 | 12 | case Builtin::BIcosf: |
2588 | 15 | case Builtin::BIcosl: |
2589 | 18 | case Builtin::BI__builtin_cos: |
2590 | 21 | case Builtin::BI__builtin_cosf: |
2591 | 22 | case Builtin::BI__builtin_cosf16: |
2592 | 25 | case Builtin::BI__builtin_cosl: |
2593 | 28 | case Builtin::BI__builtin_cosf128: |
2594 | 28 | return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, |
2595 | 28 | Intrinsic::cos, |
2596 | 28 | Intrinsic::experimental_constrained_cos)); |
2597 | | |
2598 | 7 | case Builtin::BIexp: |
2599 | 12 | case Builtin::BIexpf: |
2600 | 17 | case Builtin::BIexpl: |
2601 | 20 | case Builtin::BI__builtin_exp: |
2602 | 26 | case Builtin::BI__builtin_expf: |
2603 | 27 | case Builtin::BI__builtin_expf16: |
2604 | 30 | case Builtin::BI__builtin_expl: |
2605 | 33 | case Builtin::BI__builtin_expf128: |
2606 | 33 | return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, |
2607 | 33 | Intrinsic::exp, |
2608 | 33 | Intrinsic::experimental_constrained_exp)); |
2609 | | |
2610 | 3 | case Builtin::BIexp2: |
2611 | 6 | case Builtin::BIexp2f: |
2612 | 9 | case Builtin::BIexp2l: |
2613 | 12 | case Builtin::BI__builtin_exp2: |
2614 | 18 | case Builtin::BI__builtin_exp2f: |
2615 | 19 | case Builtin::BI__builtin_exp2f16: |
2616 | 22 | case Builtin::BI__builtin_exp2l: |
2617 | 25 | case Builtin::BI__builtin_exp2f128: |
2618 | 25 | return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, |
2619 | 25 | Intrinsic::exp2, |
2620 | 25 | Intrinsic::experimental_constrained_exp2)); |
2621 | 3 | case Builtin::BI__builtin_exp10: |
2622 | 6 | case Builtin::BI__builtin_exp10f: |
2623 | 7 | case Builtin::BI__builtin_exp10f16: |
2624 | 10 | case Builtin::BI__builtin_exp10l: |
2625 | 13 | case Builtin::BI__builtin_exp10f128: { |
2626 | | // TODO: strictfp support |
2627 | 13 | if (Builder.getIsFPConstrained()) |
2628 | 4 | break; |
2629 | 9 | return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::exp10)); |
2630 | 13 | } |
2631 | 10 | case Builtin::BIfabs: |
2632 | 18 | case Builtin::BIfabsf: |
2633 | 25 | case Builtin::BIfabsl: |
2634 | 59 | case Builtin::BI__builtin_fabs: |
2635 | 77 | case Builtin::BI__builtin_fabsf: |
2636 | 78 | case Builtin::BI__builtin_fabsf16: |
2637 | 91 | case Builtin::BI__builtin_fabsl: |
2638 | 98 | case Builtin::BI__builtin_fabsf128: |
2639 | 98 | return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::fabs)); |
2640 | | |
2641 | 28 | case Builtin::BIfloor: |
2642 | 35 | case Builtin::BIfloorf: |
2643 | 42 | case Builtin::BIfloorl: |
2644 | 61 | case Builtin::BI__builtin_floor: |
2645 | 73 | case Builtin::BI__builtin_floorf: |
2646 | 74 | case Builtin::BI__builtin_floorf16: |
2647 | 86 | case Builtin::BI__builtin_floorl: |
2648 | 94 | case Builtin::BI__builtin_floorf128: |
2649 | 94 | return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, |
2650 | 94 | Intrinsic::floor, |
2651 | 94 | Intrinsic::experimental_constrained_floor)); |
2652 | | |
2653 | 9 | case Builtin::BIfma: |
2654 | 17 | case Builtin::BIfmaf: |
2655 | 24 | case Builtin::BIfmal: |
2656 | 39 | case Builtin::BI__builtin_fma: |
2657 | 59 | case Builtin::BI__builtin_fmaf: |
2658 | 71 | case Builtin::BI__builtin_fmaf16: |
2659 | 76 | case Builtin::BI__builtin_fmal: |
2660 | 79 | case Builtin::BI__builtin_fmaf128: |
2661 | 79 | return RValue::get(emitTernaryMaybeConstrainedFPBuiltin(*this, E, |
2662 | 79 | Intrinsic::fma, |
2663 | 79 | Intrinsic::experimental_constrained_fma)); |
2664 | | |
2665 | 7 | case Builtin::BIfmax: |
2666 | 14 | case Builtin::BIfmaxf: |
2667 | 21 | case Builtin::BIfmaxl: |
2668 | 43 | case Builtin::BI__builtin_fmax: |
2669 | 60 | case Builtin::BI__builtin_fmaxf: |
2670 | 61 | case Builtin::BI__builtin_fmaxf16: |
2671 | 73 | case Builtin::BI__builtin_fmaxl: |
2672 | 81 | case Builtin::BI__builtin_fmaxf128: |
2673 | 81 | return RValue::get(emitBinaryMaybeConstrainedFPBuiltin(*this, E, |
2674 | 81 | Intrinsic::maxnum, |
2675 | 81 | Intrinsic::experimental_constrained_maxnum)); |
2676 | | |
2677 | 7 | case Builtin::BIfmin: |
2678 | 14 | case Builtin::BIfminf: |
2679 | 21 | case Builtin::BIfminl: |
2680 | 41 | case Builtin::BI__builtin_fmin: |
2681 | 58 | case Builtin::BI__builtin_fminf: |
2682 | 59 | case Builtin::BI__builtin_fminf16: |
2683 | 71 | case Builtin::BI__builtin_fminl: |
2684 | 79 | case Builtin::BI__builtin_fminf128: |
2685 | 79 | return RValue::get(emitBinaryMaybeConstrainedFPBuiltin(*this, E, |
2686 | 79 | Intrinsic::minnum, |
2687 | 79 | Intrinsic::experimental_constrained_minnum)); |
2688 | | |
2689 | | // fmod() is a special-case. It maps to the frem instruction rather than an |
2690 | | // LLVM intrinsic. |
2691 | 3 | case Builtin::BIfmod: |
2692 | 6 | case Builtin::BIfmodf: |
2693 | 9 | case Builtin::BIfmodl: |
2694 | 14 | case Builtin::BI__builtin_fmod: |
2695 | 19 | case Builtin::BI__builtin_fmodf: |
2696 | 20 | case Builtin::BI__builtin_fmodf16: |
2697 | 25 | case Builtin::BI__builtin_fmodl: |
2698 | 28 | case Builtin::BI__builtin_fmodf128: { |
2699 | 28 | CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E); |
2700 | 28 | Value *Arg1 = EmitScalarExpr(E->getArg(0)); |
2701 | 28 | Value *Arg2 = EmitScalarExpr(E->getArg(1)); |
2702 | 28 | return RValue::get(Builder.CreateFRem(Arg1, Arg2, "fmod")); |
2703 | 25 | } |
2704 | | |
2705 | 5 | case Builtin::BIlog: |
2706 | 10 | case Builtin::BIlogf: |
2707 | 15 | case Builtin::BIlogl: |
2708 | 18 | case Builtin::BI__builtin_log: |
2709 | 24 | case Builtin::BI__builtin_logf: |
2710 | 25 | case Builtin::BI__builtin_logf16: |
2711 | 28 | case Builtin::BI__builtin_logl: |
2712 | 31 | case Builtin::BI__builtin_logf128: |
2713 | 31 | return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, |
2714 | 31 | Intrinsic::log, |
2715 | 31 | Intrinsic::experimental_constrained_log)); |
2716 | | |
2717 | 3 | case Builtin::BIlog10: |
2718 | 6 | case Builtin::BIlog10f: |
2719 | 9 | case Builtin::BIlog10l: |
2720 | 12 | case Builtin::BI__builtin_log10: |
2721 | 21 | case Builtin::BI__builtin_log10f: |
2722 | 22 | case Builtin::BI__builtin_log10f16: |
2723 | 25 | case Builtin::BI__builtin_log10l: |
2724 | 28 | case Builtin::BI__builtin_log10f128: |
2725 | 28 | return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, |
2726 | 28 | Intrinsic::log10, |
2727 | 28 | Intrinsic::experimental_constrained_log10)); |
2728 | | |
2729 | 3 | case Builtin::BIlog2: |
2730 | 6 | case Builtin::BIlog2f: |
2731 | 9 | case Builtin::BIlog2l: |
2732 | 12 | case Builtin::BI__builtin_log2: |
2733 | 15 | case Builtin::BI__builtin_log2f: |
2734 | 16 | case Builtin::BI__builtin_log2f16: |
2735 | 19 | case Builtin::BI__builtin_log2l: |
2736 | 22 | case Builtin::BI__builtin_log2f128: |
2737 | 22 | return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, |
2738 | 22 | Intrinsic::log2, |
2739 | 22 | Intrinsic::experimental_constrained_log2)); |
2740 | | |
2741 | 7 | case Builtin::BInearbyint: |
2742 | 14 | case Builtin::BInearbyintf: |
2743 | 21 | case Builtin::BInearbyintl: |
2744 | 33 | case Builtin::BI__builtin_nearbyint: |
2745 | 45 | case Builtin::BI__builtin_nearbyintf: |
2746 | 57 | case Builtin::BI__builtin_nearbyintl: |
2747 | 65 | case Builtin::BI__builtin_nearbyintf128: |
2748 | 65 | return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, |
2749 | 65 | Intrinsic::nearbyint, |
2750 | 65 | Intrinsic::experimental_constrained_nearbyint)); |
2751 | | |
2752 | 5 | case Builtin::BIpow: |
2753 | 10 | case Builtin::BIpowf: |
2754 | 15 | case Builtin::BIpowl: |
2755 | 18 | case Builtin::BI__builtin_pow: |
2756 | 21 | case Builtin::BI__builtin_powf: |
2757 | 22 | case Builtin::BI__builtin_powf16: |
2758 | 25 | case Builtin::BI__builtin_powl: |
2759 | 28 | case Builtin::BI__builtin_powf128: |
2760 | 28 | return RValue::get(emitBinaryMaybeConstrainedFPBuiltin(*this, E, |
2761 | 28 | Intrinsic::pow, |
2762 | 28 | Intrinsic::experimental_constrained_pow)); |
2763 | | |
2764 | 7 | case Builtin::BIrint: |
2765 | 14 | case Builtin::BIrintf: |
2766 | 21 | case Builtin::BIrintl: |
2767 | 39 | case Builtin::BI__builtin_rint: |
2768 | 57 | case Builtin::BI__builtin_rintf: |
2769 | 58 | case Builtin::BI__builtin_rintf16: |
2770 | 70 | case Builtin::BI__builtin_rintl: |
2771 | 78 | case Builtin::BI__builtin_rintf128: |
2772 | 78 | return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, |
2773 | 78 | Intrinsic::rint, |
2774 | 78 | Intrinsic::experimental_constrained_rint)); |
2775 | | |
2776 | 7 | case Builtin::BIround: |
2777 | 14 | case Builtin::BIroundf: |
2778 | 21 | case Builtin::BIroundl: |
2779 | 39 | case Builtin::BI__builtin_round: |
2780 | 57 | case Builtin::BI__builtin_roundf: |
2781 | 58 | case Builtin::BI__builtin_roundf16: |
2782 | 70 | case Builtin::BI__builtin_roundl: |
2783 | 78 | case Builtin::BI__builtin_roundf128: |
2784 | 78 | return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, |
2785 | 78 | Intrinsic::round, |
2786 | 78 | Intrinsic::experimental_constrained_round)); |
2787 | | |
2788 | 0 | case Builtin::BIroundeven: |
2789 | 0 | case Builtin::BIroundevenf: |
2790 | 0 | case Builtin::BIroundevenl: |
2791 | 6 | case Builtin::BI__builtin_roundeven: |
2792 | 12 | case Builtin::BI__builtin_roundevenf: |
2793 | 12 | case Builtin::BI__builtin_roundevenf16: |
2794 | 14 | case Builtin::BI__builtin_roundevenl: |
2795 | 14 | case Builtin::BI__builtin_roundevenf128: |
2796 | 14 | return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, |
2797 | 14 | Intrinsic::roundeven, |
2798 | 14 | Intrinsic::experimental_constrained_roundeven)); |
2799 | | |
2800 | 5 | case Builtin::BIsin: |
2801 | 11 | case Builtin::BIsinf: |
2802 | 14 | case Builtin::BIsinl: |
2803 | 20 | case Builtin::BI__builtin_sin: |
2804 | 23 | case Builtin::BI__builtin_sinf: |
2805 | 24 | case Builtin::BI__builtin_sinf16: |
2806 | 27 | case Builtin::BI__builtin_sinl: |
2807 | 30 | case Builtin::BI__builtin_sinf128: |
2808 | 30 | return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, |
2809 | 30 | Intrinsic::sin, |
2810 | 30 | Intrinsic::experimental_constrained_sin)); |
2811 | | |
2812 | 6 | case Builtin::BIsqrt: |
2813 | 13 | case Builtin::BIsqrtf: |
2814 | 18 | case Builtin::BIsqrtl: |
2815 | 43 | case Builtin::BI__builtin_sqrt: |
2816 | 68 | case Builtin::BI__builtin_sqrtf: |
2817 | 70 | case Builtin::BI__builtin_sqrtf16: |
2818 | 75 | case Builtin::BI__builtin_sqrtl: |
2819 | 78 | case Builtin::BI__builtin_sqrtf128: |
2820 | 148 | case Builtin::BI__builtin_elementwise_sqrt: { |
2821 | 148 | llvm::Value *Call = emitUnaryMaybeConstrainedFPBuiltin( |
2822 | 148 | *this, E, Intrinsic::sqrt, Intrinsic::experimental_constrained_sqrt); |
2823 | 148 | SetSqrtFPAccuracy(Call); |
2824 | 148 | return RValue::get(Call); |
2825 | 78 | } |
2826 | 7 | case Builtin::BItrunc: |
2827 | 14 | case Builtin::BItruncf: |
2828 | 21 | case Builtin::BItruncl: |
2829 | 33 | case Builtin::BI__builtin_trunc: |
2830 | 45 | case Builtin::BI__builtin_truncf: |
2831 | 46 | case Builtin::BI__builtin_truncf16: |
2832 | 58 | case Builtin::BI__builtin_truncl: |
2833 | 66 | case Builtin::BI__builtin_truncf128: |
2834 | 66 | return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, |
2835 | 66 | Intrinsic::trunc, |
2836 | 66 | Intrinsic::experimental_constrained_trunc)); |
2837 | | |
2838 | 3 | case Builtin::BIlround: |
2839 | 6 | case Builtin::BIlroundf: |
2840 | 9 | case Builtin::BIlroundl: |
2841 | 14 | case Builtin::BI__builtin_lround: |
2842 | 19 | case Builtin::BI__builtin_lroundf: |
2843 | 24 | case Builtin::BI__builtin_lroundl: |
2844 | 27 | case Builtin::BI__builtin_lroundf128: |
2845 | 27 | return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin( |
2846 | 27 | *this, E, Intrinsic::lround, |
2847 | 27 | Intrinsic::experimental_constrained_lround)); |
2848 | | |
2849 | 3 | case Builtin::BIllround: |
2850 | 6 | case Builtin::BIllroundf: |
2851 | 9 | case Builtin::BIllroundl: |
2852 | 12 | case Builtin::BI__builtin_llround: |
2853 | 15 | case Builtin::BI__builtin_llroundf: |
2854 | 18 | case Builtin::BI__builtin_llroundl: |
2855 | 21 | case Builtin::BI__builtin_llroundf128: |
2856 | 21 | return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin( |
2857 | 21 | *this, E, Intrinsic::llround, |
2858 | 21 | Intrinsic::experimental_constrained_llround)); |
2859 | | |
2860 | 3 | case Builtin::BIlrint: |
2861 | 6 | case Builtin::BIlrintf: |
2862 | 9 | case Builtin::BIlrintl: |
2863 | 14 | case Builtin::BI__builtin_lrint: |
2864 | 19 | case Builtin::BI__builtin_lrintf: |
2865 | 24 | case Builtin::BI__builtin_lrintl: |
2866 | 27 | case Builtin::BI__builtin_lrintf128: |
2867 | 27 | return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin( |
2868 | 27 | *this, E, Intrinsic::lrint, |
2869 | 27 | Intrinsic::experimental_constrained_lrint)); |
2870 | | |
2871 | 3 | case Builtin::BIllrint: |
2872 | 6 | case Builtin::BIllrintf: |
2873 | 9 | case Builtin::BIllrintl: |
2874 | 12 | case Builtin::BI__builtin_llrint: |
2875 | 15 | case Builtin::BI__builtin_llrintf: |
2876 | 18 | case Builtin::BI__builtin_llrintl: |
2877 | 21 | case Builtin::BI__builtin_llrintf128: |
2878 | 21 | return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin( |
2879 | 21 | *this, E, Intrinsic::llrint, |
2880 | 21 | Intrinsic::experimental_constrained_llrint)); |
2881 | 4 | case Builtin::BI__builtin_ldexp: |
2882 | 8 | case Builtin::BI__builtin_ldexpf: |
2883 | 13 | case Builtin::BI__builtin_ldexpl: |
2884 | 16 | case Builtin::BI__builtin_ldexpf16: |
2885 | 18 | case Builtin::BI__builtin_ldexpf128: { |
2886 | 18 | return RValue::get(emitBinaryExpMaybeConstrainedFPBuiltin( |
2887 | 18 | *this, E, Intrinsic::ldexp, |
2888 | 18 | Intrinsic::experimental_constrained_ldexp)); |
2889 | 16 | } |
2890 | 110k | default: |
2891 | 110k | break; |
2892 | 111k | } |
2893 | 111k | } |
2894 | | |
2895 | | // Check NonnullAttribute/NullabilityArg and Alignment. |
2896 | 112k | auto EmitArgCheck = [&](TypeCheckKind Kind, Address A, const Expr *Arg, |
2897 | 112k | unsigned ParmNum) { |
2898 | 430 | Value *Val = A.getPointer(); |
2899 | 430 | EmitNonNullArgCheck(RValue::get(Val), Arg->getType(), Arg->getExprLoc(), FD, |
2900 | 430 | ParmNum); |
2901 | | |
2902 | 430 | if (SanOpts.has(SanitizerKind::Alignment) && ClSanitizeAlignmentBuiltin) {
2903 | 24 | SanitizerSet SkippedChecks; |
2904 | 24 | SkippedChecks.set(SanitizerKind::All); |
2905 | 24 | SkippedChecks.clear(SanitizerKind::Alignment); |
2906 | 24 | SourceLocation Loc = Arg->getExprLoc(); |
2907 | | // Strip an implicit cast. |
2908 | 24 | if (auto *CE = dyn_cast<ImplicitCastExpr>(Arg)) |
2909 | 22 | if (CE->getCastKind() == CK_BitCast) |
2910 | 14 | Arg = CE->getSubExpr(); |
2911 | 24 | EmitTypeCheck(Kind, Loc, Val, Arg->getType(), A.getAlignment(), |
2912 | 24 | SkippedChecks); |
2913 | 24 | } |
2914 | 430 | }; |
2915 | | |
2916 | 112k | switch (BuiltinIDIfNoAsmLabel) { |
2917 | 85.1k | default: break; |
2918 | 85.1k | case Builtin::BI__builtin___CFStringMakeConstantString: |
2919 | 173 | case Builtin::BI__builtin___NSStringMakeConstantString: |
2920 | 173 | return RValue::get(ConstantEmitter(*this).emitAbstract(E, E->getType())); |
2921 | 0 | case Builtin::BI__builtin_stdarg_start: |
2922 | 271 | case Builtin::BI__builtin_va_start: |
2923 | 271 | case Builtin::BI__va_start: |
2924 | 503 | case Builtin::BI__builtin_va_end: |
2925 | 503 | EmitVAStartEnd(BuiltinID == Builtin::BI__va_start |
2926 | 503 | ? EmitScalarExpr(E->getArg(0))
2927 | 503 | : EmitVAListRef(E->getArg(0)).getPointer(), |
2928 | 503 | BuiltinID != Builtin::BI__builtin_va_end); |
2929 | 503 | return RValue::get(nullptr); |
2930 | 10 | case Builtin::BI__builtin_va_copy: { |
2931 | 10 | Value *DstPtr = EmitVAListRef(E->getArg(0)).getPointer(); |
2932 | 10 | Value *SrcPtr = EmitVAListRef(E->getArg(1)).getPointer(); |
2933 | 10 | Builder.CreateCall(CGM.getIntrinsic(Intrinsic::vacopy), {DstPtr, SrcPtr}); |
2934 | 10 | return RValue::get(nullptr); |
2935 | 271 | } |
2936 | 6 | case Builtin::BIabs: |
2937 | 7 | case Builtin::BIlabs: |
2938 | 8 | case Builtin::BIllabs: |
2939 | 18 | case Builtin::BI__builtin_abs: |
2940 | 22 | case Builtin::BI__builtin_labs: |
2941 | 26 | case Builtin::BI__builtin_llabs: { |
2942 | 26 | bool SanitizeOverflow = SanOpts.has(SanitizerKind::SignedIntegerOverflow); |
2943 | | |
2944 | 26 | Value *Result; |
2945 | 26 | switch (getLangOpts().getSignedOverflowBehavior()) { |
2946 | 2 | case LangOptions::SOB_Defined: |
2947 | 2 | Result = EmitAbs(*this, EmitScalarExpr(E->getArg(0)), false); |
2948 | 2 | break; |
2949 | 22 | case LangOptions::SOB_Undefined: |
2950 | 22 | if (!SanitizeOverflow) { |
2951 | 20 | Result = EmitAbs(*this, EmitScalarExpr(E->getArg(0)), true); |
2952 | 20 | break; |
2953 | 20 | } |
2954 | 22 | [[fallthrough]];
2955 | 4 | case LangOptions::SOB_Trapping: |
2956 | | // TODO: Somehow handle the corner case when the address of abs is taken. |
2957 | 4 | Result = EmitOverflowCheckedAbs(*this, E, SanitizeOverflow); |
2958 | 4 | break; |
2959 | 26 | } |
2960 | 26 | return RValue::get(Result); |
2961 | 26 | } |
2962 | 4 | case Builtin::BI__builtin_complex: { |
2963 | 4 | Value *Real = EmitScalarExpr(E->getArg(0)); |
2964 | 4 | Value *Imag = EmitScalarExpr(E->getArg(1)); |
2965 | 4 | return RValue::getComplex({Real, Imag}); |
2966 | 26 | } |
2967 | 6 | case Builtin::BI__builtin_conj: |
2968 | 12 | case Builtin::BI__builtin_conjf: |
2969 | 18 | case Builtin::BI__builtin_conjl: |
2970 | 22 | case Builtin::BIconj: |
2971 | 26 | case Builtin::BIconjf: |
2972 | 30 | case Builtin::BIconjl: { |
2973 | 30 | ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0)); |
2974 | 30 | Value *Real = ComplexVal.first; |
2975 | 30 | Value *Imag = ComplexVal.second; |
2976 | 30 | Imag = Builder.CreateFNeg(Imag, "neg"); |
2977 | 30 | return RValue::getComplex(std::make_pair(Real, Imag)); |
2978 | 26 | } |
2979 | 4 | case Builtin::BI__builtin_creal: |
2980 | 8 | case Builtin::BI__builtin_crealf: |
2981 | 12 | case Builtin::BI__builtin_creall: |
2982 | 16 | case Builtin::BIcreal: |
2983 | 19 | case Builtin::BIcrealf: |
2984 | 22 | case Builtin::BIcreall: { |
2985 | 22 | ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0)); |
2986 | 22 | return RValue::get(ComplexVal.first); |
2987 | 19 | } |
2988 | | |
2989 | 20 | case Builtin::BI__builtin_preserve_access_index: { |
2990 | | // Only enable the preserved access index region when debug info
2991 | | // is available, as debug info is needed to preserve the user-level
2992 | | // access pattern.
2993 | 20 | if (!getDebugInfo()) { |
2994 | 0 | CGM.Error(E->getExprLoc(), "using builtin_preserve_access_index() without -g"); |
2995 | 0 | return RValue::get(EmitScalarExpr(E->getArg(0))); |
2996 | 0 | } |
2997 | | |
2998 | | // Nested builtin_preserve_access_index() not supported |
2999 | 20 | if (IsInPreservedAIRegion) { |
3000 | 0 | CGM.Error(E->getExprLoc(), "nested builtin_preserve_access_index() not supported"); |
3001 | 0 | return RValue::get(EmitScalarExpr(E->getArg(0))); |
3002 | 0 | } |
3003 | | |
3004 | 20 | IsInPreservedAIRegion = true; |
3005 | 20 | Value *Res = EmitScalarExpr(E->getArg(0)); |
3006 | 20 | IsInPreservedAIRegion = false; |
3007 | 20 | return RValue::get(Res); |
3008 | 20 | } |
3009 | | |
3010 | 4 | case Builtin::BI__builtin_cimag: |
3011 | 8 | case Builtin::BI__builtin_cimagf: |
3012 | 12 | case Builtin::BI__builtin_cimagl: |
3013 | 15 | case Builtin::BIcimag: |
3014 | 18 | case Builtin::BIcimagf: |
3015 | 21 | case Builtin::BIcimagl: { |
3016 | 21 | ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0)); |
3017 | 21 | return RValue::get(ComplexVal.second); |
3018 | 18 | } |
3019 | | |
3020 | 1 | case Builtin::BI__builtin_clrsb: |
3021 | 1 | case Builtin::BI__builtin_clrsbl: |
3022 | 2 | case Builtin::BI__builtin_clrsbll: { |
3023 | | // clrsb(x) -> clz(x < 0 ? ~x : x) - 1
3024 | 2 | Value *ArgValue = EmitScalarExpr(E->getArg(0)); |
3025 | | |
3026 | 2 | llvm::Type *ArgType = ArgValue->getType(); |
3027 | 2 | Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType); |
3028 | | |
3029 | 2 | llvm::Type *ResultType = ConvertType(E->getType()); |
3030 | 2 | Value *Zero = llvm::Constant::getNullValue(ArgType); |
3031 | 2 | Value *IsNeg = Builder.CreateICmpSLT(ArgValue, Zero, "isneg"); |
3032 | 2 | Value *Inverse = Builder.CreateNot(ArgValue, "not"); |
3033 | 2 | Value *Tmp = Builder.CreateSelect(IsNeg, Inverse, ArgValue); |
3034 | 2 | Value *Ctlz = Builder.CreateCall(F, {Tmp, Builder.getFalse()}); |
3035 | 2 | Value *Result = Builder.CreateSub(Ctlz, llvm::ConstantInt::get(ArgType, 1)); |
3036 | 2 | Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, |
3037 | 2 | "cast"); |
3038 | 2 | return RValue::get(Result); |
3039 | 1 | } |
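
A worked instance of the rewrite in the comment above, for 32-bit int: x = -2 is 0xFFFFFFFE, so the select feeds ~x = 0x00000001 into ctlz; ctlz yields 31, and 31 - 1 = 30 redundant sign bits:

#include <cassert>
int main() { assert(__builtin_clrsb(-2) == 30); } // clz(~(-2)) - 1 == 31 - 1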
3040 | 2 | case Builtin::BI__builtin_ctzs: |
3041 | 14 | case Builtin::BI__builtin_ctz: |
3042 | 18 | case Builtin::BI__builtin_ctzl: |
3043 | 31 | case Builtin::BI__builtin_ctzll: { |
3044 | 31 | Value *ArgValue = EmitCheckedArgForBuiltin(E->getArg(0), BCK_CTZPassedZero); |
3045 | | |
3046 | 31 | llvm::Type *ArgType = ArgValue->getType(); |
3047 | 31 | Function *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType); |
3048 | | |
3049 | 31 | llvm::Type *ResultType = ConvertType(E->getType()); |
3050 | 31 | Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef()); |
3051 | 31 | Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef}); |
3052 | 31 | if (Result->getType() != ResultType) |
3053 | 19 | Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, |
3054 | 19 | "cast"); |
3055 | 31 | return RValue::get(Result); |
3056 | 18 | } |
3057 | 2 | case Builtin::BI__builtin_clzs: |
3058 | 29 | case Builtin::BI__builtin_clz: |
3059 | 40 | case Builtin::BI__builtin_clzl: |
3060 | 52 | case Builtin::BI__builtin_clzll: { |
3061 | 52 | Value *ArgValue = EmitCheckedArgForBuiltin(E->getArg(0), BCK_CLZPassedZero); |
3062 | | |
3063 | 52 | llvm::Type *ArgType = ArgValue->getType(); |
3064 | 52 | Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType); |
3065 | | |
3066 | 52 | llvm::Type *ResultType = ConvertType(E->getType()); |
3067 | 52 | Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef()); |
3068 | 52 | Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef}); |
3069 | 52 | if (Result->getType() != ResultType) |
3070 | 25 | Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, |
3071 | 25 | "cast"); |
3072 | 52 | return RValue::get(Result); |
3073 | 40 | } |
3074 | 2 | case Builtin::BI__builtin_ffs: |
3075 | 4 | case Builtin::BI__builtin_ffsl: |
3076 | 6 | case Builtin::BI__builtin_ffsll: { |
3077 | | // ffs(x) -> x ? cttz(x) + 1 : 0 |
3078 | 6 | Value *ArgValue = EmitScalarExpr(E->getArg(0)); |
3079 | | |
3080 | 6 | llvm::Type *ArgType = ArgValue->getType(); |
3081 | 6 | Function *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType); |
3082 | | |
3083 | 6 | llvm::Type *ResultType = ConvertType(E->getType()); |
3084 | 6 | Value *Tmp = |
3085 | 6 | Builder.CreateAdd(Builder.CreateCall(F, {ArgValue, Builder.getTrue()}), |
3086 | 6 | llvm::ConstantInt::get(ArgType, 1)); |
3087 | 6 | Value *Zero = llvm::Constant::getNullValue(ArgType); |
3088 | 6 | Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero"); |
3089 | 6 | Value *Result = Builder.CreateSelect(IsZero, Zero, Tmp, "ffs"); |
3090 | 6 | if (Result->getType() != ResultType) |
3091 | 4 | Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, |
3092 | 4 | "cast"); |
3093 | 6 | return RValue::get(Result); |
3094 | 4 | } |
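
A worked instance of the ffs rewrite: cttz(8) = 3, so ffs(8) = 4 (positions are 1-based), and the final select pins ffs(0) to 0 instead of exposing cttz's unspecified zero-input result:

#include <cassert>
int main() {
  assert(__builtin_ffs(8) == 4); // 8 = 0b1000, cttz = 3, 3 + 1 = 4
  assert(__builtin_ffs(0) == 0); // select(iszero, 0, cttz + 1)
}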
3095 | 2 | case Builtin::BI__builtin_parity: |
3096 | 4 | case Builtin::BI__builtin_parityl: |
3097 | 6 | case Builtin::BI__builtin_parityll: { |
3098 | | // parity(x) -> ctpop(x) & 1 |
3099 | 6 | Value *ArgValue = EmitScalarExpr(E->getArg(0)); |
3100 | | |
3101 | 6 | llvm::Type *ArgType = ArgValue->getType(); |
3102 | 6 | Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType); |
3103 | | |
3104 | 6 | llvm::Type *ResultType = ConvertType(E->getType()); |
3105 | 6 | Value *Tmp = Builder.CreateCall(F, ArgValue); |
3106 | 6 | Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1)); |
3107 | 6 | if (Result->getType() != ResultType) |
3108 | 4 | Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, |
3109 | 4 | "cast"); |
3110 | 6 | return RValue::get(Result); |
3111 | 4 | } |
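
A worked instance of the parity rewrite, which is just the low bit of the population count:

#include <cassert>
int main() {
  assert(__builtin_parity(7) == 1); // ctpop(0b111) = 3, 3 & 1 = 1
  assert(__builtin_parity(6) == 0); // ctpop(0b110) = 2, 2 & 1 = 0
}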
3112 | 10 | case Builtin::BI__lzcnt16: |
3113 | 20 | case Builtin::BI__lzcnt: |
3114 | 30 | case Builtin::BI__lzcnt64: { |
3115 | 30 | Value *ArgValue = EmitScalarExpr(E->getArg(0)); |
3116 | | |
3117 | 30 | llvm::Type *ArgType = ArgValue->getType(); |
3118 | 30 | Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType); |
3119 | | |
3120 | 30 | llvm::Type *ResultType = ConvertType(E->getType()); |
3121 | 30 | Value *Result = Builder.CreateCall(F, {ArgValue, Builder.getFalse()}); |
3122 | 30 | if (Result->getType() != ResultType) |
3123 | 0 | Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, |
3124 | 0 | "cast"); |
3125 | 30 | return RValue::get(Result); |
3126 | 20 | } |
3127 | 10 | case Builtin::BI__popcnt16: |
3128 | 20 | case Builtin::BI__popcnt: |
3129 | 30 | case Builtin::BI__popcnt64: |
3130 | 39 | case Builtin::BI__builtin_popcount: |
3131 | 41 | case Builtin::BI__builtin_popcountl: |
3132 | 50 | case Builtin::BI__builtin_popcountll: { |
3133 | 50 | Value *ArgValue = EmitScalarExpr(E->getArg(0)); |
3134 | | |
3135 | 50 | llvm::Type *ArgType = ArgValue->getType(); |
3136 | 50 | Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType); |
3137 | | |
3138 | 50 | llvm::Type *ResultType = ConvertType(E->getType()); |
3139 | 50 | Value *Result = Builder.CreateCall(F, ArgValue); |
3140 | 50 | if (Result->getType() != ResultType) |
3141 | 11 | Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, |
3142 | 11 | "cast"); |
3143 | 50 | return RValue::get(Result); |
3144 | 41 | } |
3145 | 7 | case Builtin::BI__builtin_unpredictable: { |
3146 | | // Always return the argument of __builtin_unpredictable. LLVM does not |
3147 | | // handle this builtin. Metadata for this builtin should be added directly |
3148 | | // to instructions such as branches or switches that use it. |
3149 | 7 | return RValue::get(EmitScalarExpr(E->getArg(0))); |
3150 | 41 | } |
3151 | 144 | case Builtin::BI__builtin_expect: { |
3152 | 144 | Value *ArgValue = EmitScalarExpr(E->getArg(0)); |
3153 | 144 | llvm::Type *ArgType = ArgValue->getType(); |
3154 | | |
3155 | 144 | Value *ExpectedValue = EmitScalarExpr(E->getArg(1)); |
3156 | | // Don't generate llvm.expect on -O0 as the backend won't use it for |
3157 | | // anything. |
3158 | | // Note, we still IRGen ExpectedValue because it could have side-effects. |
3159 | 144 | if (CGM.getCodeGenOpts().OptimizationLevel == 0) |
3160 | 113 | return RValue::get(ArgValue); |
3161 | | |
3162 | 31 | Function *FnExpect = CGM.getIntrinsic(Intrinsic::expect, ArgType); |
3163 | 31 | Value *Result = |
3164 | 31 | Builder.CreateCall(FnExpect, {ArgValue, ExpectedValue}, "expval"); |
3165 | 31 | return RValue::get(Result); |
3166 | 144 | } |
3167 | 12 | case Builtin::BI__builtin_expect_with_probability: { |
3168 | 12 | Value *ArgValue = EmitScalarExpr(E->getArg(0)); |
3169 | 12 | llvm::Type *ArgType = ArgValue->getType(); |
3170 | | |
3171 | 12 | Value *ExpectedValue = EmitScalarExpr(E->getArg(1)); |
3172 | 12 | llvm::APFloat Probability(0.0); |
3173 | 12 | const Expr *ProbArg = E->getArg(2); |
3174 | 12 | bool EvalSucceed = ProbArg->EvaluateAsFloat(Probability, CGM.getContext()); |
3175 | 12 | assert(EvalSucceed && "probability should be able to evaluate as float"); |
3176 | 12 | (void)EvalSucceed; |
3177 | 12 | bool LoseInfo = false; |
3178 | 12 | Probability.convert(llvm::APFloat::IEEEdouble(), |
3179 | 12 | llvm::RoundingMode::Dynamic, &LoseInfo); |
3180 | 12 | llvm::Type *Ty = ConvertType(ProbArg->getType()); |
3181 | 12 | Constant *Confidence = ConstantFP::get(Ty, Probability); |
3182 | | // Don't generate llvm.expect.with.probability on -O0 as the backend |
3183 | | // won't use it for anything. |
3184 | | // Note, we still IRGen ExpectedValue because it could have side-effects. |
3185 | 12 | if (CGM.getCodeGenOpts().OptimizationLevel == 0) |
3186 | 6 | return RValue::get(ArgValue); |
3187 | | |
3188 | 6 | Function *FnExpect = |
3189 | 6 | CGM.getIntrinsic(Intrinsic::expect_with_probability, ArgType); |
3190 | 6 | Value *Result = Builder.CreateCall( |
3191 | 6 | FnExpect, {ArgValue, ExpectedValue, Confidence}, "expval"); |
3192 | 6 | return RValue::get(Result); |
3193 | 12 | } |
3194 | 31 | case Builtin::BI__builtin_assume_aligned: { |
3195 | 31 | const Expr *Ptr = E->getArg(0); |
3196 | 31 | Value *PtrValue = EmitScalarExpr(Ptr); |
3197 | 31 | Value *OffsetValue = |
3198 | 31 | (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) : nullptr;
3199 | | |
3200 | 31 | Value *AlignmentValue = EmitScalarExpr(E->getArg(1)); |
3201 | 31 | ConstantInt *AlignmentCI = cast<ConstantInt>(AlignmentValue); |
3202 | 31 | if (AlignmentCI->getValue().ugt(llvm::Value::MaximumAlignment)) |
3203 | 0 | AlignmentCI = ConstantInt::get(AlignmentCI->getType(), |
3204 | 0 | llvm::Value::MaximumAlignment); |
3205 | | |
3206 | 31 | emitAlignmentAssumption(PtrValue, Ptr, |
3207 | 31 | /*The expr loc is sufficient.*/ SourceLocation(), |
3208 | 31 | AlignmentCI, OffsetValue); |
3209 | 31 | return RValue::get(PtrValue); |
3210 | 12 | } |
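
Typical source-level use of the case above; the emitted alignment assumption is what later lets passes rely on the stated alignment (misstating it is undefined behavior, not a checked error). A minimal sketch:

float *aligned_view(float *p) {
  // Promise: p is 64-byte aligned. An optional third argument would state a
  // byte offset from that alignment boundary instead.
  return static_cast<float *>(__builtin_assume_aligned(p, 64));
}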
3211 | 0 | case Builtin::BI__assume: |
3212 | 12 | case Builtin::BI__builtin_assume: { |
3213 | 12 | if (E->getArg(0)->HasSideEffects(getContext())) |
3214 | 4 | return RValue::get(nullptr); |
3215 | | |
3216 | 8 | Value *ArgValue = EmitScalarExpr(E->getArg(0)); |
3217 | 8 | Function *FnAssume = CGM.getIntrinsic(Intrinsic::assume); |
3218 | 8 | Builder.CreateCall(FnAssume, ArgValue); |
3219 | 8 | return RValue::get(nullptr); |
3220 | 12 | } |
3221 | 2 | case Builtin::BI__builtin_assume_separate_storage: { |
3222 | 2 | const Expr *Arg0 = E->getArg(0); |
3223 | 2 | const Expr *Arg1 = E->getArg(1); |
3224 | | |
3225 | 2 | Value *Value0 = EmitScalarExpr(Arg0); |
3226 | 2 | Value *Value1 = EmitScalarExpr(Arg1); |
3227 | | |
3228 | 2 | Value *Values[] = {Value0, Value1}; |
3229 | 2 | OperandBundleDefT<Value *> OBD("separate_storage", Values); |
3230 | 2 | Builder.CreateAssumption(ConstantInt::getTrue(getLLVMContext()), {OBD}); |
3231 | 2 | return RValue::get(nullptr); |
3232 | 12 | } |
3233 | 42 | case Builtin::BI__arithmetic_fence: { |
3234 | | // Create the builtin call if FastMath is selected, and the target |
3235 | | // supports the builtin, otherwise just return the argument. |
3236 | 42 | CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E); |
3237 | 42 | llvm::FastMathFlags FMF = Builder.getFastMathFlags(); |
3238 | 42 | bool isArithmeticFenceEnabled = |
3239 | 42 | FMF.allowReassoc() && |
3240 | 42 | getContext().getTargetInfo().checkArithmeticFenceSupported();
3241 | 42 | QualType ArgType = E->getArg(0)->getType(); |
3242 | 42 | if (ArgType->isComplexType()) { |
3243 | 7 | if (isArithmeticFenceEnabled) { |
3244 | 5 | QualType ElementType = ArgType->castAs<ComplexType>()->getElementType(); |
3245 | 5 | ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0)); |
3246 | 5 | Value *Real = Builder.CreateArithmeticFence(ComplexVal.first, |
3247 | 5 | ConvertType(ElementType)); |
3248 | 5 | Value *Imag = Builder.CreateArithmeticFence(ComplexVal.second, |
3249 | 5 | ConvertType(ElementType)); |
3250 | 5 | return RValue::getComplex(std::make_pair(Real, Imag)); |
3251 | 5 | } |
3252 | 2 | ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0)); |
3253 | 2 | Value *Real = ComplexVal.first; |
3254 | 2 | Value *Imag = ComplexVal.second; |
3255 | 2 | return RValue::getComplex(std::make_pair(Real, Imag)); |
3256 | 7 | } |
3257 | 35 | Value *ArgValue = EmitScalarExpr(E->getArg(0)); |
3258 | 35 | if (isArithmeticFenceEnabled) |
3259 | 25 | return RValue::get( |
3260 | 25 | Builder.CreateArithmeticFence(ArgValue, ConvertType(ArgType))); |
3261 | 10 | return RValue::get(ArgValue); |
3262 | 35 | } |
3263 | 12 | case Builtin::BI__builtin_bswap16: |
3264 | 29 | case Builtin::BI__builtin_bswap32: |
3265 | 45 | case Builtin::BI__builtin_bswap64: |
3266 | 46 | case Builtin::BI_byteswap_ushort: |
3267 | 47 | case Builtin::BI_byteswap_ulong: |
3268 | 48 | case Builtin::BI_byteswap_uint64: { |
3269 | 48 | return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bswap)); |
3270 | 47 | } |
3271 | 3 | case Builtin::BI__builtin_bitreverse8: |
3272 | 6 | case Builtin::BI__builtin_bitreverse16: |
3273 | 9 | case Builtin::BI__builtin_bitreverse32: |
3274 | 12 | case Builtin::BI__builtin_bitreverse64: { |
3275 | 12 | return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bitreverse)); |
3276 | 9 | } |
3277 | 14 | case Builtin::BI__builtin_rotateleft8: |
3278 | 28 | case Builtin::BI__builtin_rotateleft16: |
3279 | 44 | case Builtin::BI__builtin_rotateleft32: |
3280 | 54 | case Builtin::BI__builtin_rotateleft64: |
3281 | 60 | case Builtin::BI_rotl8: // Microsoft variants of rotate left |
3282 | 66 | case Builtin::BI_rotl16: |
3283 | 76 | case Builtin::BI_rotl: |
3284 | 86 | case Builtin::BI_lrotl: |
3285 | 92 | case Builtin::BI_rotl64: |
3286 | 92 | return emitRotate(E, false); |
3287 | | |
3288 | 14 | case Builtin::BI__builtin_rotateright8: |
3289 | 28 | case Builtin::BI__builtin_rotateright16: |
3290 | 42 | case Builtin::BI__builtin_rotateright32: |
3291 | 50 | case Builtin::BI__builtin_rotateright64: |
3292 | 56 | case Builtin::BI_rotr8: // Microsoft variants of rotate right |
3293 | 62 | case Builtin::BI_rotr16: |
3294 | 72 | case Builtin::BI_rotr: |
3295 | 82 | case Builtin::BI_lrotr: |
3296 | 88 | case Builtin::BI_rotr64: |
3297 | 88 | return emitRotate(E, true); |
3298 | | |
3299 | 34 | case Builtin::BI__builtin_constant_p: { |
3300 | 34 | llvm::Type *ResultType = ConvertType(E->getType()); |
3301 | | |
3302 | 34 | const Expr *Arg = E->getArg(0); |
3303 | 34 | QualType ArgType = Arg->getType(); |
3304 | | // FIXME: The allowance for Obj-C pointers and block pointers is historical |
3305 | | // and likely a mistake. |
3306 | 34 | if (!ArgType->isIntegralOrEnumerationType() && !ArgType->isFloatingType() &&
3307 | 34 | !ArgType->isObjCObjectPointerType() && !ArgType->isBlockPointerType())
3308 | | // Per the GCC documentation, only numeric constants are recognized after |
3309 | | // inlining. |
3310 | 13 | return RValue::get(ConstantInt::get(ResultType, 0)); |
3311 | | |
3312 | 21 | if (Arg->HasSideEffects(getContext())) |
3313 | | // The argument is unevaluated, so be conservative if it might have |
3314 | | // side-effects. |
3315 | 0 | return RValue::get(ConstantInt::get(ResultType, 0)); |
3316 | | |
3317 | 21 | Value *ArgValue = EmitScalarExpr(Arg); |
3318 | 21 | if (ArgType->isObjCObjectPointerType()) { |
3319 | | // Convert Objective-C objects to id because we cannot distinguish between |
3320 | | // LLVM types for Obj-C classes as they are opaque. |
3321 | 4 | ArgType = CGM.getContext().getObjCIdType(); |
3322 | 4 | ArgValue = Builder.CreateBitCast(ArgValue, ConvertType(ArgType)); |
3323 | 4 | } |
3324 | 21 | Function *F = |
3325 | 21 | CGM.getIntrinsic(Intrinsic::is_constant, ConvertType(ArgType)); |
3326 | 21 | Value *Result = Builder.CreateCall(F, ArgValue); |
3327 | 21 | if (Result->getType() != ResultType) |
3328 | 21 | Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/false); |
3329 | 21 | return RValue::get(Result); |
3330 | 21 | } |
3331 | 156 | case Builtin::BI__builtin_dynamic_object_size: |
3332 | 294 | case Builtin::BI__builtin_object_size: { |
3333 | 294 | unsigned Type = |
3334 | 294 | E->getArg(1)->EvaluateKnownConstInt(getContext()).getZExtValue(); |
3335 | 294 | auto *ResType = cast<llvm::IntegerType>(ConvertType(E->getType())); |
3336 | | |
3337 | | // We pass this builtin onto the optimizer so that it can figure out the |
3338 | | // object size in more complex cases. |
3339 | 294 | bool IsDynamic = BuiltinID == Builtin::BI__builtin_dynamic_object_size; |
3340 | 294 | return RValue::get(emitBuiltinObjectSize(E->getArg(0), Type, ResType, |
3341 | 294 | /*EmittedE=*/nullptr, IsDynamic)); |
3342 | 156 | } |
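
The Type operand decoded above uses the usual 0-3 encoding: bit 0 restricts the query to the closest enclosing subobject, bit 1 selects the minimum rather than maximum remaining size, and the unknown-answer sentinels are (size_t)-1 for types 0/1 versus 0 for types 2/3. For a statically sized object all modes agree; a small sketch:

#include <cstddef>
char buf[16];
std::size_t hi = __builtin_object_size(buf + 4, 0); // 12 bytes remain to the end
std::size_t lo = __builtin_object_size(buf + 4, 2); // also 12; would be 0 if unknown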
3343 | 25 | case Builtin::BI__builtin_prefetch: { |
3344 | 25 | Value *Locality, *RW, *Address = EmitScalarExpr(E->getArg(0)); |
3345 | | // FIXME: Technically these constants should be of type 'int', yes?
3346 | 25 | RW = (E->getNumArgs() > 1) ? EmitScalarExpr(E->getArg(1)) :
3347 | 25 | llvm::ConstantInt::get(Int32Ty, 0);
3348 | 25 | Locality = (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) :
3349 | 25 | llvm::ConstantInt::get(Int32Ty, 3);
3350 | 25 | Value *Data = llvm::ConstantInt::get(Int32Ty, 1); |
3351 | 25 | Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType()); |
3352 | 25 | Builder.CreateCall(F, {Address, RW, Locality, Data}); |
3353 | 25 | return RValue::get(nullptr); |
3354 | 156 | } |
3355 | 2 | case Builtin::BI__builtin_readcyclecounter: { |
3356 | 2 | Function *F = CGM.getIntrinsic(Intrinsic::readcyclecounter); |
3357 | 2 | return RValue::get(Builder.CreateCall(F)); |
3358 | 156 | } |
3359 | 1 | case Builtin::BI__builtin___clear_cache: { |
3360 | 1 | Value *Begin = EmitScalarExpr(E->getArg(0)); |
3361 | 1 | Value *End = EmitScalarExpr(E->getArg(1)); |
3362 | 1 | Function *F = CGM.getIntrinsic(Intrinsic::clear_cache); |
3363 | 1 | return RValue::get(Builder.CreateCall(F, {Begin, End})); |
3364 | 156 | } |
3365 | 53 | case Builtin::BI__builtin_trap: |
3366 | 53 | EmitTrapCall(Intrinsic::trap); |
3367 | 53 | return RValue::get(nullptr); |
3368 | 0 | case Builtin::BI__debugbreak: |
3369 | 0 | EmitTrapCall(Intrinsic::debugtrap); |
3370 | 0 | return RValue::get(nullptr); |
3371 | 34 | case Builtin::BI__builtin_unreachable: { |
3372 | 34 | EmitUnreachable(E->getExprLoc()); |
3373 | | |
3374 | | // We do need to preserve an insertion point. |
3375 | 34 | EmitBlock(createBasicBlock("unreachable.cont")); |
3376 | | |
3377 | 34 | return RValue::get(nullptr); |
3378 | 156 | } |
3379 | | |
3380 | 10 | case Builtin::BI__builtin_powi: |
3381 | 20 | case Builtin::BI__builtin_powif: |
3382 | 30 | case Builtin::BI__builtin_powil: { |
3383 | 30 | llvm::Value *Src0 = EmitScalarExpr(E->getArg(0)); |
3384 | 30 | llvm::Value *Src1 = EmitScalarExpr(E->getArg(1)); |
3385 | | |
3386 | 30 | if (Builder.getIsFPConstrained()) { |
3387 | | // FIXME: llvm.powi has 2 mangling types, |
3388 | | // llvm.experimental.constrained.powi has one. |
3389 | 3 | CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E); |
3390 | 3 | Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_powi, |
3391 | 3 | Src0->getType()); |
3392 | 3 | return RValue::get(Builder.CreateConstrainedFPCall(F, { Src0, Src1 })); |
3393 | 3 | } |
3394 | | |
3395 | 27 | Function *F = CGM.getIntrinsic(Intrinsic::powi, |
3396 | 27 | { Src0->getType(), Src1->getType() }); |
3397 | 27 | return RValue::get(Builder.CreateCall(F, { Src0, Src1 })); |
3398 | 30 | } |
3399 | 10 | case Builtin::BI__builtin_frexp: |
3400 | 20 | case Builtin::BI__builtin_frexpf: |
3401 | 31 | case Builtin::BI__builtin_frexpl: |
3402 | 38 | case Builtin::BI__builtin_frexpf128: |
3403 | 39 | case Builtin::BI__builtin_frexpf16: |
3404 | 39 | return RValue::get(emitFrexpBuiltin(*this, E, Intrinsic::frexp)); |
3405 | 30 | case Builtin::BI__builtin_isgreater: |
3406 | 56 | case Builtin::BI__builtin_isgreaterequal: |
3407 | 83 | case Builtin::BI__builtin_isless: |
3408 | 109 | case Builtin::BI__builtin_islessequal: |
3409 | 135 | case Builtin::BI__builtin_islessgreater: |
3410 | 186 | case Builtin::BI__builtin_isunordered: { |
3411 | | // Ordered comparisons: we know the arguments to these are matching scalar |
3412 | | // floating point values. |
3413 | 186 | CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E); |
3414 | 186 | Value *LHS = EmitScalarExpr(E->getArg(0)); |
3415 | 186 | Value *RHS = EmitScalarExpr(E->getArg(1)); |
3416 | | |
3417 | 186 | switch (BuiltinID) { |
3418 | 0 | default: llvm_unreachable("Unknown ordered comparison"); |
3419 | 30 | case Builtin::BI__builtin_isgreater: |
3420 | 30 | LHS = Builder.CreateFCmpOGT(LHS, RHS, "cmp"); |
3421 | 30 | break; |
3422 | 26 | case Builtin::BI__builtin_isgreaterequal: |
3423 | 26 | LHS = Builder.CreateFCmpOGE(LHS, RHS, "cmp"); |
3424 | 26 | break; |
3425 | 27 | case Builtin::BI__builtin_isless: |
3426 | 27 | LHS = Builder.CreateFCmpOLT(LHS, RHS, "cmp"); |
3427 | 27 | break; |
3428 | 26 | case Builtin::BI__builtin_islessequal: |
3429 | 26 | LHS = Builder.CreateFCmpOLE(LHS, RHS, "cmp"); |
3430 | 26 | break; |
3431 | 26 | case Builtin::BI__builtin_islessgreater: |
3432 | 26 | LHS = Builder.CreateFCmpONE(LHS, RHS, "cmp"); |
3433 | 26 | break; |
3434 | 51 | case Builtin::BI__builtin_isunordered: |
3435 | 51 | LHS = Builder.CreateFCmpUNO(LHS, RHS, "cmp"); |
3436 | 51 | break; |
3437 | 186 | } |
3438 | | // ZExt bool to int type. |
3439 | 186 | return RValue::get(Builder.CreateZExt(LHS, ConvertType(E->getType()))); |
3440 | 186 | } |
3441 | | |
3442 | 36 | case Builtin::BI__builtin_isnan: { |
3443 | 36 | CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E); |
3444 | 36 | Value *V = EmitScalarExpr(E->getArg(0)); |
3445 | 36 | if (Value *Result = tryUseTestFPKind(*this, BuiltinID, V)) |
3446 | 0 | return RValue::get(Result); |
3447 | 36 | return RValue::get( |
3448 | 36 | Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcNan), |
3449 | 36 | ConvertType(E->getType()))); |
3450 | 36 | } |
3451 | | |
3452 | 2 | case Builtin::BI__builtin_issignaling: { |
3453 | 2 | CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E); |
3454 | 2 | Value *V = EmitScalarExpr(E->getArg(0)); |
3455 | 2 | return RValue::get( |
3456 | 2 | Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcSNan), |
3457 | 2 | ConvertType(E->getType()))); |
3458 | 36 | } |
3459 | | |
3460 | 26 | case Builtin::BI__builtin_isinf: { |
3461 | 26 | CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E); |
3462 | 26 | Value *V = EmitScalarExpr(E->getArg(0)); |
3463 | 26 | if (Value *Result = tryUseTestFPKind(*this, BuiltinID, V)) |
3464 | 0 | return RValue::get(Result); |
3465 | 26 | return RValue::get( |
3466 | 26 | Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcInf), |
3467 | 26 | ConvertType(E->getType()))); |
3468 | 26 | } |
3469 | | |
3470 | 2 | case Builtin::BIfinite: |
3471 | 2 | case Builtin::BI__finite: |
3472 | 2 | case Builtin::BIfinitef: |
3473 | 2 | case Builtin::BI__finitef: |
3474 | 2 | case Builtin::BIfinitel: |
3475 | 2 | case Builtin::BI__finitel: |
3476 | 20 | case Builtin::BI__builtin_isfinite: { |
3477 | 20 | CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E); |
3478 | 20 | Value *V = EmitScalarExpr(E->getArg(0)); |
3479 | 20 | if (Value *Result = tryUseTestFPKind(*this, BuiltinID, V)) |
3480 | 0 | return RValue::get(Result); |
3481 | 20 | return RValue::get( |
3482 | 20 | Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcFinite), |
3483 | 20 | ConvertType(E->getType()))); |
3484 | 20 | } |
3485 | | |
3486 | 6 | case Builtin::BI__builtin_isnormal: { |
3487 | 6 | CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E); |
3488 | 6 | Value *V = EmitScalarExpr(E->getArg(0)); |
3489 | 6 | return RValue::get( |
3490 | 6 | Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcNormal), |
3491 | 6 | ConvertType(E->getType()))); |
3492 | 20 | } |
3493 | | |
3494 | 2 | case Builtin::BI__builtin_issubnormal: { |
3495 | 2 | CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E); |
3496 | 2 | Value *V = EmitScalarExpr(E->getArg(0)); |
3497 | 2 | return RValue::get( |
3498 | 2 | Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcSubnormal), |
3499 | 2 | ConvertType(E->getType()))); |
3500 | 20 | } |
3501 | | |
3502 | 2 | case Builtin::BI__builtin_iszero: { |
3503 | 2 | CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E); |
3504 | 2 | Value *V = EmitScalarExpr(E->getArg(0)); |
3505 | 2 | return RValue::get( |
3506 | 2 | Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcZero), |
3507 | 2 | ConvertType(E->getType()))); |
3508 | 20 | } |
3509 | | |
3510 | 11 | case Builtin::BI__builtin_isfpclass: { |
3511 | 11 | Expr::EvalResult Result; |
3512 | 11 | if (!E->getArg(1)->EvaluateAsInt(Result, CGM.getContext())) |
3513 | 0 | break; |
3514 | 11 | uint64_t Test = Result.Val.getInt().getLimitedValue(); |
3515 | 11 | CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E); |
3516 | 11 | Value *V = EmitScalarExpr(E->getArg(0)); |
3517 | 11 | return RValue::get(Builder.CreateZExt(Builder.createIsFPClass(V, Test), |
3518 | 11 | ConvertType(E->getType()))); |
3519 | 11 | } |
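
The classification builtins above (isnan, issignaling, isinf, isfinite, isnormal, issubnormal, iszero, isfpclass) all funnel into Builder.createIsFPClass, i.e. one llvm.is.fpclass call, unless a target fast path via tryUseTestFPKind applies; __builtin_isfpclass merely exposes the raw test mask, which must be an integer constant expression. A sketch, assuming the mask bits mirror llvm::FPClassTest at this revision (fcSNan = 0x001, fcQNan = 0x002, fcNegInf = 0x004, fcPosInf = 0x200 -- these constants are an assumption, not a stable interface):

    // isfpclass_demo.cpp -- illustrative only; mask values are assumed.
    int is_nan_or_inf(double x) {
      // 0x003 = either NaN kind, 0x204 = either infinity.
      return __builtin_isfpclass(x, 0x003 | 0x204);  // one llvm.is.fpclass call
    }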
3520 | | |
3521 | 57 | case Builtin::BI__builtin_nondeterministic_value: { |
3522 | 57 | llvm::Type *Ty = ConvertType(E->getArg(0)->getType()); |
3523 | | |
3524 | 57 | Value *Result = PoisonValue::get(Ty); |
3525 | 57 | Result = Builder.CreateFreeze(Result); |
3526 | | |
3527 | 57 | return RValue::get(Result); |
3528 | 11 | } |
3529 | | |
3530 | 97 | case Builtin::BI__builtin_elementwise_abs: { |
3531 | 97 | Value *Result; |
3532 | 97 | QualType QT = E->getArg(0)->getType(); |
3533 | | |
3534 | 97 | if (auto *VecTy = QT->getAs<VectorType>()) |
3535 | 78 | QT = VecTy->getElementType(); |
3536 | 97 | if (QT->isIntegerType()) |
3537 | 69 | Result = Builder.CreateBinaryIntrinsic( |
3538 | 69 | llvm::Intrinsic::abs, EmitScalarExpr(E->getArg(0)), |
3539 | 69 | Builder.getFalse(), nullptr, "elt.abs"); |
3540 | 28 | else |
3541 | 28 | Result = emitUnaryBuiltin(*this, E, llvm::Intrinsic::fabs, "elt.abs"); |
3542 | | |
3543 | 97 | return RValue::get(Result); |
3544 | 11 | } |
3545 | | |
3546 | 28 | case Builtin::BI__builtin_elementwise_ceil: |
3547 | 28 | return RValue::get( |
3548 | 28 | emitUnaryBuiltin(*this, E, llvm::Intrinsic::ceil, "elt.ceil")); |
3549 | 4 | case Builtin::BI__builtin_elementwise_exp: |
3550 | 4 | return RValue::get( |
3551 | 4 | emitUnaryBuiltin(*this, E, llvm::Intrinsic::exp, "elt.exp")); |
3552 | 4 | case Builtin::BI__builtin_elementwise_exp2: |
3553 | 4 | return RValue::get( |
3554 | 4 | emitUnaryBuiltin(*this, E, llvm::Intrinsic::exp2, "elt.exp2")); |
3555 | 20 | case Builtin::BI__builtin_elementwise_log: |
3556 | 20 | return RValue::get( |
3557 | 20 | emitUnaryBuiltin(*this, E, llvm::Intrinsic::log, "elt.log")); |
3558 | 21 | case Builtin::BI__builtin_elementwise_log2: |
3559 | 21 | return RValue::get( |
3560 | 21 | emitUnaryBuiltin(*this, E, llvm::Intrinsic::log2, "elt.log2")); |
3561 | 19 | case Builtin::BI__builtin_elementwise_log10: |
3562 | 19 | return RValue::get( |
3563 | 19 | emitUnaryBuiltin(*this, E, llvm::Intrinsic::log10, "elt.log10")); |
3564 | 28 | case Builtin::BI__builtin_elementwise_pow: { |
3565 | 28 | return RValue::get(emitBinaryBuiltin(*this, E, llvm::Intrinsic::pow)); |
3566 | 11 | } |
3567 | 55 | case Builtin::BI__builtin_elementwise_bitreverse: |
3568 | 55 | return RValue::get(emitUnaryBuiltin(*this, E, llvm::Intrinsic::bitreverse, |
3569 | 55 | "elt.bitreverse")); |
3570 | 20 | case Builtin::BI__builtin_elementwise_cos: |
3571 | 20 | return RValue::get( |
3572 | 20 | emitUnaryBuiltin(*this, E, llvm::Intrinsic::cos, "elt.cos")); |
3573 | 28 | case Builtin::BI__builtin_elementwise_floor: |
3574 | 28 | return RValue::get( |
3575 | 28 | emitUnaryBuiltin(*this, E, llvm::Intrinsic::floor, "elt.floor")); |
3576 | 4 | case Builtin::BI__builtin_elementwise_roundeven: |
3577 | 4 | return RValue::get(emitUnaryBuiltin(*this, E, llvm::Intrinsic::roundeven, |
3578 | 4 | "elt.roundeven")); |
3579 | 4 | case Builtin::BI__builtin_elementwise_round: |
3580 | 4 | return RValue::get(emitUnaryBuiltin(*this, E, llvm::Intrinsic::round, |
3581 | 4 | "elt.round")); |
3582 | 4 | case Builtin::BI__builtin_elementwise_rint: |
3583 | 4 | return RValue::get(emitUnaryBuiltin(*this, E, llvm::Intrinsic::rint, |
3584 | 4 | "elt.rint")); |
3585 | 4 | case Builtin::BI__builtin_elementwise_nearbyint: |
3586 | 4 | return RValue::get(emitUnaryBuiltin(*this, E, llvm::Intrinsic::nearbyint, |
3587 | 4 | "elt.nearbyint")); |
3588 | 20 | case Builtin::BI__builtin_elementwise_sin: |
3589 | 20 | return RValue::get( |
3590 | 20 | emitUnaryBuiltin(*this, E, llvm::Intrinsic::sin, "elt.sin")); |
3591 | | |
3592 | 20 | case Builtin::BI__builtin_elementwise_trunc: |
3593 | 20 | return RValue::get( |
3594 | 20 | emitUnaryBuiltin(*this, E, llvm::Intrinsic::trunc, "elt.trunc")); |
3595 | 4 | case Builtin::BI__builtin_elementwise_canonicalize: |
3596 | 4 | return RValue::get( |
3597 | 4 | emitUnaryBuiltin(*this, E, llvm::Intrinsic::canonicalize, "elt.canonicalize")); |
3598 | 10 | case Builtin::BI__builtin_elementwise_copysign: |
3599 | 10 | return RValue::get(emitBinaryBuiltin(*this, E, llvm::Intrinsic::copysign)); |
3600 | 15 | case Builtin::BI__builtin_elementwise_fma: |
3601 | 15 | return RValue::get(emitTernaryBuiltin(*this, E, llvm::Intrinsic::fma)); |
3602 | 69 | case Builtin::BI__builtin_elementwise_add_sat: |
3603 | 138 | case Builtin::BI__builtin_elementwise_sub_sat: { |
3604 | 138 | Value *Op0 = EmitScalarExpr(E->getArg(0)); |
3605 | 138 | Value *Op1 = EmitScalarExpr(E->getArg(1)); |
3606 | 138 | Value *Result; |
3607 | 138 | assert(Op0->getType()->isIntOrIntVectorTy() && "integer type expected"); |
3608 | 138 | QualType Ty = E->getArg(0)->getType(); |
3609 | 138 | if (auto *VecTy = Ty->getAs<VectorType>()) |
3610 | 124 | Ty = VecTy->getElementType(); |
3611 | 138 | bool IsSigned = Ty->isSignedIntegerType(); |
3612 | 138 | unsigned Opc; |
3613 | 138 | if (BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_elementwise_add_sat) |
3614 | 69 |       Opc = IsSigned ? llvm::Intrinsic::sadd_sat : llvm::Intrinsic::uadd_sat;
3615 | 69 | else |
3616 | 69 |       Opc = IsSigned ? llvm::Intrinsic::ssub_sat : llvm::Intrinsic::usub_sat;
3617 | 138 | Result = Builder.CreateBinaryIntrinsic(Opc, Op0, Op1, nullptr, "elt.sat"); |
3618 | 138 | return RValue::get(Result); |
3619 | 138 | } |
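
Note how the saturating add/sub case picks the signed or unsigned intrinsic from the element type of the first argument, so one builtin name covers both. A short sketch (illustrative, not part of this file):

    // elementwise_sat_demo.cpp -- illustrative only.
    typedef int v4si __attribute__((vector_size(16)));
    typedef unsigned v4ui __attribute__((vector_size(16)));
    v4si clamp_add(v4si a, v4si b) {
      return __builtin_elementwise_add_sat(a, b);  // -> llvm.sadd.sat.v4i32
    }
    v4ui clamp_sub(v4ui a, v4ui b) {
      return __builtin_elementwise_sub_sat(a, b);  // -> llvm.usub.sat.v4i32
    }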
3620 | | |
3621 | 178 | case Builtin::BI__builtin_elementwise_max: { |
3622 | 178 | Value *Op0 = EmitScalarExpr(E->getArg(0)); |
3623 | 178 | Value *Op1 = EmitScalarExpr(E->getArg(1)); |
3624 | 178 | Value *Result; |
3625 | 178 | if (Op0->getType()->isIntOrIntVectorTy()) { |
3626 | 147 | QualType Ty = E->getArg(0)->getType(); |
3627 | 147 | if (auto *VecTy = Ty->getAs<VectorType>()) |
3628 | 128 | Ty = VecTy->getElementType(); |
3629 | 147 | Result = Builder.CreateBinaryIntrinsic(Ty->isSignedIntegerType() |
3630 | 147 |                                                  ? llvm::Intrinsic::smax
3631 | 147 |                                                  : llvm::Intrinsic::umax,
3632 | 147 | Op0, Op1, nullptr, "elt.max"); |
3633 | 147 | } else |
3634 | 31 | Result = Builder.CreateMaxNum(Op0, Op1, "elt.max"); |
3635 | 178 | return RValue::get(Result); |
3636 | 138 | } |
3637 | 178 | case Builtin::BI__builtin_elementwise_min: { |
3638 | 178 | Value *Op0 = EmitScalarExpr(E->getArg(0)); |
3639 | 178 | Value *Op1 = EmitScalarExpr(E->getArg(1)); |
3640 | 178 | Value *Result; |
3641 | 178 | if (Op0->getType()->isIntOrIntVectorTy()) { |
3642 | 147 | QualType Ty = E->getArg(0)->getType(); |
3643 | 147 | if (auto *VecTy = Ty->getAs<VectorType>()) |
3644 | 128 | Ty = VecTy->getElementType(); |
3645 | 147 | Result = Builder.CreateBinaryIntrinsic(Ty->isSignedIntegerType() |
3646 | 147 |                                                  ? llvm::Intrinsic::smin
3647 | 147 |                                                  : llvm::Intrinsic::umin,
3648 | 147 | Op0, Op1, nullptr, "elt.min"); |
3649 | 147 | } else |
3650 | 31 | Result = Builder.CreateMinNum(Op0, Op1, "elt.min"); |
3651 | 178 | return RValue::get(Result); |
3652 | 138 | } |
3653 | | |
3654 | 45 | case Builtin::BI__builtin_reduce_max: { |
3655 | 45 | auto GetIntrinsicID = [](QualType QT) { |
3656 | 45 | if (auto *VecTy = QT->getAs<VectorType>()) |
3657 | 45 | QT = VecTy->getElementType(); |
3658 | 45 | if (QT->isSignedIntegerType()) |
3659 | 22 | return llvm::Intrinsic::vector_reduce_smax; |
3660 | 23 | if (QT->isUnsignedIntegerType()) |
3661 | 21 | return llvm::Intrinsic::vector_reduce_umax; |
3662 | 2 | assert(QT->isFloatingType() && "must have a float here"); |
3663 | 2 | return llvm::Intrinsic::vector_reduce_fmax; |
3664 | 2 | }; |
3665 | 45 | return RValue::get(emitUnaryBuiltin( |
3666 | 45 |         *this, E, GetIntrinsicID(E->getArg(0)->getType()), "rdx.max"));
3667 | 138 | } |
3668 | | |
3669 | 45 | case Builtin::BI__builtin_reduce_min: { |
3670 | 45 | auto GetIntrinsicID = [](QualType QT) { |
3671 | 45 | if (auto *VecTy = QT->getAs<VectorType>()) |
3672 | 45 | QT = VecTy->getElementType(); |
3673 | 45 | if (QT->isSignedIntegerType()) |
3674 | 22 | return llvm::Intrinsic::vector_reduce_smin; |
3675 | 23 | if (QT->isUnsignedIntegerType()) |
3676 | 21 | return llvm::Intrinsic::vector_reduce_umin; |
3677 | 2 | assert(QT->isFloatingType() && "must have a float here"); |
3678 | 2 | return llvm::Intrinsic::vector_reduce_fmin; |
3679 | 2 | }; |
3680 | | |
3681 | 45 | return RValue::get(emitUnaryBuiltin( |
3682 | 45 | *this, E, GetIntrinsicID(E->getArg(0)->getType()), "rdx.min")); |
3683 | 138 | } |
3684 | | |
3685 | 24 | case Builtin::BI__builtin_reduce_add: |
3686 | 24 | return RValue::get(emitUnaryBuiltin( |
3687 | 24 | *this, E, llvm::Intrinsic::vector_reduce_add, "rdx.add")); |
3688 | 24 | case Builtin::BI__builtin_reduce_mul: |
3689 | 24 | return RValue::get(emitUnaryBuiltin( |
3690 | 24 | *this, E, llvm::Intrinsic::vector_reduce_mul, "rdx.mul")); |
3691 | 2 | case Builtin::BI__builtin_reduce_xor: |
3692 | 2 | return RValue::get(emitUnaryBuiltin( |
3693 | 2 | *this, E, llvm::Intrinsic::vector_reduce_xor, "rdx.xor")); |
3694 | 22 | case Builtin::BI__builtin_reduce_or: |
3695 | 22 | return RValue::get(emitUnaryBuiltin( |
3696 | 22 | *this, E, llvm::Intrinsic::vector_reduce_or, "rdx.or")); |
3697 | 22 | case Builtin::BI__builtin_reduce_and: |
3698 | 22 | return RValue::get(emitUnaryBuiltin( |
3699 | 22 | *this, E, llvm::Intrinsic::vector_reduce_and, "rdx.and")); |
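
Each reduction takes one vector operand and yields a scalar by emitting the matching llvm.vector.reduce.* intrinsic; only max and min need the element-type dispatch above, while the bitwise and arithmetic reductions use a fixed intrinsic. Illustrative usage (not part of this file):

    // reduce_demo.cpp -- illustrative only.
    typedef int v4si __attribute__((vector_size(16)));
    int sum(v4si v)     { return __builtin_reduce_add(v); }  // llvm.vector.reduce.add
    int largest(v4si v) { return __builtin_reduce_max(v); }  // llvm.vector.reduce.smax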
3700 | | |
3701 | 31 | case Builtin::BI__builtin_matrix_transpose: { |
3702 | 31 | auto *MatrixTy = E->getArg(0)->getType()->castAs<ConstantMatrixType>(); |
3703 | 31 | Value *MatValue = EmitScalarExpr(E->getArg(0)); |
3704 | 31 | MatrixBuilder MB(Builder); |
3705 | 31 | Value *Result = MB.CreateMatrixTranspose(MatValue, MatrixTy->getNumRows(), |
3706 | 31 | MatrixTy->getNumColumns()); |
3707 | 31 | return RValue::get(Result); |
3708 | 138 | } |
3709 | | |
3710 | 36 | case Builtin::BI__builtin_matrix_column_major_load: { |
3711 | 36 | MatrixBuilder MB(Builder); |
3712 | | // Emit everything that isn't dependent on the first parameter type |
3713 | 36 | Value *Stride = EmitScalarExpr(E->getArg(3)); |
3714 | 36 | const auto *ResultTy = E->getType()->getAs<ConstantMatrixType>(); |
3715 | 36 | auto *PtrTy = E->getArg(0)->getType()->getAs<PointerType>(); |
3716 | 36 | assert(PtrTy && "arg0 must be of pointer type"); |
3717 | 36 | bool IsVolatile = PtrTy->getPointeeType().isVolatileQualified(); |
3718 | | |
3719 | 36 | Address Src = EmitPointerWithAlignment(E->getArg(0)); |
3720 | 36 | EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(0)->getType(), |
3721 | 36 | E->getArg(0)->getExprLoc(), FD, 0); |
3722 | 36 | Value *Result = MB.CreateColumnMajorLoad( |
3723 | 36 | Src.getElementType(), Src.getPointer(), |
3724 | 36 | Align(Src.getAlignment().getQuantity()), Stride, IsVolatile, |
3725 | 36 | ResultTy->getNumRows(), ResultTy->getNumColumns(), |
3726 | 36 | "matrix"); |
3727 | 36 | return RValue::get(Result); |
3728 | 36 | } |
3729 | | |
3730 | 26 | case Builtin::BI__builtin_matrix_column_major_store: { |
3731 | 26 | MatrixBuilder MB(Builder); |
3732 | 26 | Value *Matrix = EmitScalarExpr(E->getArg(0)); |
3733 | 26 | Address Dst = EmitPointerWithAlignment(E->getArg(1)); |
3734 | 26 | Value *Stride = EmitScalarExpr(E->getArg(2)); |
3735 | | |
3736 | 26 | const auto *MatrixTy = E->getArg(0)->getType()->getAs<ConstantMatrixType>(); |
3737 | 26 | auto *PtrTy = E->getArg(1)->getType()->getAs<PointerType>(); |
3738 | 26 | assert(PtrTy && "arg1 must be of pointer type"); |
3739 | 26 | bool IsVolatile = PtrTy->getPointeeType().isVolatileQualified(); |
3740 | | |
3741 | 26 | EmitNonNullArgCheck(RValue::get(Dst.getPointer()), E->getArg(1)->getType(), |
3742 | 26 | E->getArg(1)->getExprLoc(), FD, 0); |
3743 | 26 | Value *Result = MB.CreateColumnMajorStore( |
3744 | 26 | Matrix, Dst.getPointer(), Align(Dst.getAlignment().getQuantity()), |
3745 | 26 | Stride, IsVolatile, MatrixTy->getNumRows(), MatrixTy->getNumColumns()); |
3746 | 26 | return RValue::get(Result); |
3747 | 26 | } |
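
The matrix builtins require -fenable-matrix; the load reads its column stride (in elements) from arg 3 and takes the shape from the result type, while the store reads its stride from arg 2, exactly as the two cases above do. A round-trip sketch (illustrative; the type and function names are made up):

    // matrix_demo.cpp -- illustrative only; compile with -fenable-matrix.
    typedef float m4x4 __attribute__((matrix_type(4, 4)));
    void transpose_in_memory(float *in, float *out) {
      m4x4 m = __builtin_matrix_column_major_load(in, 4, 4, /*Stride=*/4);
      m4x4 t = __builtin_matrix_transpose(m);
      __builtin_matrix_column_major_store(t, out, /*Stride=*/4);
    }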
3748 | | |
3749 | 9 | case Builtin::BI__builtin_isinf_sign: { |
3750 | | // isinf_sign(x) -> fabs(x) == infinity ? (signbit(x) ? -1 : 1) : 0 |
3751 | 9 | CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E); |
3752 | | // FIXME: for strictfp/IEEE-754 we need to not trap on SNaN here. |
3753 | 9 | Value *Arg = EmitScalarExpr(E->getArg(0)); |
3754 | 9 | Value *AbsArg = EmitFAbs(*this, Arg); |
3755 | 9 | Value *IsInf = Builder.CreateFCmpOEQ( |
3756 | 9 | AbsArg, ConstantFP::getInfinity(Arg->getType()), "isinf"); |
3757 | 9 | Value *IsNeg = EmitSignBit(*this, Arg); |
3758 | | |
3759 | 9 | llvm::Type *IntTy = ConvertType(E->getType()); |
3760 | 9 | Value *Zero = Constant::getNullValue(IntTy); |
3761 | 9 | Value *One = ConstantInt::get(IntTy, 1); |
3762 | 9 | Value *NegativeOne = ConstantInt::get(IntTy, -1); |
3763 | 9 | Value *SignResult = Builder.CreateSelect(IsNeg, NegativeOne, One); |
3764 | 9 | Value *Result = Builder.CreateSelect(IsInf, SignResult, Zero); |
3765 | 9 | return RValue::get(Result); |
3766 | 26 | } |
3767 | | |
3768 | 2 | case Builtin::BI__builtin_flt_rounds: { |
3769 | 2 | Function *F = CGM.getIntrinsic(Intrinsic::get_rounding); |
3770 | | |
3771 | 2 | llvm::Type *ResultType = ConvertType(E->getType()); |
3772 | 2 | Value *Result = Builder.CreateCall(F); |
3773 | 2 | if (Result->getType() != ResultType) |
3774 | 0 | Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, |
3775 | 0 | "cast"); |
3776 | 2 | return RValue::get(Result); |
3777 | 26 | } |
3778 | | |
3779 | 4 | case Builtin::BI__builtin_set_flt_rounds: { |
3780 | 4 | Function *F = CGM.getIntrinsic(Intrinsic::set_rounding); |
3781 | | |
3782 | 4 | Value *V = EmitScalarExpr(E->getArg(0)); |
3783 | 4 | Builder.CreateCall(F, V); |
3784 | 4 | return RValue::get(nullptr); |
3785 | 26 | } |
3786 | | |
3787 | 1 | case Builtin::BI__builtin_fpclassify: { |
3788 | 1 | CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E); |
3789 | | // FIXME: for strictfp/IEEE-754 we need to not trap on SNaN here. |
3790 | 1 | Value *V = EmitScalarExpr(E->getArg(5)); |
3791 | 1 | llvm::Type *Ty = ConvertType(E->getArg(5)->getType()); |
3792 | | |
3793 | | // Create Result |
3794 | 1 | BasicBlock *Begin = Builder.GetInsertBlock(); |
3795 | 1 | BasicBlock *End = createBasicBlock("fpclassify_end", this->CurFn); |
3796 | 1 | Builder.SetInsertPoint(End); |
3797 | 1 | PHINode *Result = |
3798 | 1 | Builder.CreatePHI(ConvertType(E->getArg(0)->getType()), 4, |
3799 | 1 | "fpclassify_result"); |
3800 | | |
3801 | | // if (V==0) return FP_ZERO |
3802 | 1 | Builder.SetInsertPoint(Begin); |
3803 | 1 | Value *IsZero = Builder.CreateFCmpOEQ(V, Constant::getNullValue(Ty), |
3804 | 1 | "iszero"); |
3805 | 1 | Value *ZeroLiteral = EmitScalarExpr(E->getArg(4)); |
3806 | 1 | BasicBlock *NotZero = createBasicBlock("fpclassify_not_zero", this->CurFn); |
3807 | 1 | Builder.CreateCondBr(IsZero, End, NotZero); |
3808 | 1 | Result->addIncoming(ZeroLiteral, Begin); |
3809 | | |
3810 | | // if (V != V) return FP_NAN |
3811 | 1 | Builder.SetInsertPoint(NotZero); |
3812 | 1 | Value *IsNan = Builder.CreateFCmpUNO(V, V, "cmp"); |
3813 | 1 | Value *NanLiteral = EmitScalarExpr(E->getArg(0)); |
3814 | 1 | BasicBlock *NotNan = createBasicBlock("fpclassify_not_nan", this->CurFn); |
3815 | 1 | Builder.CreateCondBr(IsNan, End, NotNan); |
3816 | 1 | Result->addIncoming(NanLiteral, NotZero); |
3817 | | |
3818 | | // if (fabs(V) == infinity) return FP_INFINITY |
3819 | 1 | Builder.SetInsertPoint(NotNan); |
3820 | 1 | Value *VAbs = EmitFAbs(*this, V); |
3821 | 1 | Value *IsInf = |
3822 | 1 | Builder.CreateFCmpOEQ(VAbs, ConstantFP::getInfinity(V->getType()), |
3823 | 1 | "isinf"); |
3824 | 1 | Value *InfLiteral = EmitScalarExpr(E->getArg(1)); |
3825 | 1 | BasicBlock *NotInf = createBasicBlock("fpclassify_not_inf", this->CurFn); |
3826 | 1 | Builder.CreateCondBr(IsInf, End, NotInf); |
3827 | 1 | Result->addIncoming(InfLiteral, NotNan); |
3828 | | |
3829 | | // if (fabs(V) >= MIN_NORMAL) return FP_NORMAL else FP_SUBNORMAL |
3830 | 1 | Builder.SetInsertPoint(NotInf); |
3831 | 1 | APFloat Smallest = APFloat::getSmallestNormalized( |
3832 | 1 | getContext().getFloatTypeSemantics(E->getArg(5)->getType())); |
3833 | 1 | Value *IsNormal = |
3834 | 1 | Builder.CreateFCmpUGE(VAbs, ConstantFP::get(V->getContext(), Smallest), |
3835 | 1 | "isnormal"); |
3836 | 1 | Value *NormalResult = |
3837 | 1 | Builder.CreateSelect(IsNormal, EmitScalarExpr(E->getArg(2)), |
3838 | 1 | EmitScalarExpr(E->getArg(3))); |
3839 | 1 | Builder.CreateBr(End); |
3840 | 1 | Result->addIncoming(NormalResult, NotInf); |
3841 | | |
3842 | | // return Result |
3843 | 1 | Builder.SetInsertPoint(End); |
3844 | 1 | return RValue::get(Result); |
3845 | 26 | } |
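
The argument order of __builtin_fpclassify is fixed and mirrors the chain of blocks just emitted: arguments 0 through 4 are the values to return for NaN, infinity, normal, subnormal, and zero, and argument 5 is the operand being classified (which is why the code above reads getArg(5) first). Illustrative usage:

    // fpclassify_demo.cpp -- illustrative only.
    #include <math.h>  // FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL, FP_ZERO
    int classify(double x) {
      return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL,
                                  FP_SUBNORMAL, FP_ZERO, x);
    }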
3846 | | |
3847 | | // An alloca will always return a pointer to the alloca (stack) address |
3848 | | // space. This address space need not be the same as the AST / Language |
3849 | | // default (e.g. in C / C++ auto vars are in the generic address space). At |
3850 | | // the AST level this is handled within CreateTempAlloca et al., but for the |
3851 | | // builtin / dynamic alloca we have to handle it here. We use an explicit cast |
3852 | | // instead of passing an AS to CreateAlloca so as to not inhibit optimisation. |
3853 | 5 | case Builtin::BIalloca: |
3854 | 6 | case Builtin::BI_alloca: |
3855 | 12 | case Builtin::BI__builtin_alloca_uninitialized: |
3856 | 32 | case Builtin::BI__builtin_alloca: { |
3857 | 32 | Value *Size = EmitScalarExpr(E->getArg(0)); |
3858 | 32 | const TargetInfo &TI = getContext().getTargetInfo(); |
3859 | | // The alignment of the alloca should correspond to __BIGGEST_ALIGNMENT__. |
3860 | 32 | const Align SuitableAlignmentInBytes = |
3861 | 32 | CGM.getContext() |
3862 | 32 | .toCharUnitsFromBits(TI.getSuitableAlign()) |
3863 | 32 | .getAsAlign(); |
3864 | 32 | AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size); |
3865 | 32 | AI->setAlignment(SuitableAlignmentInBytes); |
3866 | 32 | if (BuiltinID != Builtin::BI__builtin_alloca_uninitialized) |
3867 | 26 | initializeAlloca(*this, AI, Size, SuitableAlignmentInBytes); |
3868 | 32 | LangAS AAS = getASTAllocaAddressSpace(); |
3869 | 32 | LangAS EAS = E->getType()->getPointeeType().getAddressSpace(); |
3870 | 32 | if (AAS != EAS) { |
3871 | 6 | llvm::Type *Ty = CGM.getTypes().ConvertType(E->getType()); |
3872 | 6 | return RValue::get(getTargetHooks().performAddrSpaceCast(*this, AI, AAS, |
3873 | 6 | EAS, Ty)); |
3874 | 6 | } |
3875 | 26 | return RValue::get(AI); |
3876 | 32 | } |
3877 | | |
3878 | 6 | case Builtin::BI__builtin_alloca_with_align_uninitialized: |
3879 | 13 | case Builtin::BI__builtin_alloca_with_align: { |
3880 | 13 | Value *Size = EmitScalarExpr(E->getArg(0)); |
3881 | 13 | Value *AlignmentInBitsValue = EmitScalarExpr(E->getArg(1)); |
3882 | 13 | auto *AlignmentInBitsCI = cast<ConstantInt>(AlignmentInBitsValue); |
3883 | 13 | unsigned AlignmentInBits = AlignmentInBitsCI->getZExtValue(); |
3884 | 13 | const Align AlignmentInBytes = |
3885 | 13 | CGM.getContext().toCharUnitsFromBits(AlignmentInBits).getAsAlign(); |
3886 | 13 | AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size); |
3887 | 13 | AI->setAlignment(AlignmentInBytes); |
3888 | 13 | if (BuiltinID != Builtin::BI__builtin_alloca_with_align_uninitialized) |
3889 | 7 | initializeAlloca(*this, AI, Size, AlignmentInBytes); |
3890 | 13 | LangAS AAS = getASTAllocaAddressSpace(); |
3891 | 13 | LangAS EAS = E->getType()->getPointeeType().getAddressSpace(); |
3892 | 13 | if (AAS != EAS) { |
3893 | 6 | llvm::Type *Ty = CGM.getTypes().ConvertType(E->getType()); |
3894 | 6 | return RValue::get(getTargetHooks().performAddrSpaceCast(*this, AI, AAS, |
3895 | 6 | EAS, Ty)); |
3896 | 6 | } |
3897 | 7 | return RValue::get(AI); |
3898 | 13 | } |
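
Summarizing the two alloca cases: the plain forms align to __BIGGEST_ALIGNMENT__ (the target's "suitable" alignment), the _with_align forms take a constant alignment expressed in bits, and the *_uninitialized variants skip the -ftrivial-auto-var-init fill performed by initializeAlloca. A sketch (illustrative only; use() is a hypothetical consumer):

    // alloca_demo.cpp -- illustrative only.
    void use(void *p);
    void scratch(unsigned n) {
      void *a = __builtin_alloca(n);                  // __BIGGEST_ALIGNMENT__-aligned
      void *b = __builtin_alloca_with_align(n, 256);  // 256 bits = 32-byte alignment
      use(a);
      use(b);
    }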
3899 | | |
3900 | 1 | case Builtin::BIbzero: |
3901 | 6 | case Builtin::BI__builtin_bzero: { |
3902 | 6 | Address Dest = EmitPointerWithAlignment(E->getArg(0)); |
3903 | 6 | Value *SizeVal = EmitScalarExpr(E->getArg(1)); |
3904 | 6 | EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(), |
3905 | 6 | E->getArg(0)->getExprLoc(), FD, 0); |
3906 | 6 | Builder.CreateMemSet(Dest, Builder.getInt8(0), SizeVal, false); |
3907 | 6 | return RValue::get(nullptr); |
3908 | 1 | } |
3909 | | |
3910 | 0 | case Builtin::BIbcopy: |
3911 | 0 | case Builtin::BI__builtin_bcopy: { |
3912 | 0 | Address Src = EmitPointerWithAlignment(E->getArg(0)); |
3913 | 0 | Address Dest = EmitPointerWithAlignment(E->getArg(1)); |
3914 | 0 | Value *SizeVal = EmitScalarExpr(E->getArg(2)); |
3915 | 0 | EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(0)->getType(), |
3916 | 0 | E->getArg(0)->getExprLoc(), FD, 0); |
3917 | 0 | EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(1)->getType(), |
3918 | 0 | E->getArg(1)->getExprLoc(), FD, 0); |
3919 | 0 | Builder.CreateMemMove(Dest, Src, SizeVal, false); |
3920 | 0 | return RValue::get(Dest.getPointer()); |
3921 | 0 | } |
3922 | | |
3923 | 53 | case Builtin::BImemcpy: |
3924 | 136 | case Builtin::BI__builtin_memcpy: |
3925 | 137 | case Builtin::BImempcpy: |
3926 | 137 | case Builtin::BI__builtin_mempcpy: { |
3927 | 137 | Address Dest = EmitPointerWithAlignment(E->getArg(0)); |
3928 | 137 | Address Src = EmitPointerWithAlignment(E->getArg(1)); |
3929 | 137 | Value *SizeVal = EmitScalarExpr(E->getArg(2)); |
3930 | 137 | EmitArgCheck(TCK_Store, Dest, E->getArg(0), 0); |
3931 | 137 | EmitArgCheck(TCK_Load, Src, E->getArg(1), 1); |
3932 | 137 | Builder.CreateMemCpy(Dest, Src, SizeVal, false); |
3933 | 137 | if (BuiltinID == Builtin::BImempcpy || |
3934 | 137 |         BuiltinID == Builtin::BI__builtin_mempcpy)
3935 | 1 | return RValue::get(Builder.CreateInBoundsGEP(Dest.getElementType(), |
3936 | 1 | Dest.getPointer(), SizeVal)); |
3937 | 136 | else |
3938 | 136 | return RValue::get(Dest.getPointer()); |
3939 | 137 | } |
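
memcpy and mempcpy share this one emission path; the only difference is the return value, where mempcpy yields dest + size via the inbounds GEP above. Illustrative usage:

    // mempcpy_demo.cpp -- illustrative only.
    #include <stddef.h>
    char *append(char *dst, const char *src, size_t n) {
      // Returns dst + n, which makes chained appends cheap.
      return (char *)__builtin_mempcpy(dst, src, n);
    }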
3940 | | |
3941 | 8 | case Builtin::BI__builtin_memcpy_inline: { |
3942 | 8 | Address Dest = EmitPointerWithAlignment(E->getArg(0)); |
3943 | 8 | Address Src = EmitPointerWithAlignment(E->getArg(1)); |
3944 | 8 | uint64_t Size = |
3945 | 8 | E->getArg(2)->EvaluateKnownConstInt(getContext()).getZExtValue(); |
3946 | 8 | EmitArgCheck(TCK_Store, Dest, E->getArg(0), 0); |
3947 | 8 | EmitArgCheck(TCK_Load, Src, E->getArg(1), 1); |
3948 | 8 | Builder.CreateMemCpyInline(Dest, Src, Size); |
3949 | 8 | return RValue::get(nullptr); |
3950 | 137 | } |
3951 | | |
3952 | 1 | case Builtin::BI__builtin_char_memchr: |
3953 | 1 | BuiltinID = Builtin::BI__builtin_memchr; |
3954 | 1 | break; |
3955 | | |
3956 | 5 | case Builtin::BI__builtin___memcpy_chk: { |
3957 | | // fold __builtin_memcpy_chk(x, y, cst1, cst2) to memcpy iff cst1<=cst2. |
3958 | 5 | Expr::EvalResult SizeResult, DstSizeResult; |
3959 | 5 | if (!E->getArg(2)->EvaluateAsInt(SizeResult, CGM.getContext()) || |
3960 | 5 |         !E->getArg(3)->EvaluateAsInt(DstSizeResult, CGM.getContext()))
3961 | 3 | break; |
3962 | 2 | llvm::APSInt Size = SizeResult.Val.getInt(); |
3963 | 2 | llvm::APSInt DstSize = DstSizeResult.Val.getInt(); |
3964 | 2 | if (Size.ugt(DstSize)) |
3965 | 0 | break; |
3966 | 2 | Address Dest = EmitPointerWithAlignment(E->getArg(0)); |
3967 | 2 | Address Src = EmitPointerWithAlignment(E->getArg(1)); |
3968 | 2 | Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size); |
3969 | 2 | Builder.CreateMemCpy(Dest, Src, SizeVal, false); |
3970 | 2 | return RValue::get(Dest.getPointer()); |
3971 | 2 | } |
3972 | | |
3973 | 1 | case Builtin::BI__builtin_objc_memmove_collectable: { |
3974 | 1 | Address DestAddr = EmitPointerWithAlignment(E->getArg(0)); |
3975 | 1 | Address SrcAddr = EmitPointerWithAlignment(E->getArg(1)); |
3976 | 1 | Value *SizeVal = EmitScalarExpr(E->getArg(2)); |
3977 | 1 | CGM.getObjCRuntime().EmitGCMemmoveCollectable(*this, |
3978 | 1 | DestAddr, SrcAddr, SizeVal); |
3979 | 1 | return RValue::get(DestAddr.getPointer()); |
3980 | 2 | } |
3981 | | |
3982 | 3 | case Builtin::BI__builtin___memmove_chk: { |
3983 | | // fold __builtin_memmove_chk(x, y, cst1, cst2) to memmove iff cst1<=cst2. |
3984 | 3 | Expr::EvalResult SizeResult, DstSizeResult; |
3985 | 3 | if (!E->getArg(2)->EvaluateAsInt(SizeResult, CGM.getContext()) || |
3986 | 3 | !E->getArg(3)->EvaluateAsInt(DstSizeResult, CGM.getContext())) |
3987 | 2 | break; |
3988 | 1 | llvm::APSInt Size = SizeResult.Val.getInt(); |
3989 | 1 | llvm::APSInt DstSize = DstSizeResult.Val.getInt(); |
3990 | 1 | if (Size.ugt(DstSize)) |
3991 | 0 | break; |
3992 | 1 | Address Dest = EmitPointerWithAlignment(E->getArg(0)); |
3993 | 1 | Address Src = EmitPointerWithAlignment(E->getArg(1)); |
3994 | 1 | Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size); |
3995 | 1 | Builder.CreateMemMove(Dest, Src, SizeVal, false); |
3996 | 1 | return RValue::get(Dest.getPointer()); |
3997 | 1 | } |
3998 | | |
3999 | 15 | case Builtin::BImemmove: |
4000 | 70 | case Builtin::BI__builtin_memmove: { |
4001 | 70 | Address Dest = EmitPointerWithAlignment(E->getArg(0)); |
4002 | 70 | Address Src = EmitPointerWithAlignment(E->getArg(1)); |
4003 | 70 | Value *SizeVal = EmitScalarExpr(E->getArg(2)); |
4004 | 70 | EmitArgCheck(TCK_Store, Dest, E->getArg(0), 0); |
4005 | 70 | EmitArgCheck(TCK_Load, Src, E->getArg(1), 1); |
4006 | 70 | Builder.CreateMemMove(Dest, Src, SizeVal, false); |
4007 | 70 | return RValue::get(Dest.getPointer()); |
4008 | 15 | } |
4009 | 26 | case Builtin::BImemset: |
4010 | 34 | case Builtin::BI__builtin_memset: { |
4011 | 34 | Address Dest = EmitPointerWithAlignment(E->getArg(0)); |
4012 | 34 | Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)), |
4013 | 34 | Builder.getInt8Ty()); |
4014 | 34 | Value *SizeVal = EmitScalarExpr(E->getArg(2)); |
4015 | 34 | EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(), |
4016 | 34 | E->getArg(0)->getExprLoc(), FD, 0); |
4017 | 34 | Builder.CreateMemSet(Dest, ByteVal, SizeVal, false); |
4018 | 34 | return RValue::get(Dest.getPointer()); |
4019 | 26 | } |
4020 | 3 | case Builtin::BI__builtin_memset_inline: { |
4021 | 3 | Address Dest = EmitPointerWithAlignment(E->getArg(0)); |
4022 | 3 | Value *ByteVal = |
4023 | 3 | Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)), Builder.getInt8Ty()); |
4024 | 3 | uint64_t Size = |
4025 | 3 | E->getArg(2)->EvaluateKnownConstInt(getContext()).getZExtValue(); |
4026 | 3 | EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(), |
4027 | 3 | E->getArg(0)->getExprLoc(), FD, 0); |
4028 | 3 | Builder.CreateMemSetInline(Dest, ByteVal, Size); |
4029 | 3 | return RValue::get(nullptr); |
4030 | 26 | } |
4031 | 3 | case Builtin::BI__builtin___memset_chk: { |
4032 | | // fold __builtin_memset_chk(x, y, cst1, cst2) to memset iff cst1<=cst2. |
4033 | 3 | Expr::EvalResult SizeResult, DstSizeResult; |
4034 | 3 | if (!E->getArg(2)->EvaluateAsInt(SizeResult, CGM.getContext()) || |
4035 | 3 | !E->getArg(3)->EvaluateAsInt(DstSizeResult, CGM.getContext())) |
4036 | 2 | break; |
4037 | 1 | llvm::APSInt Size = SizeResult.Val.getInt(); |
4038 | 1 | llvm::APSInt DstSize = DstSizeResult.Val.getInt(); |
4039 | 1 | if (Size.ugt(DstSize)) |
4040 | 0 | break; |
4041 | 1 | Address Dest = EmitPointerWithAlignment(E->getArg(0)); |
4042 | 1 | Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)), |
4043 | 1 | Builder.getInt8Ty()); |
4044 | 1 | Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size); |
4045 | 1 | Builder.CreateMemSet(Dest, ByteVal, SizeVal, false); |
4046 | 1 | return RValue::get(Dest.getPointer()); |
4047 | 1 | } |
4048 | 2 | case Builtin::BI__builtin_wmemchr: { |
4049 | | // The MSVC runtime library does not provide a definition of wmemchr, so we |
4050 | | // need an inline implementation. |
4051 | 2 | if (!getTarget().getTriple().isOSMSVCRT()) |
4052 | 1 | break; |
4053 | | |
4054 | 1 | llvm::Type *WCharTy = ConvertType(getContext().WCharTy); |
4055 | 1 | Value *Str = EmitScalarExpr(E->getArg(0)); |
4056 | 1 | Value *Chr = EmitScalarExpr(E->getArg(1)); |
4057 | 1 | Value *Size = EmitScalarExpr(E->getArg(2)); |
4058 | | |
4059 | 1 | BasicBlock *Entry = Builder.GetInsertBlock(); |
4060 | 1 | BasicBlock *CmpEq = createBasicBlock("wmemchr.eq"); |
4061 | 1 | BasicBlock *Next = createBasicBlock("wmemchr.next"); |
4062 | 1 | BasicBlock *Exit = createBasicBlock("wmemchr.exit"); |
4063 | 1 | Value *SizeEq0 = Builder.CreateICmpEQ(Size, ConstantInt::get(SizeTy, 0)); |
4064 | 1 | Builder.CreateCondBr(SizeEq0, Exit, CmpEq); |
4065 | | |
4066 | 1 | EmitBlock(CmpEq); |
4067 | 1 | PHINode *StrPhi = Builder.CreatePHI(Str->getType(), 2); |
4068 | 1 | StrPhi->addIncoming(Str, Entry); |
4069 | 1 | PHINode *SizePhi = Builder.CreatePHI(SizeTy, 2); |
4070 | 1 | SizePhi->addIncoming(Size, Entry); |
4071 | 1 | CharUnits WCharAlign = |
4072 | 1 | getContext().getTypeAlignInChars(getContext().WCharTy); |
4073 | 1 | Value *StrCh = Builder.CreateAlignedLoad(WCharTy, StrPhi, WCharAlign); |
4074 | 1 | Value *FoundChr = Builder.CreateConstInBoundsGEP1_32(WCharTy, StrPhi, 0); |
4075 | 1 | Value *StrEqChr = Builder.CreateICmpEQ(StrCh, Chr); |
4076 | 1 | Builder.CreateCondBr(StrEqChr, Exit, Next); |
4077 | | |
4078 | 1 | EmitBlock(Next); |
4079 | 1 | Value *NextStr = Builder.CreateConstInBoundsGEP1_32(WCharTy, StrPhi, 1); |
4080 | 1 | Value *NextSize = Builder.CreateSub(SizePhi, ConstantInt::get(SizeTy, 1)); |
4081 | 1 | Value *NextSizeEq0 = |
4082 | 1 | Builder.CreateICmpEQ(NextSize, ConstantInt::get(SizeTy, 0)); |
4083 | 1 | Builder.CreateCondBr(NextSizeEq0, Exit, CmpEq); |
4084 | 1 | StrPhi->addIncoming(NextStr, Next); |
4085 | 1 | SizePhi->addIncoming(NextSize, Next); |
4086 | | |
4087 | 1 | EmitBlock(Exit); |
4088 | 1 | PHINode *Ret = Builder.CreatePHI(Str->getType(), 3); |
4089 | 1 | Ret->addIncoming(llvm::Constant::getNullValue(Str->getType()), Entry); |
4090 | 1 | Ret->addIncoming(llvm::Constant::getNullValue(Str->getType()), Next); |
4091 | 1 | Ret->addIncoming(FoundChr, CmpEq); |
4092 | 1 | return RValue::get(Ret); |
4093 | 2 | } |
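
Since the MSVC runtime lacks wmemchr, the blocks emitted above open-code it; in C terms the generated control flow is roughly the loop below (a sketch of the emitted IR, not the actual library code):

    // wmemchr_sketch.cpp -- a C-level rendering of the IR above; illustrative.
    #include <stddef.h>
    const wchar_t *wmemchr_sketch(const wchar_t *s, wchar_t c, size_t n) {
      for (; n != 0; --n, ++s)  // "wmemchr.next": decrement size, advance pointer
        if (*s == c)            // "wmemchr.eq": compare the loaded element
          return s;             // found -> the FoundChr PHI incoming value
      return nullptr;           // size hit zero (or was zero on entry) -> null
    }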
4094 | 2 | case Builtin::BI__builtin_wmemcmp: { |
4095 | | // The MSVC runtime library does not provide a definition of wmemcmp, so we |
4096 | | // need an inline implementation. |
4097 | 2 | if (!getTarget().getTriple().isOSMSVCRT()) |
4098 | 1 | break; |
4099 | | |
4100 | 1 | llvm::Type *WCharTy = ConvertType(getContext().WCharTy); |
4101 | | |
4102 | 1 | Value *Dst = EmitScalarExpr(E->getArg(0)); |
4103 | 1 | Value *Src = EmitScalarExpr(E->getArg(1)); |
4104 | 1 | Value *Size = EmitScalarExpr(E->getArg(2)); |
4105 | | |
4106 | 1 | BasicBlock *Entry = Builder.GetInsertBlock(); |
4107 | 1 | BasicBlock *CmpGT = createBasicBlock("wmemcmp.gt"); |
4108 | 1 | BasicBlock *CmpLT = createBasicBlock("wmemcmp.lt"); |
4109 | 1 | BasicBlock *Next = createBasicBlock("wmemcmp.next"); |
4110 | 1 | BasicBlock *Exit = createBasicBlock("wmemcmp.exit"); |
4111 | 1 | Value *SizeEq0 = Builder.CreateICmpEQ(Size, ConstantInt::get(SizeTy, 0)); |
4112 | 1 | Builder.CreateCondBr(SizeEq0, Exit, CmpGT); |
4113 | | |
4114 | 1 | EmitBlock(CmpGT); |
4115 | 1 | PHINode *DstPhi = Builder.CreatePHI(Dst->getType(), 2); |
4116 | 1 | DstPhi->addIncoming(Dst, Entry); |
4117 | 1 | PHINode *SrcPhi = Builder.CreatePHI(Src->getType(), 2); |
4118 | 1 | SrcPhi->addIncoming(Src, Entry); |
4119 | 1 | PHINode *SizePhi = Builder.CreatePHI(SizeTy, 2); |
4120 | 1 | SizePhi->addIncoming(Size, Entry); |
4121 | 1 | CharUnits WCharAlign = |
4122 | 1 | getContext().getTypeAlignInChars(getContext().WCharTy); |
4123 | 1 | Value *DstCh = Builder.CreateAlignedLoad(WCharTy, DstPhi, WCharAlign); |
4124 | 1 | Value *SrcCh = Builder.CreateAlignedLoad(WCharTy, SrcPhi, WCharAlign); |
4125 | 1 | Value *DstGtSrc = Builder.CreateICmpUGT(DstCh, SrcCh); |
4126 | 1 | Builder.CreateCondBr(DstGtSrc, Exit, CmpLT); |
4127 | | |
4128 | 1 | EmitBlock(CmpLT); |
4129 | 1 | Value *DstLtSrc = Builder.CreateICmpULT(DstCh, SrcCh); |
4130 | 1 | Builder.CreateCondBr(DstLtSrc, Exit, Next); |
4131 | | |
4132 | 1 | EmitBlock(Next); |
4133 | 1 | Value *NextDst = Builder.CreateConstInBoundsGEP1_32(WCharTy, DstPhi, 1); |
4134 | 1 | Value *NextSrc = Builder.CreateConstInBoundsGEP1_32(WCharTy, SrcPhi, 1); |
4135 | 1 | Value *NextSize = Builder.CreateSub(SizePhi, ConstantInt::get(SizeTy, 1)); |
4136 | 1 | Value *NextSizeEq0 = |
4137 | 1 | Builder.CreateICmpEQ(NextSize, ConstantInt::get(SizeTy, 0)); |
4138 | 1 | Builder.CreateCondBr(NextSizeEq0, Exit, CmpGT); |
4139 | 1 | DstPhi->addIncoming(NextDst, Next); |
4140 | 1 | SrcPhi->addIncoming(NextSrc, Next); |
4141 | 1 | SizePhi->addIncoming(NextSize, Next); |
4142 | | |
4143 | 1 | EmitBlock(Exit); |
4144 | 1 | PHINode *Ret = Builder.CreatePHI(IntTy, 4); |
4145 | 1 | Ret->addIncoming(ConstantInt::get(IntTy, 0), Entry); |
4146 | 1 | Ret->addIncoming(ConstantInt::get(IntTy, 1), CmpGT); |
4147 | 1 | Ret->addIncoming(ConstantInt::get(IntTy, -1), CmpLT); |
4148 | 1 | Ret->addIncoming(ConstantInt::get(IntTy, 0), Next); |
4149 | 1 | return RValue::get(Ret); |
4150 | 2 | } |
4151 | 0 | case Builtin::BI__builtin_dwarf_cfa: { |
4152 | | // The offset in bytes from the first argument to the CFA. |
4153 | | // |
4154 | | // Why on earth is this in the frontend? Is there any reason at |
4155 | | // all that the backend can't reasonably determine this while |
4156 | | // lowering llvm.eh.dwarf.cfa()? |
4157 | | // |
4158 | | // TODO: If there's a satisfactory reason, add a target hook for |
4159 | | // this instead of hard-coding 0, which is correct for most targets. |
4160 | 0 | int32_t Offset = 0; |
4161 | |
4162 | 0 | Function *F = CGM.getIntrinsic(Intrinsic::eh_dwarf_cfa); |
4163 | 0 | return RValue::get(Builder.CreateCall(F, |
4164 | 0 | llvm::ConstantInt::get(Int32Ty, Offset))); |
4165 | 2 | } |
4166 | 3 | case Builtin::BI__builtin_return_address: { |
4167 | 3 | Value *Depth = ConstantEmitter(*this).emitAbstract(E->getArg(0), |
4168 | 3 | getContext().UnsignedIntTy); |
4169 | 3 | Function *F = CGM.getIntrinsic(Intrinsic::returnaddress); |
4170 | 3 | return RValue::get(Builder.CreateCall(F, Depth)); |
4171 | 2 | } |
4172 | 4 | case Builtin::BI_ReturnAddress: { |
4173 | 4 | Function *F = CGM.getIntrinsic(Intrinsic::returnaddress); |
4174 | 4 | return RValue::get(Builder.CreateCall(F, Builder.getInt32(0))); |
4175 | 2 | } |
4176 | 7 | case Builtin::BI__builtin_frame_address: { |
4177 | 7 | Value *Depth = ConstantEmitter(*this).emitAbstract(E->getArg(0), |
4178 | 7 | getContext().UnsignedIntTy); |
4179 | 7 | Function *F = CGM.getIntrinsic(Intrinsic::frameaddress, AllocaInt8PtrTy); |
4180 | 7 | return RValue::get(Builder.CreateCall(F, Depth)); |
4181 | 2 | } |
4182 | 2 | case Builtin::BI__builtin_extract_return_addr: { |
4183 | 2 | Value *Address = EmitScalarExpr(E->getArg(0)); |
4184 | 2 | Value *Result = getTargetHooks().decodeReturnAddress(*this, Address); |
4185 | 2 | return RValue::get(Result); |
4186 | 2 | } |
4187 | 0 | case Builtin::BI__builtin_frob_return_addr: { |
4188 | 0 | Value *Address = EmitScalarExpr(E->getArg(0)); |
4189 | 0 | Value *Result = getTargetHooks().encodeReturnAddress(*this, Address); |
4190 | 0 | return RValue::get(Result); |
4191 | 2 | } |
4192 | 5 | case Builtin::BI__builtin_dwarf_sp_column: { |
4193 | 5 | llvm::IntegerType *Ty |
4194 | 5 | = cast<llvm::IntegerType>(ConvertType(E->getType())); |
4195 | 5 | int Column = getTargetHooks().getDwarfEHStackPointer(CGM); |
4196 | 5 | if (Column == -1) { |
4197 | 0 | CGM.ErrorUnsupported(E, "__builtin_dwarf_sp_column"); |
4198 | 0 | return RValue::get(llvm::UndefValue::get(Ty)); |
4199 | 0 | } |
4200 | 5 | return RValue::get(llvm::ConstantInt::get(Ty, Column, true)); |
4201 | 5 | } |
4202 | 5 | case Builtin::BI__builtin_init_dwarf_reg_size_table: { |
4203 | 5 | Value *Address = EmitScalarExpr(E->getArg(0)); |
4204 | 5 | if (getTargetHooks().initDwarfEHRegSizeTable(*this, Address)) |
4205 | 0 | CGM.ErrorUnsupported(E, "__builtin_init_dwarf_reg_size_table"); |
4206 | 5 | return RValue::get(llvm::UndefValue::get(ConvertType(E->getType()))); |
4207 | 5 | } |
4208 | 0 | case Builtin::BI__builtin_eh_return: { |
4209 | 0 | Value *Int = EmitScalarExpr(E->getArg(0)); |
4210 | 0 | Value *Ptr = EmitScalarExpr(E->getArg(1)); |
4211 | |
4212 | 0 | llvm::IntegerType *IntTy = cast<llvm::IntegerType>(Int->getType()); |
4213 | 0 | assert((IntTy->getBitWidth() == 32 || IntTy->getBitWidth() == 64) && |
4214 | 0 | "LLVM's __builtin_eh_return only supports 32- and 64-bit variants"); |
4215 | 0 | Function *F = |
4216 | 0 | CGM.getIntrinsic(IntTy->getBitWidth() == 32 ? Intrinsic::eh_return_i32 |
4217 | 0 | : Intrinsic::eh_return_i64); |
4218 | 0 | Builder.CreateCall(F, {Int, Ptr}); |
4219 | 0 | Builder.CreateUnreachable(); |
4220 | | |
4221 | | // We do need to preserve an insertion point. |
4222 | 0 | EmitBlock(createBasicBlock("builtin_eh_return.cont")); |
4223 | |
4224 | 0 | return RValue::get(nullptr); |
4225 | 0 | } |
4226 | 2 | case Builtin::BI__builtin_unwind_init: { |
4227 | 2 | Function *F = CGM.getIntrinsic(Intrinsic::eh_unwind_init); |
4228 | 2 | Builder.CreateCall(F); |
4229 | 2 | return RValue::get(nullptr); |
4230 | 0 | } |
4231 | 0 | case Builtin::BI__builtin_extend_pointer: { |
4232 | | // Extends a pointer to the size of an _Unwind_Word, which is |
4233 | | // uint64_t on all platforms. Generally this gets poked into a |
4234 | | // register and eventually used as an address, so if the |
4235 | | // addressing registers are wider than pointers and the platform |
4236 | | // doesn't implicitly ignore high-order bits when doing |
4237 | | // addressing, we need to make sure we zext / sext based on |
4238 | | // the platform's expectations. |
4239 | | // |
4240 | | // See: http://gcc.gnu.org/ml/gcc-bugs/2002-02/msg00237.html |
4241 | | |
4242 | | // Cast the pointer to intptr_t. |
4243 | 0 | Value *Ptr = EmitScalarExpr(E->getArg(0)); |
4244 | 0 | Value *Result = Builder.CreatePtrToInt(Ptr, IntPtrTy, "extend.cast"); |
4245 | | |
4246 | | // If that's 64 bits, we're done. |
4247 | 0 | if (IntPtrTy->getBitWidth() == 64) |
4248 | 0 | return RValue::get(Result); |
4249 | | |
4250 | | // Otherwise, ask the codegen data what to do. |
4251 | 0 | if (getTargetHooks().extendPointerWithSExt()) |
4252 | 0 | return RValue::get(Builder.CreateSExt(Result, Int64Ty, "extend.sext")); |
4253 | 0 | else |
4254 | 0 | return RValue::get(Builder.CreateZExt(Result, Int64Ty, "extend.zext")); |
4255 | 0 | } |
4256 | 6 | case Builtin::BI__builtin_setjmp: { |
4257 | | // Buffer is a void**. |
4258 | 6 | Address Buf = EmitPointerWithAlignment(E->getArg(0)); |
4259 | | |
4260 | | // Store the frame pointer to the setjmp buffer. |
4261 | 6 | Value *FrameAddr = Builder.CreateCall( |
4262 | 6 | CGM.getIntrinsic(Intrinsic::frameaddress, AllocaInt8PtrTy), |
4263 | 6 | ConstantInt::get(Int32Ty, 0)); |
4264 | 6 | Builder.CreateStore(FrameAddr, Buf); |
4265 | | |
4266 | | // Store the stack pointer to the setjmp buffer. |
4267 | 6 | Value *StackAddr = Builder.CreateStackSave(); |
4268 | 6 | assert(Buf.getPointer()->getType() == StackAddr->getType()); |
4269 | | |
4270 | 6 | Address StackSaveSlot = Builder.CreateConstInBoundsGEP(Buf, 2); |
4271 | 6 | Builder.CreateStore(StackAddr, StackSaveSlot); |
4272 | | |
4273 | | // Call LLVM's EH setjmp, which is lightweight. |
4274 | 6 | Function *F = CGM.getIntrinsic(Intrinsic::eh_sjlj_setjmp); |
4275 | 6 | return RValue::get(Builder.CreateCall(F, Buf.getPointer())); |
4276 | 6 | } |
4277 | 8 | case Builtin::BI__builtin_longjmp: { |
4278 | 8 | Value *Buf = EmitScalarExpr(E->getArg(0)); |
4279 | | |
4280 | | // Call LLVM's EH longjmp, which is lightweight. |
4281 | 8 | Builder.CreateCall(CGM.getIntrinsic(Intrinsic::eh_sjlj_longjmp), Buf); |
4282 | | |
4283 | | // longjmp doesn't return; mark this as unreachable. |
4284 | 8 | Builder.CreateUnreachable(); |
4285 | | |
4286 | | // We do need to preserve an insertion point. |
4287 | 8 | EmitBlock(createBasicBlock("longjmp.cont")); |
4288 | | |
4289 | 8 | return RValue::get(nullptr); |
4290 | 6 | } |
4291 | 52 | case Builtin::BI__builtin_launder: { |
4292 | 52 | const Expr *Arg = E->getArg(0); |
4293 | 52 | QualType ArgTy = Arg->getType()->getPointeeType(); |
4294 | 52 | Value *Ptr = EmitScalarExpr(Arg); |
4295 | 52 | if (TypeRequiresBuiltinLaunder(CGM, ArgTy)) |
4296 | 11 | Ptr = Builder.CreateLaunderInvariantGroup(Ptr); |
4297 | | |
4298 | 52 | return RValue::get(Ptr); |
4299 | 6 | } |
4300 | 0 | case Builtin::BI__sync_fetch_and_add: |
4301 | 0 | case Builtin::BI__sync_fetch_and_sub: |
4302 | 0 | case Builtin::BI__sync_fetch_and_or: |
4303 | 0 | case Builtin::BI__sync_fetch_and_and: |
4304 | 0 | case Builtin::BI__sync_fetch_and_xor: |
4305 | 0 | case Builtin::BI__sync_fetch_and_nand: |
4306 | 0 | case Builtin::BI__sync_add_and_fetch: |
4307 | 0 | case Builtin::BI__sync_sub_and_fetch: |
4308 | 0 | case Builtin::BI__sync_and_and_fetch: |
4309 | 0 | case Builtin::BI__sync_or_and_fetch: |
4310 | 0 | case Builtin::BI__sync_xor_and_fetch: |
4311 | 0 | case Builtin::BI__sync_nand_and_fetch: |
4312 | 0 | case Builtin::BI__sync_val_compare_and_swap: |
4313 | 0 | case Builtin::BI__sync_bool_compare_and_swap: |
4314 | 0 | case Builtin::BI__sync_lock_test_and_set: |
4315 | 0 | case Builtin::BI__sync_lock_release: |
4316 | 0 | case Builtin::BI__sync_swap: |
4317 | 0 | llvm_unreachable("Shouldn't make it through sema"); |
4318 | 4 | case Builtin::BI__sync_fetch_and_add_1: |
4319 | 8 | case Builtin::BI__sync_fetch_and_add_2: |
4320 | 14 | case Builtin::BI__sync_fetch_and_add_4: |
4321 | 19 | case Builtin::BI__sync_fetch_and_add_8: |
4322 | 22 | case Builtin::BI__sync_fetch_and_add_16: |
4323 | 22 | return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Add, E); |
4324 | 5 | case Builtin::BI__sync_fetch_and_sub_1: |
4325 | 9 | case Builtin::BI__sync_fetch_and_sub_2: |
4326 | 13 | case Builtin::BI__sync_fetch_and_sub_4: |
4327 | 18 | case Builtin::BI__sync_fetch_and_sub_8: |
4328 | 19 | case Builtin::BI__sync_fetch_and_sub_16: |
4329 | 19 | return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Sub, E); |
4330 | 4 | case Builtin::BI__sync_fetch_and_or_1: |
4331 | 8 | case Builtin::BI__sync_fetch_and_or_2: |
4332 | 13 | case Builtin::BI__sync_fetch_and_or_4: |
4333 | 17 | case Builtin::BI__sync_fetch_and_or_8: |
4334 | 18 | case Builtin::BI__sync_fetch_and_or_16: |
4335 | 18 | return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Or, E); |
4336 | 4 | case Builtin::BI__sync_fetch_and_and_1: |
4337 | 8 | case Builtin::BI__sync_fetch_and_and_2: |
4338 | 13 | case Builtin::BI__sync_fetch_and_and_4: |
4339 | 17 | case Builtin::BI__sync_fetch_and_and_8: |
4340 | 18 | case Builtin::BI__sync_fetch_and_and_16: |
4341 | 18 | return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::And, E); |
4342 | 4 | case Builtin::BI__sync_fetch_and_xor_1: |
4343 | 8 | case Builtin::BI__sync_fetch_and_xor_2: |
4344 | 13 | case Builtin::BI__sync_fetch_and_xor_4: |
4345 | 17 | case Builtin::BI__sync_fetch_and_xor_8: |
4346 | 20 | case Builtin::BI__sync_fetch_and_xor_16: |
4347 | 20 | return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xor, E); |
4348 | 4 | case Builtin::BI__sync_fetch_and_nand_1: |
4349 | 8 | case Builtin::BI__sync_fetch_and_nand_2: |
4350 | 13 | case Builtin::BI__sync_fetch_and_nand_4: |
4351 | 17 | case Builtin::BI__sync_fetch_and_nand_8: |
4352 | 18 | case Builtin::BI__sync_fetch_and_nand_16: |
4353 | 18 | return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Nand, E); |
4354 | | |
4355 | | // Clang extensions: not overloaded yet. |
4356 | 1 | case Builtin::BI__sync_fetch_and_min: |
4357 | 1 | return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Min, E); |
4358 | 1 | case Builtin::BI__sync_fetch_and_max: |
4359 | 1 | return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Max, E); |
4360 | 1 | case Builtin::BI__sync_fetch_and_umin: |
4361 | 1 | return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMin, E); |
4362 | 1 | case Builtin::BI__sync_fetch_and_umax: |
4363 | 1 | return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMax, E); |
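
Every sized __sync_* form above becomes a single sequentially-consistent atomicrmw; the fetch_and_* spellings return the old value directly, while the *_and_fetch spellings in the next group re-apply the operation to recover the new value. A sketch:

    // sync_atomics_demo.cpp -- illustrative only.
    int take_ticket(int *counter) {
      return __sync_fetch_and_add(counter, 1);  // atomicrmw add ... seq_cst; yields old value
    }
    int bump(int *counter) {
      return __sync_add_and_fetch(counter, 1);  // same RMW plus an add for the new value
    }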
4364 | | |
4365 | 2 | case Builtin::BI__sync_add_and_fetch_1: |
4366 | 4 | case Builtin::BI__sync_add_and_fetch_2: |
4367 | 7 | case Builtin::BI__sync_add_and_fetch_4: |
4368 | 9 | case Builtin::BI__sync_add_and_fetch_8: |
4369 | 10 | case Builtin::BI__sync_add_and_fetch_16: |
4370 | 10 | return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Add, E, |
4371 | 10 | llvm::Instruction::Add); |
4372 | 2 | case Builtin::BI__sync_sub_and_fetch_1: |
4373 | 4 | case Builtin::BI__sync_sub_and_fetch_2: |
4374 | 7 | case Builtin::BI__sync_sub_and_fetch_4: |
4375 | 9 | case Builtin::BI__sync_sub_and_fetch_8: |
4376 | 11 | case Builtin::BI__sync_sub_and_fetch_16: |
4377 | 11 | return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Sub, E, |
4378 | 11 | llvm::Instruction::Sub); |
4379 | 3 | case Builtin::BI__sync_and_and_fetch_1: |
4380 | 5 | case Builtin::BI__sync_and_and_fetch_2: |
4381 | 7 | case Builtin::BI__sync_and_and_fetch_4: |
4382 | 9 | case Builtin::BI__sync_and_and_fetch_8: |
4383 | 10 | case Builtin::BI__sync_and_and_fetch_16: |
4384 | 10 | return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::And, E, |
4385 | 10 | llvm::Instruction::And); |
4386 | 3 | case Builtin::BI__sync_or_and_fetch_1: |
4387 | 5 | case Builtin::BI__sync_or_and_fetch_2: |
4388 | 7 | case Builtin::BI__sync_or_and_fetch_4: |
4389 | 9 | case Builtin::BI__sync_or_and_fetch_8: |
4390 | 10 | case Builtin::BI__sync_or_and_fetch_16: |
4391 | 10 | return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Or, E, |
4392 | 10 | llvm::Instruction::Or); |
4393 | 3 | case Builtin::BI__sync_xor_and_fetch_1: |
4394 | 5 | case Builtin::BI__sync_xor_and_fetch_2: |
4395 | 8 | case Builtin::BI__sync_xor_and_fetch_4: |
4396 | 10 | case Builtin::BI__sync_xor_and_fetch_8: |
4397 | 11 | case Builtin::BI__sync_xor_and_fetch_16: |
4398 | 11 | return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Xor, E, |
4399 | 11 | llvm::Instruction::Xor); |
4400 | 3 | case Builtin::BI__sync_nand_and_fetch_1: |
4401 | 5 | case Builtin::BI__sync_nand_and_fetch_2: |
4402 | 7 | case Builtin::BI__sync_nand_and_fetch_4: |
4403 | 9 | case Builtin::BI__sync_nand_and_fetch_8: |
4404 | 12 | case Builtin::BI__sync_nand_and_fetch_16: |
4405 | 12 | return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Nand, E, |
4406 | 12 | llvm::Instruction::And, true); |
4407 | | |
4408 | 3 | case Builtin::BI__sync_val_compare_and_swap_1: |
4409 | 5 | case Builtin::BI__sync_val_compare_and_swap_2: |
4410 | 10 | case Builtin::BI__sync_val_compare_and_swap_4: |
4411 | 13 | case Builtin::BI__sync_val_compare_and_swap_8: |
4412 | 15 | case Builtin::BI__sync_val_compare_and_swap_16: |
4413 | 15 | return RValue::get(MakeAtomicCmpXchgValue(*this, E, false)); |
4414 | | |
4415 | 2 | case Builtin::BI__sync_bool_compare_and_swap_1: |
4416 | 4 | case Builtin::BI__sync_bool_compare_and_swap_2: |
4417 | 9 | case Builtin::BI__sync_bool_compare_and_swap_4: |
4418 | 12 | case Builtin::BI__sync_bool_compare_and_swap_8: |
4419 | 14 | case Builtin::BI__sync_bool_compare_and_swap_16: |
4420 | 14 | return RValue::get(MakeAtomicCmpXchgValue(*this, E, true)); |
4421 | | |
4422 | 0 | case Builtin::BI__sync_swap_1: |
4423 | 0 | case Builtin::BI__sync_swap_2: |
4424 | 1 | case Builtin::BI__sync_swap_4: |
4425 | 1 | case Builtin::BI__sync_swap_8: |
4426 | 2 | case Builtin::BI__sync_swap_16: |
4427 | 2 | return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E); |
4428 | | |
4429 | 2 | case Builtin::BI__sync_lock_test_and_set_1: |
4430 | 4 | case Builtin::BI__sync_lock_test_and_set_2: |
4431 | 8 | case Builtin::BI__sync_lock_test_and_set_4: |
4432 | 12 | case Builtin::BI__sync_lock_test_and_set_8: |
4433 | 13 | case Builtin::BI__sync_lock_test_and_set_16: |
4434 | 13 | return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E); |
4435 | | |
4436 | 2 | case Builtin::BI__sync_lock_release_1: |
4437 | 4 | case Builtin::BI__sync_lock_release_2: |
4438 | 9 | case Builtin::BI__sync_lock_release_4: |
4439 | 11 | case Builtin::BI__sync_lock_release_8: |
4440 | 13 | case Builtin::BI__sync_lock_release_16: { |
4441 | 13 | Value *Ptr = CheckAtomicAlignment(*this, E); |
4442 | 13 | QualType ElTy = E->getArg(0)->getType()->getPointeeType(); |
4443 | 13 | CharUnits StoreSize = getContext().getTypeSizeInChars(ElTy); |
4444 | 13 | llvm::Type *ITy = |
4445 | 13 | llvm::IntegerType::get(getLLVMContext(), StoreSize.getQuantity() * 8); |
4446 | 13 | llvm::StoreInst *Store = |
4447 | 13 | Builder.CreateAlignedStore(llvm::Constant::getNullValue(ITy), Ptr, |
4448 | 13 | StoreSize); |
4449 | 13 | Store->setAtomic(llvm::AtomicOrdering::Release); |
4450 | 13 | return RValue::get(nullptr); |
4451 | 11 | } |
4452 | | |
4453 | 2 | case Builtin::BI__sync_synchronize: { |
4454 | | // We assume this is supposed to correspond to a C++0x-style |
4455 | | // sequentially-consistent fence (i.e. this is only usable for |
4456 | | // synchronization, not device I/O or anything like that). This intrinsic |
4457 | | // is really badly designed in the sense that in theory, there isn't |
4458 | | // any way to safely use it... but in practice, it mostly works |
4459 | | // to use it with non-atomic loads and stores to get acquire/release |
4460 | | // semantics. |
4461 | 2 | Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent); |
4462 | 2 | return RValue::get(nullptr); |
4463 | 11 | } |
4464 | | |
4465 | 35 | case Builtin::BI__builtin_nontemporal_load: |
4466 | 35 | return RValue::get(EmitNontemporalLoad(*this, E)); |
4467 | 84 | case Builtin::BI__builtin_nontemporal_store: |
4468 | 84 | return RValue::get(EmitNontemporalStore(*this, E)); |
4469 | 11 | case Builtin::BI__c11_atomic_is_lock_free: |
4470 | 27 | case Builtin::BI__atomic_is_lock_free: { |
4471 | | // Call "bool __atomic_is_lock_free(size_t size, void *ptr)". For the |
4472 | | // __c11 builtin, ptr is 0 (indicating a properly-aligned object), since |
4473 | | // _Atomic(T) is always properly-aligned. |
4474 | 27 | const char *LibCallName = "__atomic_is_lock_free"; |
4475 | 27 | CallArgList Args; |
4476 | 27 | Args.add(RValue::get(EmitScalarExpr(E->getArg(0))), |
4477 | 27 | getContext().getSizeType()); |
4478 | 27 | if (BuiltinID == Builtin::BI__atomic_is_lock_free) |
4479 | 16 | Args.add(RValue::get(EmitScalarExpr(E->getArg(1))), |
4480 | 16 | getContext().VoidPtrTy); |
4481 | 11 | else |
4482 | 11 | Args.add(RValue::get(llvm::Constant::getNullValue(VoidPtrTy)), |
4483 | 11 | getContext().VoidPtrTy); |
4484 | 27 | const CGFunctionInfo &FuncInfo = |
4485 | 27 | CGM.getTypes().arrangeBuiltinFunctionCall(E->getType(), Args); |
4486 | 27 | llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FuncInfo); |
4487 | 27 | llvm::FunctionCallee Func = CGM.CreateRuntimeFunction(FTy, LibCallName); |
4488 | 27 | return EmitCall(FuncInfo, CGCallee::forDirect(Func), |
4489 | 27 | ReturnValueSlot(), Args); |
4490 | 11 | } |
4491 | | |
4492 | 8 | case Builtin::BI__atomic_test_and_set: { |
4493 | | // Look at the argument type to determine whether this is a volatile |
4494 | | // operation. The parameter type is always volatile. |
4495 | 8 | QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType(); |
4496 | 8 | bool Volatile = |
4497 | 8 | PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified(); |
4498 | | |
4499 | 8 | Value *Ptr = EmitScalarExpr(E->getArg(0)); |
4500 | 8 | Value *NewVal = Builder.getInt8(1); |
4501 | 8 | Value *Order = EmitScalarExpr(E->getArg(1)); |
4502 | 8 | if (isa<llvm::ConstantInt>(Order)) { |
4503 | 8 | int ord = cast<llvm::ConstantInt>(Order)->getZExtValue(); |
4504 | 8 | AtomicRMWInst *Result = nullptr; |
4505 | 8 | switch (ord) { |
4506 | 0 | case 0: // memory_order_relaxed |
4507 | 0 | default: // invalid order |
4508 | 0 | Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal, |
4509 | 0 | llvm::AtomicOrdering::Monotonic); |
4510 | 0 | break; |
4511 | 0 | case 1: // memory_order_consume |
4512 | 4 | case 2: // memory_order_acquire |
4513 | 4 | Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal, |
4514 | 4 | llvm::AtomicOrdering::Acquire); |
4515 | 4 | break; |
4516 | 0 | case 3: // memory_order_release |
4517 | 0 | Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal, |
4518 | 0 | llvm::AtomicOrdering::Release); |
4519 | 0 | break; |
4520 | 0 | case 4: // memory_order_acq_rel |
4521 | |
4522 | 0 | Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal, |
4523 | 0 | llvm::AtomicOrdering::AcquireRelease); |
4524 | 0 | break; |
4525 | 4 | case 5: // memory_order_seq_cst |
4526 | 4 | Result = Builder.CreateAtomicRMW( |
4527 | 4 | llvm::AtomicRMWInst::Xchg, Ptr, NewVal, |
4528 | 4 | llvm::AtomicOrdering::SequentiallyConsistent); |
4529 | 4 | break; |
4530 | 8 | } |
4531 | 8 | Result->setVolatile(Volatile); |
4532 | 8 | return RValue::get(Builder.CreateIsNotNull(Result, "tobool")); |
4533 | 8 | } |
4534 | | |
4535 | 0 | llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn); |
4536 | |
4537 | 0 | llvm::BasicBlock *BBs[5] = { |
4538 | 0 | createBasicBlock("monotonic", CurFn), |
4539 | 0 | createBasicBlock("acquire", CurFn), |
4540 | 0 | createBasicBlock("release", CurFn), |
4541 | 0 | createBasicBlock("acqrel", CurFn), |
4542 | 0 | createBasicBlock("seqcst", CurFn) |
4543 | 0 | }; |
4544 | 0 | llvm::AtomicOrdering Orders[5] = { |
4545 | 0 | llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Acquire, |
4546 | 0 | llvm::AtomicOrdering::Release, llvm::AtomicOrdering::AcquireRelease, |
4547 | 0 | llvm::AtomicOrdering::SequentiallyConsistent}; |
4548 | |
4549 | 0 | Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false); |
4550 | 0 | llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]); |
4551 | |
4552 | 0 | Builder.SetInsertPoint(ContBB); |
4553 | 0 | PHINode *Result = Builder.CreatePHI(Int8Ty, 5, "was_set"); |
4554 | |
4555 | 0 | for (unsigned i = 0; i < 5; ++i) { |
4556 | 0 | Builder.SetInsertPoint(BBs[i]); |
4557 | 0 | AtomicRMWInst *RMW = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, |
4558 | 0 | Ptr, NewVal, Orders[i]); |
4559 | 0 | RMW->setVolatile(Volatile); |
4560 | 0 | Result->addIncoming(RMW, BBs[i]); |
4561 | 0 | Builder.CreateBr(ContBB); |
4562 | 0 | } |
4563 | |
4564 | 0 | SI->addCase(Builder.getInt32(0), BBs[0]); |
4565 | 0 | SI->addCase(Builder.getInt32(1), BBs[1]); |
4566 | 0 | SI->addCase(Builder.getInt32(2), BBs[1]); |
4567 | 0 | SI->addCase(Builder.getInt32(3), BBs[2]); |
4568 | 0 | SI->addCase(Builder.getInt32(4), BBs[3]); |
4569 | 0 | SI->addCase(Builder.getInt32(5), BBs[4]); |
4570 | |
4571 | 0 | Builder.SetInsertPoint(ContBB); |
4572 | 0 | return RValue::get(Builder.CreateIsNotNull(Result, "tobool")); |
4573 | 8 | } |
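// [Editor's sketch, added for illustration; not part of the listing or its
// coverage data. The caller below is hypothetical.] A constant memory order
// takes the switch above directly; for example, with __ATOMIC_ACQUIRE
// (ord == 2):
bool tas_acquire(volatile char *Flag) {
  return __atomic_test_and_set(Flag, __ATOMIC_ACQUIRE);
}
// Approximate IR emitted by the Acquire case:
//   %0 = atomicrmw volatile xchg ptr %Flag, i8 1 acquire
//   %tobool = icmp ne i8 %0, 0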
4574 | | |
4575 | 8 | case Builtin::BI__atomic_clear: { |
4576 | 8 | QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType(); |
4577 | 8 | bool Volatile = |
4578 | 8 | PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified(); |
4579 | | |
4580 | 8 | Address Ptr = EmitPointerWithAlignment(E->getArg(0)); |
4581 | 8 | Ptr = Ptr.withElementType(Int8Ty); |
4582 | 8 | Value *NewVal = Builder.getInt8(0); |
4583 | 8 | Value *Order = EmitScalarExpr(E->getArg(1)); |
4584 | 8 | if (isa<llvm::ConstantInt>(Order)) { |
4585 | 8 | int ord = cast<llvm::ConstantInt>(Order)->getZExtValue(); |
4586 | 8 | StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile); |
4587 | 8 | switch (ord) { |
4588 | 0 | case 0: // memory_order_relaxed |
4589 | 0 | default: // invalid order |
4590 | 0 | Store->setOrdering(llvm::AtomicOrdering::Monotonic); |
4591 | 0 | break; |
4592 | 4 | case 3: // memory_order_release |
4593 | 4 | Store->setOrdering(llvm::AtomicOrdering::Release); |
4594 | 4 | break; |
4595 | 4 | case 5: // memory_order_seq_cst |
4596 | 4 | Store->setOrdering(llvm::AtomicOrdering::SequentiallyConsistent); |
4597 | 4 | break; |
4598 | 8 | } |
4599 | 8 | return RValue::get(nullptr); |
4600 | 8 | } |
4601 | | |
4602 | 0 | llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn); |
4603 | |
4604 | 0 | llvm::BasicBlock *BBs[3] = { |
4605 | 0 | createBasicBlock("monotonic", CurFn), |
4606 | 0 | createBasicBlock("release", CurFn), |
4607 | 0 | createBasicBlock("seqcst", CurFn) |
4608 | 0 | }; |
4609 | 0 | llvm::AtomicOrdering Orders[3] = { |
4610 | 0 | llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Release, |
4611 | 0 | llvm::AtomicOrdering::SequentiallyConsistent}; |
4612 | |
4613 | 0 | Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false); |
4614 | 0 | llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]); |
4615 | |
4616 | 0 | for (unsigned i = 0; i < 3; ++i) { |
4617 | 0 | Builder.SetInsertPoint(BBs[i]); |
4618 | 0 | StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile); |
4619 | 0 | Store->setOrdering(Orders[i]); |
4620 | 0 | Builder.CreateBr(ContBB); |
4621 | 0 | } |
4622 | |
4623 | 0 | SI->addCase(Builder.getInt32(0), BBs[0]); |
4624 | 0 | SI->addCase(Builder.getInt32(3), BBs[1]); |
4625 | 0 | SI->addCase(Builder.getInt32(5), BBs[2]); |
4626 | |
4627 | 0 | Builder.SetInsertPoint(ContBB); |
4628 | 0 | return RValue::get(nullptr); |
4629 | 8 | } |
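// [Editor's sketch, added; hypothetical caller.] With a constant order the
// builtin becomes a single atomic store of zero, e.g. for ord == 3:
void clear_release(volatile char *Flag) {
  __atomic_clear(Flag, __ATOMIC_RELEASE);
}
// Approximate IR: store atomic volatile i8 0, ptr %Flag release, align 1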
4630 | | |
4631 | 0 | case Builtin::BI__atomic_thread_fence: |
4632 | 0 | case Builtin::BI__atomic_signal_fence: |
4633 | 0 | case Builtin::BI__c11_atomic_thread_fence: |
4634 | 0 | case Builtin::BI__c11_atomic_signal_fence: { |
4635 | 0 | llvm::SyncScope::ID SSID; |
4636 | 0 | if (BuiltinID == Builtin::BI__atomic_signal_fence || |
4637 | 0 | BuiltinID == Builtin::BI__c11_atomic_signal_fence) |
4638 | 0 | SSID = llvm::SyncScope::SingleThread; |
4639 | 0 | else |
4640 | 0 | SSID = llvm::SyncScope::System; |
4641 | 0 | Value *Order = EmitScalarExpr(E->getArg(0)); |
4642 | 0 | if (isa<llvm::ConstantInt>(Order)) { |
4643 | 0 | int ord = cast<llvm::ConstantInt>(Order)->getZExtValue(); |
4644 | 0 | switch (ord) { |
4645 | 0 | case 0: // memory_order_relaxed |
4646 | 0 | default: // invalid order |
4647 | 0 | break; |
4648 | 0 | case 1: // memory_order_consume |
4649 | 0 | case 2: // memory_order_acquire |
4650 | 0 | Builder.CreateFence(llvm::AtomicOrdering::Acquire, SSID); |
4651 | 0 | break; |
4652 | 0 | case 3: // memory_order_release |
4653 | 0 | Builder.CreateFence(llvm::AtomicOrdering::Release, SSID); |
4654 | 0 | break; |
4655 | 0 | case 4: // memory_order_acq_rel |
4656 | 0 | Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, SSID); |
4657 | 0 | break; |
4658 | 0 | case 5: // memory_order_seq_cst |
4659 | 0 | Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, SSID); |
4660 | 0 | break; |
4661 | 0 | } |
4662 | 0 | return RValue::get(nullptr); |
4663 | 0 | } |
4664 | | |
4665 | 0 | llvm::BasicBlock *AcquireBB, *ReleaseBB, *AcqRelBB, *SeqCstBB; |
4666 | 0 | AcquireBB = createBasicBlock("acquire", CurFn); |
4667 | 0 | ReleaseBB = createBasicBlock("release", CurFn); |
4668 | 0 | AcqRelBB = createBasicBlock("acqrel", CurFn); |
4669 | 0 | SeqCstBB = createBasicBlock("seqcst", CurFn); |
4670 | 0 | llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn); |
4671 | |
4672 | 0 | Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false); |
4673 | 0 | llvm::SwitchInst *SI = Builder.CreateSwitch(Order, ContBB); |
4674 | |
4675 | 0 | Builder.SetInsertPoint(AcquireBB); |
4676 | 0 | Builder.CreateFence(llvm::AtomicOrdering::Acquire, SSID); |
4677 | 0 | Builder.CreateBr(ContBB); |
4678 | 0 | SI->addCase(Builder.getInt32(1), AcquireBB); |
4679 | 0 | SI->addCase(Builder.getInt32(2), AcquireBB); |
4680 | |
4681 | 0 | Builder.SetInsertPoint(ReleaseBB); |
4682 | 0 | Builder.CreateFence(llvm::AtomicOrdering::Release, SSID); |
4683 | 0 | Builder.CreateBr(ContBB); |
4684 | 0 | SI->addCase(Builder.getInt32(3), ReleaseBB); |
4685 | |
4686 | 0 | Builder.SetInsertPoint(AcqRelBB); |
4687 | 0 | Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, SSID); |
4688 | 0 | Builder.CreateBr(ContBB); |
4689 | 0 | SI->addCase(Builder.getInt32(4), AcqRelBB); |
4690 | |
4691 | 0 | Builder.SetInsertPoint(SeqCstBB); |
4692 | 0 | Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, SSID); |
4693 | 0 | Builder.CreateBr(ContBB); |
4694 | 0 | SI->addCase(Builder.getInt32(5), SeqCstBB); |
4695 | |
4696 | 0 | Builder.SetInsertPoint(ContBB); |
4697 | 0 | return RValue::get(nullptr); |
4698 | 0 | } |
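// [Editor's sketch, added; hypothetical caller.] The two fence builtins
// differ only in the sync scope selected above:
void barriers(void) {
  __atomic_thread_fence(__ATOMIC_SEQ_CST);  // fence seq_cst
  __atomic_signal_fence(__ATOMIC_ACQUIRE);  // fence syncscope("singlethread") acquire
}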
4699 | | |
4700 | 12 | case Builtin::BI__builtin_signbit: |
4701 | 22 | case Builtin::BI__builtin_signbitf: |
4702 | 30 | case Builtin::BI__builtin_signbitl: { |
4703 | 30 | return RValue::get( |
4704 | 30 | Builder.CreateZExt(EmitSignBit(*this, EmitScalarExpr(E->getArg(0))), |
4705 | 30 | ConvertType(E->getType()))); |
4706 | 22 | } |
4707 | 2 | case Builtin::BI__warn_memset_zero_len: |
4708 | 2 | return RValue::getIgnored(); |
4709 | 10 | case Builtin::BI__annotation: { |
4710 | | // Re-encode each wide string to UTF8 and make an MDString. |
4711 | 10 | SmallVector<Metadata *, 1> Strings; |
4712 | 16 | for (const Expr *Arg : E->arguments()) { |
4713 | 16 | const auto *Str = cast<StringLiteral>(Arg->IgnoreParenCasts()); |
4714 | 16 | assert(Str->getCharByteWidth() == 2); |
4715 | 16 | StringRef WideBytes = Str->getBytes(); |
4716 | 16 | std::string StrUtf8; |
4717 | 16 | if (!convertUTF16ToUTF8String( |
4718 | 16 | ArrayRef(WideBytes.data(), WideBytes.size()), StrUtf8)) { |
4719 | 0 | CGM.ErrorUnsupported(E, "non-UTF16 __annotation argument"); |
4720 | 0 | continue; |
4721 | 0 | } |
4722 | 16 | Strings.push_back(llvm::MDString::get(getLLVMContext(), StrUtf8)); |
4723 | 16 | } |
4724 | | |
4725 | | // Build an MDTuple of MDStrings and emit the intrinsic call.
4726 | 10 | llvm::Function *F = |
4727 | 10 | CGM.getIntrinsic(llvm::Intrinsic::codeview_annotation, {}); |
4728 | 10 | MDTuple *StrTuple = MDTuple::get(getLLVMContext(), Strings); |
4729 | 10 | Builder.CreateCall(F, MetadataAsValue::get(getLLVMContext(), StrTuple)); |
4730 | 10 | return RValue::getIgnored(); |
4731 | 10 | } |
4732 | 7 | case Builtin::BI__builtin_annotation: { |
4733 | 7 | llvm::Value *AnnVal = EmitScalarExpr(E->getArg(0)); |
4734 | 7 | llvm::Function *F = |
4735 | 7 | CGM.getIntrinsic(llvm::Intrinsic::annotation, |
4736 | 7 | {AnnVal->getType(), CGM.ConstGlobalsPtrTy}); |
4737 | | |
4738 | | // Get the annotation string, go through casts. Sema requires this to be a |
4739 | | // non-wide string literal, potentially cast, so the cast<> is safe.
4740 | 7 | const Expr *AnnotationStrExpr = E->getArg(1)->IgnoreParenCasts(); |
4741 | 7 | StringRef Str = cast<StringLiteral>(AnnotationStrExpr)->getString(); |
4742 | 7 | return RValue::get( |
4743 | 7 | EmitAnnotationCall(F, AnnVal, Str, E->getExprLoc(), nullptr)); |
4744 | 10 | } |
4745 | 3 | case Builtin::BI__builtin_addcb: |
4746 | 6 | case Builtin::BI__builtin_addcs: |
4747 | 9 | case Builtin::BI__builtin_addc: |
4748 | 12 | case Builtin::BI__builtin_addcl: |
4749 | 15 | case Builtin::BI__builtin_addcll: |
4750 | 18 | case Builtin::BI__builtin_subcb: |
4751 | 21 | case Builtin::BI__builtin_subcs: |
4752 | 24 | case Builtin::BI__builtin_subc: |
4753 | 27 | case Builtin::BI__builtin_subcl: |
4754 | 30 | case Builtin::BI__builtin_subcll: { |
4755 | | |
4756 | | // We translate all of these builtins from expressions of the form: |
4757 | | // int x = ..., y = ..., carryin = ..., carryout, result; |
4758 | | // result = __builtin_addc(x, y, carryin, &carryout); |
4759 | | // |
4760 | | // to LLVM IR of the form: |
4761 | | // |
4762 | | // %tmp1 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %x, i32 %y) |
4763 | | // %tmpsum1 = extractvalue {i32, i1} %tmp1, 0 |
4764 | | // %carry1 = extractvalue {i32, i1} %tmp1, 1 |
4765 | | // %tmp2 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %tmpsum1, |
4766 | | // i32 %carryin) |
4767 | | // %result = extractvalue {i32, i1} %tmp2, 0 |
4768 | | // %carry2 = extractvalue {i32, i1} %tmp2, 1 |
4769 | | // %tmp3 = or i1 %carry1, %carry2 |
4770 | | // %tmp4 = zext i1 %tmp3 to i32 |
4771 | | // store i32 %tmp4, i32* %carryout |
4772 | | |
4773 | | // Scalarize our inputs. |
4774 | 30 | llvm::Value *X = EmitScalarExpr(E->getArg(0)); |
4775 | 30 | llvm::Value *Y = EmitScalarExpr(E->getArg(1)); |
4776 | 30 | llvm::Value *Carryin = EmitScalarExpr(E->getArg(2)); |
4777 | 30 | Address CarryOutPtr = EmitPointerWithAlignment(E->getArg(3)); |
4778 | | |
4779 | | // Decide if we are lowering to a uadd.with.overflow or usub.with.overflow. |
4780 | 30 | llvm::Intrinsic::ID IntrinsicId; |
4781 | 30 | switch (BuiltinID) { |
4782 | 0 | default: llvm_unreachable("Unknown multiprecision builtin id."); |
4783 | 3 | case Builtin::BI__builtin_addcb: |
4784 | 6 | case Builtin::BI__builtin_addcs: |
4785 | 9 | case Builtin::BI__builtin_addc: |
4786 | 12 | case Builtin::BI__builtin_addcl: |
4787 | 15 | case Builtin::BI__builtin_addcll: |
4788 | 15 | IntrinsicId = llvm::Intrinsic::uadd_with_overflow; |
4789 | 15 | break; |
4790 | 3 | case Builtin::BI__builtin_subcb: |
4791 | 6 | case Builtin::BI__builtin_subcs: |
4792 | 9 | case Builtin::BI__builtin_subc: |
4793 | 12 | case Builtin::BI__builtin_subcl: |
4794 | 15 | case Builtin::BI__builtin_subcll: |
4795 | 15 | IntrinsicId = llvm::Intrinsic::usub_with_overflow; |
4796 | 15 | break; |
4797 | 30 | } |
4798 | | |
4799 | | // Construct our resulting LLVM IR expression. |
4800 | 30 | llvm::Value *Carry1; |
4801 | 30 | llvm::Value *Sum1 = EmitOverflowIntrinsic(*this, IntrinsicId, |
4802 | 30 | X, Y, Carry1); |
4803 | 30 | llvm::Value *Carry2; |
4804 | 30 | llvm::Value *Sum2 = EmitOverflowIntrinsic(*this, IntrinsicId, |
4805 | 30 | Sum1, Carryin, Carry2); |
4806 | 30 | llvm::Value *CarryOut = Builder.CreateZExt(Builder.CreateOr(Carry1, Carry2), |
4807 | 30 | X->getType()); |
4808 | 30 | Builder.CreateStore(CarryOut, CarryOutPtr); |
4809 | 30 | return RValue::get(Sum2); |
4810 | 30 | } |
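// [Editor's sketch, added; hypothetical two-limb adder.] Each __builtin_addc
// call expands to the pair of uadd.with.overflow calls plus the carry OR
// documented in the comment above:
unsigned add2(unsigned A0, unsigned A1, unsigned B0, unsigned B1,
              unsigned *Hi) {
  unsigned C0, C1;
  unsigned Lo = __builtin_addc(A0, B0, 0, &C0);  // low limb, carry out in C0
  *Hi = __builtin_addc(A1, B1, C0, &C1);         // high limb; C1 unused here
  return Lo;
}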
4811 | | |
4812 | 29 | case Builtin::BI__builtin_add_overflow: |
4813 | 43 | case Builtin::BI__builtin_sub_overflow: |
4814 | 105 | case Builtin::BI__builtin_mul_overflow: { |
4815 | 105 | const clang::Expr *LeftArg = E->getArg(0); |
4816 | 105 | const clang::Expr *RightArg = E->getArg(1); |
4817 | 105 | const clang::Expr *ResultArg = E->getArg(2); |
4818 | | |
4819 | 105 | clang::QualType ResultQTy = |
4820 | 105 | ResultArg->getType()->castAs<PointerType>()->getPointeeType(); |
4821 | | |
4822 | 105 | WidthAndSignedness LeftInfo = |
4823 | 105 | getIntegerWidthAndSignedness(CGM.getContext(), LeftArg->getType()); |
4824 | 105 | WidthAndSignedness RightInfo = |
4825 | 105 | getIntegerWidthAndSignedness(CGM.getContext(), RightArg->getType()); |
4826 | 105 | WidthAndSignedness ResultInfo = |
4827 | 105 | getIntegerWidthAndSignedness(CGM.getContext(), ResultQTy); |
4828 | | |
4829 | | // Handle mixed-sign multiplication as a special case, because adding |
4830 | | // runtime or backend support for our generic irgen would be too expensive. |
4831 | 105 | if (isSpecialMixedSignMultiply(BuiltinID, LeftInfo, RightInfo, ResultInfo)) |
4832 | 27 | return EmitCheckedMixedSignMultiply(*this, LeftArg, LeftInfo, RightArg, |
4833 | 27 | RightInfo, ResultArg, ResultQTy, |
4834 | 27 | ResultInfo); |
4835 | | |
4836 | 78 | if (isSpecialUnsignedMultiplySignedResult(BuiltinID, LeftInfo, RightInfo, |
4837 | 78 | ResultInfo)) |
4838 | 9 | return EmitCheckedUnsignedMultiplySignedResult( |
4839 | 9 | *this, LeftArg, LeftInfo, RightArg, RightInfo, ResultArg, ResultQTy, |
4840 | 9 | ResultInfo); |
4841 | | |
4842 | 69 | WidthAndSignedness EncompassingInfo = |
4843 | 69 | EncompassingIntegerType({LeftInfo, RightInfo, ResultInfo}); |
4844 | | |
4845 | 69 | llvm::Type *EncompassingLLVMTy = |
4846 | 69 | llvm::IntegerType::get(CGM.getLLVMContext(), EncompassingInfo.Width); |
4847 | | |
4848 | 69 | llvm::Type *ResultLLVMTy = CGM.getTypes().ConvertType(ResultQTy); |
4849 | | |
4850 | 69 | llvm::Intrinsic::ID IntrinsicId; |
4851 | 69 | switch (BuiltinID) { |
4852 | 0 | default: |
4853 | 0 | llvm_unreachable("Unknown overflow builtin id."); |
4854 | 29 | case Builtin::BI__builtin_add_overflow: |
4855 | 29 | IntrinsicId = EncompassingInfo.Signed |
4856 | 29 | ? llvm::Intrinsic::sadd_with_overflow
4857 | 29 | : llvm::Intrinsic::uadd_with_overflow;
4858 | 29 | break; |
4859 | 14 | case Builtin::BI__builtin_sub_overflow: |
4860 | 14 | IntrinsicId = EncompassingInfo.Signed |
4861 | 14 | ? llvm::Intrinsic::ssub_with_overflow
4862 | 14 | : llvm::Intrinsic::usub_with_overflow;
4863 | 14 | break; |
4864 | 26 | case Builtin::BI__builtin_mul_overflow: |
4865 | 26 | IntrinsicId = EncompassingInfo.Signed |
4866 | 26 | ? llvm::Intrinsic::smul_with_overflow
4867 | 26 | : llvm::Intrinsic::umul_with_overflow;
4868 | 26 | break; |
4869 | 69 | } |
4870 | | |
4871 | 69 | llvm::Value *Left = EmitScalarExpr(LeftArg); |
4872 | 69 | llvm::Value *Right = EmitScalarExpr(RightArg); |
4873 | 69 | Address ResultPtr = EmitPointerWithAlignment(ResultArg); |
4874 | | |
4875 | | // Extend each operand to the encompassing type. |
4876 | 69 | Left = Builder.CreateIntCast(Left, EncompassingLLVMTy, LeftInfo.Signed); |
4877 | 69 | Right = Builder.CreateIntCast(Right, EncompassingLLVMTy, RightInfo.Signed); |
4878 | | |
4879 | | // Perform the operation on the extended values. |
4880 | 69 | llvm::Value *Overflow, *Result; |
4881 | 69 | Result = EmitOverflowIntrinsic(*this, IntrinsicId, Left, Right, Overflow); |
4882 | | |
4883 | 69 | if (EncompassingInfo.Width > ResultInfo.Width) { |
4884 | | // The encompassing type is wider than the result type, so we need to |
4885 | | // truncate it. |
4886 | 9 | llvm::Value *ResultTrunc = Builder.CreateTrunc(Result, ResultLLVMTy); |
4887 | | |
4888 | | // To see if the truncation caused an overflow, we will extend |
4889 | | // the result and then compare it to the original result. |
4890 | 9 | llvm::Value *ResultTruncExt = Builder.CreateIntCast( |
4891 | 9 | ResultTrunc, EncompassingLLVMTy, ResultInfo.Signed); |
4892 | 9 | llvm::Value *TruncationOverflow = |
4893 | 9 | Builder.CreateICmpNE(Result, ResultTruncExt); |
4894 | | |
4895 | 9 | Overflow = Builder.CreateOr(Overflow, TruncationOverflow); |
4896 | 9 | Result = ResultTrunc; |
4897 | 9 | } |
4898 | | |
4899 | | // Finally, store the result using the pointer. |
4900 | 69 | bool isVolatile = |
4901 | 69 | ResultArg->getType()->getPointeeType().isVolatileQualified(); |
4902 | 69 | Builder.CreateStore(EmitToMemory(Result, ResultQTy), ResultPtr, isVolatile); |
4903 | | |
4904 | 69 | return RValue::get(Overflow); |
4905 | 69 | } |
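// [Editor's sketch, added; hypothetical caller.] With mixed widths and signs,
// both operands are extended to the encompassing type (signed 64-bit below),
// the check runs there, and the narrowing store ORs in the extra
// truncation-overflow test emitted above:
bool narrow_add(int A, long long B, short *R) {
  return __builtin_add_overflow(A, B, R);  // true if *R cannot hold the sum
}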
4906 | | |
4907 | 3 | case Builtin::BI__builtin_uadd_overflow: |
4908 | 6 | case Builtin::BI__builtin_uaddl_overflow: |
4909 | 9 | case Builtin::BI__builtin_uaddll_overflow: |
4910 | 12 | case Builtin::BI__builtin_usub_overflow: |
4911 | 15 | case Builtin::BI__builtin_usubl_overflow: |
4912 | 18 | case Builtin::BI__builtin_usubll_overflow: |
4913 | 21 | case Builtin::BI__builtin_umul_overflow: |
4914 | 24 | case Builtin::BI__builtin_umull_overflow: |
4915 | 27 | case Builtin::BI__builtin_umulll_overflow: |
4916 | 31 | case Builtin::BI__builtin_sadd_overflow: |
4917 | 34 | case Builtin::BI__builtin_saddl_overflow: |
4918 | 37 | case Builtin::BI__builtin_saddll_overflow: |
4919 | 40 | case Builtin::BI__builtin_ssub_overflow: |
4920 | 43 | case Builtin::BI__builtin_ssubl_overflow: |
4921 | 46 | case Builtin::BI__builtin_ssubll_overflow: |
4922 | 49 | case Builtin::BI__builtin_smul_overflow: |
4923 | 52 | case Builtin::BI__builtin_smull_overflow: |
4924 | 55 | case Builtin::BI__builtin_smulll_overflow: { |
4925 | | |
4926 | | // We translate all of these builtins directly to the relevant llvm IR node. |
4927 | | |
4928 | | // Scalarize our inputs. |
4929 | 55 | llvm::Value *X = EmitScalarExpr(E->getArg(0)); |
4930 | 55 | llvm::Value *Y = EmitScalarExpr(E->getArg(1)); |
4931 | 55 | Address SumOutPtr = EmitPointerWithAlignment(E->getArg(2)); |
4932 | | |
4933 | | // Decide which of the overflow intrinsics we are lowering to: |
4934 | 55 | llvm::Intrinsic::ID IntrinsicId; |
4935 | 55 | switch (BuiltinID) { |
4936 | 0 | default: llvm_unreachable("Unknown overflow builtin id."); |
4937 | 3 | case Builtin::BI__builtin_uadd_overflow: |
4938 | 6 | case Builtin::BI__builtin_uaddl_overflow: |
4939 | 9 | case Builtin::BI__builtin_uaddll_overflow: |
4940 | 9 | IntrinsicId = llvm::Intrinsic::uadd_with_overflow; |
4941 | 9 | break; |
4942 | 3 | case Builtin::BI__builtin_usub_overflow: |
4943 | 6 | case Builtin::BI__builtin_usubl_overflow: |
4944 | 9 | case Builtin::BI__builtin_usubll_overflow: |
4945 | 9 | IntrinsicId = llvm::Intrinsic::usub_with_overflow; |
4946 | 9 | break; |
4947 | 3 | case Builtin::BI__builtin_umul_overflow: |
4948 | 6 | case Builtin::BI__builtin_umull_overflow: |
4949 | 9 | case Builtin::BI__builtin_umulll_overflow: |
4950 | 9 | IntrinsicId = llvm::Intrinsic::umul_with_overflow; |
4951 | 9 | break; |
4952 | 4 | case Builtin::BI__builtin_sadd_overflow: |
4953 | 7 | case Builtin::BI__builtin_saddl_overflow: |
4954 | 10 | case Builtin::BI__builtin_saddll_overflow: |
4955 | 10 | IntrinsicId = llvm::Intrinsic::sadd_with_overflow; |
4956 | 10 | break; |
4957 | 3 | case Builtin::BI__builtin_ssub_overflow: |
4958 | 6 | case Builtin::BI__builtin_ssubl_overflow: |
4959 | 9 | case Builtin::BI__builtin_ssubll_overflow: |
4960 | 9 | IntrinsicId = llvm::Intrinsic::ssub_with_overflow; |
4961 | 9 | break; |
4962 | 3 | case Builtin::BI__builtin_smul_overflow: |
4963 | 6 | case Builtin::BI__builtin_smull_overflow: |
4964 | 9 | case Builtin::BI__builtin_smulll_overflow: |
4965 | 9 | IntrinsicId = llvm::Intrinsic::smul_with_overflow; |
4966 | 9 | break; |
4967 | 55 | } |
4968 | | |
4969 | | |
4970 | 55 | llvm::Value *Carry; |
4971 | 55 | llvm::Value *Sum = EmitOverflowIntrinsic(*this, IntrinsicId, X, Y, Carry); |
4972 | 55 | Builder.CreateStore(Sum, SumOutPtr); |
4973 | | |
4974 | 55 | return RValue::get(Carry); |
4975 | 55 | } |
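// [Editor's sketch, added; hypothetical caller.] The fixed-type forms map
// one-to-one onto the intrinsics selected above; the sum is stored and the
// i1 carry bit is returned:
bool checked_sum(int A, int B, int *Sum) {
  return __builtin_sadd_overflow(A, B, Sum);  // llvm.sadd.with.overflow.i32
}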
4976 | 1.01k | case Builtin::BIaddressof: |
4977 | 1.01k | case Builtin::BI__addressof: |
4978 | 1.05k | case Builtin::BI__builtin_addressof: |
4979 | 1.05k | return RValue::get(EmitLValue(E->getArg(0)).getPointer(*this)); |
4980 | 7 | case Builtin::BI__builtin_function_start: |
4981 | 7 | return RValue::get(CGM.GetFunctionStart( |
4982 | 7 | E->getArg(0)->getAsBuiltinConstantDeclRef(CGM.getContext()))); |
4983 | 237 | case Builtin::BI__builtin_operator_new: |
4984 | 237 | return EmitBuiltinNewDeleteCall( |
4985 | 237 | E->getCallee()->getType()->castAs<FunctionProtoType>(), E, false); |
4986 | 252 | case Builtin::BI__builtin_operator_delete: |
4987 | 252 | EmitBuiltinNewDeleteCall( |
4988 | 252 | E->getCallee()->getType()->castAs<FunctionProtoType>(), E, true); |
4989 | 252 | return RValue::get(nullptr); |
4990 | | |
4991 | 5 | case Builtin::BI__builtin_is_aligned: |
4992 | 5 | return EmitBuiltinIsAligned(E); |
4993 | 7 | case Builtin::BI__builtin_align_up: |
4994 | 7 | return EmitBuiltinAlignTo(E, true); |
4995 | 6 | case Builtin::BI__builtin_align_down: |
4996 | 6 | return EmitBuiltinAlignTo(E, false); |
4997 | | |
4998 | 7 | case Builtin::BI__noop: |
4999 | | // __noop always evaluates to an integer literal zero. |
5000 | 7 | return RValue::get(ConstantInt::get(IntTy, 0)); |
5001 | 8 | case Builtin::BI__builtin_call_with_static_chain: { |
5002 | 8 | const CallExpr *Call = cast<CallExpr>(E->getArg(0)); |
5003 | 8 | const Expr *Chain = E->getArg(1); |
5004 | 8 | return EmitCall(Call->getCallee()->getType(), |
5005 | 8 | EmitCallee(Call->getCallee()), Call, ReturnValue, |
5006 | 8 | EmitScalarExpr(Chain)); |
5007 | 1.01k | } |
5008 | 4 | case Builtin::BI_InterlockedExchange8: |
5009 | 8 | case Builtin::BI_InterlockedExchange16: |
5010 | 22 | case Builtin::BI_InterlockedExchange: |
5011 | 26 | case Builtin::BI_InterlockedExchangePointer: |
5012 | 26 | return RValue::get( |
5013 | 26 | EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E)); |
5014 | 4 | case Builtin::BI_InterlockedCompareExchangePointer: |
5015 | 8 | case Builtin::BI_InterlockedCompareExchangePointer_nf: { |
5016 | 8 | llvm::Type *RTy; |
5017 | 8 | llvm::IntegerType *IntType = IntegerType::get( |
5018 | 8 | getLLVMContext(), getContext().getTypeSize(E->getType())); |
5019 | | |
5020 | 8 | llvm::Value *Destination = EmitScalarExpr(E->getArg(0)); |
5021 | | |
5022 | 8 | llvm::Value *Exchange = EmitScalarExpr(E->getArg(1)); |
5023 | 8 | RTy = Exchange->getType(); |
5024 | 8 | Exchange = Builder.CreatePtrToInt(Exchange, IntType); |
5025 | | |
5026 | 8 | llvm::Value *Comparand = |
5027 | 8 | Builder.CreatePtrToInt(EmitScalarExpr(E->getArg(2)), IntType); |
5028 | | |
5029 | 8 | auto Ordering = |
5030 | 8 | BuiltinID == Builtin::BI_InterlockedCompareExchangePointer_nf ? |
5031 | 4 | AtomicOrdering::Monotonic : AtomicOrdering::SequentiallyConsistent; |
5032 | | |
5033 | 8 | auto Result = Builder.CreateAtomicCmpXchg(Destination, Comparand, Exchange, |
5034 | 8 | Ordering, Ordering); |
5035 | 8 | Result->setVolatile(true); |
5036 | | |
5037 | 8 | return RValue::get(Builder.CreateIntToPtr(Builder.CreateExtractValue(Result, |
5038 | 8 | 0), |
5039 | 8 | RTy)); |
5040 | 4 | } |
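// [Editor's sketch, added; hypothetical MSVC-mode caller.] The pointer
// operands round-trip through a pointer-width integer so a cmpxchg can be
// emitted, then the old value is converted back:
void *cas_ptr(void *volatile *Dst, void *Exchange, void *Comparand) {
  return _InterlockedCompareExchangePointer(Dst, Exchange, Comparand);
}
// Approximate IR (assuming a 64-bit target):
//   %r = cmpxchg volatile ptr %Dst, i64 %cmp, i64 %xchg seq_cst seq_cst
//   %old = extractvalue { i64, i1 } %r, 0   ; then inttoptr back to ptr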
5041 | 4 | case Builtin::BI_InterlockedCompareExchange8: |
5042 | 8 | case Builtin::BI_InterlockedCompareExchange16: |
5043 | 22 | case Builtin::BI_InterlockedCompareExchange: |
5044 | 26 | case Builtin::BI_InterlockedCompareExchange64: |
5045 | 26 | return RValue::get(EmitAtomicCmpXchgForMSIntrin(*this, E)); |
5046 | 4 | case Builtin::BI_InterlockedIncrement16: |
5047 | 18 | case Builtin::BI_InterlockedIncrement: |
5048 | 18 | return RValue::get( |
5049 | 18 | EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E)); |
5050 | 4 | case Builtin::BI_InterlockedDecrement16: |
5051 | 18 | case Builtin::BI_InterlockedDecrement: |
5052 | 18 | return RValue::get( |
5053 | 18 | EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E)); |
5054 | 4 | case Builtin::BI_InterlockedAnd8: |
5055 | 8 | case Builtin::BI_InterlockedAnd16: |
5056 | 22 | case Builtin::BI_InterlockedAnd: |
5057 | 22 | return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E)); |
5058 | 4 | case Builtin::BI_InterlockedExchangeAdd8: |
5059 | 8 | case Builtin::BI_InterlockedExchangeAdd16: |
5060 | 22 | case Builtin::BI_InterlockedExchangeAdd: |
5061 | 22 | return RValue::get( |
5062 | 22 | EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E)); |
5063 | 4 | case Builtin::BI_InterlockedExchangeSub8: |
5064 | 8 | case Builtin::BI_InterlockedExchangeSub16: |
5065 | 22 | case Builtin::BI_InterlockedExchangeSub: |
5066 | 22 | return RValue::get( |
5067 | 22 | EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E)); |
5068 | 4 | case Builtin::BI_InterlockedOr8: |
5069 | 8 | case Builtin::BI_InterlockedOr16: |
5070 | 22 | case Builtin::BI_InterlockedOr: |
5071 | 22 | return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E)); |
5072 | 4 | case Builtin::BI_InterlockedXor8: |
5073 | 8 | case Builtin::BI_InterlockedXor16: |
5074 | 22 | case Builtin::BI_InterlockedXor: |
5075 | 22 | return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E)); |
5076 | | |
5077 | 3 | case Builtin::BI_bittest64: |
5078 | 6 | case Builtin::BI_bittest: |
5079 | 9 | case Builtin::BI_bittestandcomplement64: |
5080 | 12 | case Builtin::BI_bittestandcomplement: |
5081 | 15 | case Builtin::BI_bittestandreset64: |
5082 | 18 | case Builtin::BI_bittestandreset: |
5083 | 21 | case Builtin::BI_bittestandset64: |
5084 | 24 | case Builtin::BI_bittestandset: |
5085 | 27 | case Builtin::BI_interlockedbittestandreset: |
5086 | 30 | case Builtin::BI_interlockedbittestandreset64: |
5087 | 33 | case Builtin::BI_interlockedbittestandset64: |
5088 | 39 | case Builtin::BI_interlockedbittestandset: |
5089 | 41 | case Builtin::BI_interlockedbittestandset_acq: |
5090 | 43 | case Builtin::BI_interlockedbittestandset_rel: |
5091 | 45 | case Builtin::BI_interlockedbittestandset_nf: |
5092 | 47 | case Builtin::BI_interlockedbittestandreset_acq: |
5093 | 49 | case Builtin::BI_interlockedbittestandreset_rel: |
5094 | 51 | case Builtin::BI_interlockedbittestandreset_nf: |
5095 | 51 | return RValue::get(EmitBitTestIntrinsic(*this, BuiltinID, E)); |
5096 | | |
5097 | | // These builtins exist to emit regular volatile loads and stores not |
5098 | | // affected by the -fms-volatile setting. |
5099 | 4 | case Builtin::BI__iso_volatile_load8: |
5100 | 8 | case Builtin::BI__iso_volatile_load16: |
5101 | 12 | case Builtin::BI__iso_volatile_load32: |
5102 | 16 | case Builtin::BI__iso_volatile_load64: |
5103 | 16 | return RValue::get(EmitISOVolatileLoad(*this, E)); |
5104 | 4 | case Builtin::BI__iso_volatile_store8: |
5105 | 8 | case Builtin::BI__iso_volatile_store16: |
5106 | 12 | case Builtin::BI__iso_volatile_store32: |
5107 | 16 | case Builtin::BI__iso_volatile_store64: |
5108 | 16 | return RValue::get(EmitISOVolatileStore(*this, E)); |
5109 | | |
5110 | 0 | case Builtin::BI__exception_code: |
5111 | 18 | case Builtin::BI_exception_code: |
5112 | 18 | return RValue::get(EmitSEHExceptionCode()); |
5113 | 0 | case Builtin::BI__exception_info: |
5114 | 0 | case Builtin::BI_exception_info: |
5115 | 0 | return RValue::get(EmitSEHExceptionInfo()); |
5116 | 3 | case Builtin::BI__abnormal_termination: |
5117 | 5 | case Builtin::BI_abnormal_termination: |
5118 | 5 | return RValue::get(EmitSEHAbnormalTermination()); |
5119 | 6 | case Builtin::BI_setjmpex: |
5120 | 6 | if (getTarget().getTriple().isOSMSVCRT() && E->getNumArgs() == 1 && |
5121 | 6 | E->getArg(0)->getType()->isPointerType()) |
5122 | 6 | return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmpex, E); |
5123 | 0 | break; |
5124 | 11 | case Builtin::BI_setjmp: |
5125 | 11 | if (getTarget().getTriple().isOSMSVCRT() && E->getNumArgs() == 16 && |
5126 | 11 | E->getArg(0)->getType()->isPointerType()) {
5127 | 6 | if (getTarget().getTriple().getArch() == llvm::Triple::x86) |
5128 | 2 | return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmp3, E); |
5129 | 4 | else if (getTarget().getTriple().getArch() == llvm::Triple::aarch64) |
5130 | 2 | return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmpex, E); |
5131 | 2 | return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmp, E); |
5132 | 6 | } |
5133 | 5 | break; |
5134 | | |
5135 | | // C++ std:: builtins. |
5136 | 5.57k | case Builtin::BImove: |
5137 | 5.69k | case Builtin::BImove_if_noexcept: |
5138 | 15.6k | case Builtin::BIforward: |
5139 | 15.6k | case Builtin::BIforward_like: |
5140 | 15.6k | case Builtin::BIas_const: |
5141 | 15.6k | return RValue::get(EmitLValue(E->getArg(0)).getPointer(*this)); |
5142 | 3 | case Builtin::BI__GetExceptionInfo: { |
5143 | 3 | if (llvm::GlobalVariable *GV = |
5144 | 3 | CGM.getCXXABI().getThrowInfo(FD->getParamDecl(0)->getType())) |
5145 | 3 | return RValue::get(GV); |
5146 | 0 | break; |
5147 | 3 | } |
5148 | | |
5149 | 4 | case Builtin::BI__fastfail: |
5150 | 4 | return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::__fastfail, E)); |
5151 | | |
5152 | 3 | case Builtin::BI__builtin_coro_id: |
5153 | 3 | return EmitCoroutineIntrinsic(E, Intrinsic::coro_id); |
5154 | 28 | case Builtin::BI__builtin_coro_promise: |
5155 | 28 | return EmitCoroutineIntrinsic(E, Intrinsic::coro_promise); |
5156 | 34 | case Builtin::BI__builtin_coro_resume: |
5157 | 34 | EmitCoroutineIntrinsic(E, Intrinsic::coro_resume); |
5158 | 34 | return RValue::get(nullptr); |
5159 | 420 | case Builtin::BI__builtin_coro_frame: |
5160 | 420 | return EmitCoroutineIntrinsic(E, Intrinsic::coro_frame); |
5161 | 4 | case Builtin::BI__builtin_coro_noop: |
5162 | 4 | return EmitCoroutineIntrinsic(E, Intrinsic::coro_noop); |
5163 | 136 | case Builtin::BI__builtin_coro_free: |
5164 | 136 | return EmitCoroutineIntrinsic(E, Intrinsic::coro_free); |
5165 | 14 | case Builtin::BI__builtin_coro_destroy: |
5166 | 14 | EmitCoroutineIntrinsic(E, Intrinsic::coro_destroy); |
5167 | 14 | return RValue::get(nullptr); |
5168 | 5 | case Builtin::BI__builtin_coro_done: |
5169 | 5 | return EmitCoroutineIntrinsic(E, Intrinsic::coro_done); |
5170 | 2 | case Builtin::BI__builtin_coro_alloc: |
5171 | 2 | return EmitCoroutineIntrinsic(E, Intrinsic::coro_alloc); |
5172 | 2 | case Builtin::BI__builtin_coro_begin: |
5173 | 2 | return EmitCoroutineIntrinsic(E, Intrinsic::coro_begin); |
5174 | 1 | case Builtin::BI__builtin_coro_end: |
5175 | 1 | return EmitCoroutineIntrinsic(E, Intrinsic::coro_end); |
5176 | 1 | case Builtin::BI__builtin_coro_suspend: |
5177 | 1 | return EmitCoroutineIntrinsic(E, Intrinsic::coro_suspend); |
5178 | 117 | case Builtin::BI__builtin_coro_size: |
5179 | 117 | return EmitCoroutineIntrinsic(E, Intrinsic::coro_size); |
5180 | 21 | case Builtin::BI__builtin_coro_align: |
5181 | 21 | return EmitCoroutineIntrinsic(E, Intrinsic::coro_align); |
5182 | | |
5183 | | // OpenCL v2.0 s6.13.16.2, Built-in pipe read and write functions |
5184 | 13 | case Builtin::BIread_pipe: |
5185 | 16 | case Builtin::BIwrite_pipe: { |
5186 | 16 | Value *Arg0 = EmitScalarExpr(E->getArg(0)), |
5187 | 16 | *Arg1 = EmitScalarExpr(E->getArg(1)); |
5188 | 16 | CGOpenCLRuntime OpenCLRT(CGM); |
5189 | 16 | Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0)); |
5190 | 16 | Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0)); |
5191 | | |
5192 | | // Type of the generic packet parameter. |
5193 | 16 | unsigned GenericAS = |
5194 | 16 | getContext().getTargetAddressSpace(LangAS::opencl_generic); |
5195 | 16 | llvm::Type *I8PTy = llvm::PointerType::get(getLLVMContext(), GenericAS); |
5196 | | |
5197 | | // Testing which overloaded version we should generate the call for. |
5198 | 16 | if (2U == E->getNumArgs()) { |
5199 | 14 | const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_2"
5200 | 14 | : "__write_pipe_2";
5201 | | // Creating a generic function type to be able to call with any builtin or |
5202 | | // user defined type. |
5203 | 14 | llvm::Type *ArgTys[] = {Arg0->getType(), I8PTy, Int32Ty, Int32Ty}; |
5204 | 14 | llvm::FunctionType *FTy = llvm::FunctionType::get( |
5205 | 14 | Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false); |
5206 | 14 | Value *BCast = Builder.CreatePointerCast(Arg1, I8PTy); |
5207 | 14 | return RValue::get( |
5208 | 14 | EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), |
5209 | 14 | {Arg0, BCast, PacketSize, PacketAlign})); |
5210 | 14 | } else { |
5211 | 2 | assert(4 == E->getNumArgs() && |
5212 | 2 | "Illegal number of parameters to pipe function"); |
5213 | 2 | const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_4"
5214 | 2 | : "__write_pipe_4";
5215 | | |
5216 | 2 | llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, I8PTy, |
5217 | 2 | Int32Ty, Int32Ty}; |
5218 | 2 | Value *Arg2 = EmitScalarExpr(E->getArg(2)), |
5219 | 2 | *Arg3 = EmitScalarExpr(E->getArg(3)); |
5220 | 2 | llvm::FunctionType *FTy = llvm::FunctionType::get( |
5221 | 2 | Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false); |
5222 | 2 | Value *BCast = Builder.CreatePointerCast(Arg3, I8PTy); |
5223 | | // We know the third argument is an integer type, but we may need to cast |
5224 | | // it to i32. |
5225 | 2 | if (Arg2->getType() != Int32Ty) |
5226 | 0 | Arg2 = Builder.CreateZExtOrTrunc(Arg2, Int32Ty); |
5227 | 2 | return RValue::get( |
5228 | 2 | EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), |
5229 | 2 | {Arg0, Arg1, Arg2, BCast, PacketSize, PacketAlign})); |
5230 | 2 | } |
5231 | 16 | } |
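// [Editor's note, added.] For the two-argument OpenCL form, e.g.
// read_pipe(p, &val), the code above emits a call to a runtime entry point
// of roughly this shape (the generic address space is target-dependent):
//   i32 @__read_pipe_2(ptr %pipe, ptr %packet, i32 %size, i32 %align)
// where %size and %align come from the pipe's element type via
// CGOpenCLRuntime.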
5232 | | // OpenCL v2.0 s6.13.16 ,s9.17.3.5 - Built-in pipe reserve read and write |
5233 | | // functions |
5234 | 1 | case Builtin::BIreserve_read_pipe: |
5235 | 2 | case Builtin::BIreserve_write_pipe: |
5236 | 3 | case Builtin::BIwork_group_reserve_read_pipe: |
5237 | 4 | case Builtin::BIwork_group_reserve_write_pipe: |
5238 | 5 | case Builtin::BIsub_group_reserve_read_pipe: |
5239 | 6 | case Builtin::BIsub_group_reserve_write_pipe: { |
5240 | | // Composing the mangled name for the function. |
5241 | 6 | const char *Name; |
5242 | 6 | if (BuiltinID == Builtin::BIreserve_read_pipe) |
5243 | 1 | Name = "__reserve_read_pipe"; |
5244 | 5 | else if (BuiltinID == Builtin::BIreserve_write_pipe) |
5245 | 1 | Name = "__reserve_write_pipe"; |
5246 | 4 | else if (BuiltinID == Builtin::BIwork_group_reserve_read_pipe) |
5247 | 1 | Name = "__work_group_reserve_read_pipe"; |
5248 | 3 | else if (BuiltinID == Builtin::BIwork_group_reserve_write_pipe) |
5249 | 1 | Name = "__work_group_reserve_write_pipe"; |
5250 | 2 | else if (BuiltinID == Builtin::BIsub_group_reserve_read_pipe) |
5251 | 1 | Name = "__sub_group_reserve_read_pipe"; |
5252 | 1 | else |
5253 | 1 | Name = "__sub_group_reserve_write_pipe"; |
5254 | | |
5255 | 6 | Value *Arg0 = EmitScalarExpr(E->getArg(0)), |
5256 | 6 | *Arg1 = EmitScalarExpr(E->getArg(1)); |
5257 | 6 | llvm::Type *ReservedIDTy = ConvertType(getContext().OCLReserveIDTy); |
5258 | 6 | CGOpenCLRuntime OpenCLRT(CGM); |
5259 | 6 | Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0)); |
5260 | 6 | Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0)); |
5261 | | |
5262 | | // Building the generic function prototype. |
5263 | 6 | llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty, Int32Ty, Int32Ty}; |
5264 | 6 | llvm::FunctionType *FTy = llvm::FunctionType::get( |
5265 | 6 | ReservedIDTy, llvm::ArrayRef<llvm::Type *>(ArgTys), false); |
5266 | | // We know the second argument is an integer type, but we may need to cast |
5267 | | // it to i32. |
5268 | 6 | if (Arg1->getType() != Int32Ty) |
5269 | 0 | Arg1 = Builder.CreateZExtOrTrunc(Arg1, Int32Ty); |
5270 | 6 | return RValue::get(EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), |
5271 | 6 | {Arg0, Arg1, PacketSize, PacketAlign})); |
5272 | 5 | } |
5273 | | // OpenCL v2.0 s6.13.16, s9.17.3.5 - Built-in pipe commit read and write |
5274 | | // functions |
5275 | 1 | case Builtin::BIcommit_read_pipe: |
5276 | 2 | case Builtin::BIcommit_write_pipe: |
5277 | 3 | case Builtin::BIwork_group_commit_read_pipe: |
5278 | 4 | case Builtin::BIwork_group_commit_write_pipe: |
5279 | 5 | case Builtin::BIsub_group_commit_read_pipe: |
5280 | 6 | case Builtin::BIsub_group_commit_write_pipe: { |
5281 | 6 | const char *Name; |
5282 | 6 | if (BuiltinID == Builtin::BIcommit_read_pipe) |
5283 | 1 | Name = "__commit_read_pipe"; |
5284 | 5 | else if (BuiltinID == Builtin::BIcommit_write_pipe) |
5285 | 1 | Name = "__commit_write_pipe"; |
5286 | 4 | else if (BuiltinID == Builtin::BIwork_group_commit_read_pipe) |
5287 | 1 | Name = "__work_group_commit_read_pipe"; |
5288 | 3 | else if (BuiltinID == Builtin::BIwork_group_commit_write_pipe) |
5289 | 1 | Name = "__work_group_commit_write_pipe"; |
5290 | 2 | else if (BuiltinID == Builtin::BIsub_group_commit_read_pipe) |
5291 | 1 | Name = "__sub_group_commit_read_pipe"; |
5292 | 1 | else |
5293 | 1 | Name = "__sub_group_commit_write_pipe"; |
5294 | | |
5295 | 6 | Value *Arg0 = EmitScalarExpr(E->getArg(0)), |
5296 | 6 | *Arg1 = EmitScalarExpr(E->getArg(1)); |
5297 | 6 | CGOpenCLRuntime OpenCLRT(CGM); |
5298 | 6 | Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0)); |
5299 | 6 | Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0)); |
5300 | | |
5301 | | // Building the generic function prototype. |
5302 | 6 | llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, Int32Ty}; |
5303 | 6 | llvm::FunctionType *FTy = |
5304 | 6 | llvm::FunctionType::get(llvm::Type::getVoidTy(getLLVMContext()), |
5305 | 6 | llvm::ArrayRef<llvm::Type *>(ArgTys), false); |
5306 | | |
5307 | 6 | return RValue::get(EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), |
5308 | 6 | {Arg0, Arg1, PacketSize, PacketAlign})); |
5309 | 5 | } |
5310 | | // OpenCL v2.0 s6.13.16.4 Built-in pipe query functions |
5311 | 4 | case Builtin::BIget_pipe_num_packets: |
5312 | 8 | case Builtin::BIget_pipe_max_packets: { |
5313 | 8 | const char *BaseName; |
5314 | 8 | const auto *PipeTy = E->getArg(0)->getType()->castAs<PipeType>(); |
5315 | 8 | if (BuiltinID == Builtin::BIget_pipe_num_packets) |
5316 | 4 | BaseName = "__get_pipe_num_packets"; |
5317 | 4 | else |
5318 | 4 | BaseName = "__get_pipe_max_packets"; |
5319 | 8 | std::string Name = std::string(BaseName) + |
5320 | 8 | std::string(PipeTy->isReadOnly() ? "_ro" : "_wo");
5321 | | |
5322 | | // Building the generic function prototype. |
5323 | 8 | Value *Arg0 = EmitScalarExpr(E->getArg(0)); |
5324 | 8 | CGOpenCLRuntime OpenCLRT(CGM); |
5325 | 8 | Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0)); |
5326 | 8 | Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0)); |
5327 | 8 | llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty, Int32Ty}; |
5328 | 8 | llvm::FunctionType *FTy = llvm::FunctionType::get( |
5329 | 8 | Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false); |
5330 | | |
5331 | 8 | return RValue::get(EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), |
5332 | 8 | {Arg0, PacketSize, PacketAlign})); |
5333 | 4 | } |
5334 | | |
5335 | | // OpenCL v2.0 s6.13.9 - Address space qualifier functions. |
5336 | 19 | case Builtin::BIto_global: |
5337 | 32 | case Builtin::BIto_local: |
5338 | 45 | case Builtin::BIto_private: { |
5339 | 45 | auto Arg0 = EmitScalarExpr(E->getArg(0)); |
5340 | 45 | auto NewArgT = llvm::PointerType::get( |
5341 | 45 | getLLVMContext(), |
5342 | 45 | CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic)); |
5343 | 45 | auto NewRetT = llvm::PointerType::get( |
5344 | 45 | getLLVMContext(), |
5345 | 45 | CGM.getContext().getTargetAddressSpace( |
5346 | 45 | E->getType()->getPointeeType().getAddressSpace())); |
5347 | 45 | auto FTy = llvm::FunctionType::get(NewRetT, {NewArgT}, false); |
5348 | 45 | llvm::Value *NewArg; |
5349 | 45 | if (Arg0->getType()->getPointerAddressSpace() != |
5350 | 45 | NewArgT->getPointerAddressSpace()) |
5351 | 30 | NewArg = Builder.CreateAddrSpaceCast(Arg0, NewArgT); |
5352 | 15 | else |
5353 | 15 | NewArg = Builder.CreateBitOrPointerCast(Arg0, NewArgT); |
5354 | 45 | auto NewName = std::string("__") + E->getDirectCallee()->getName().str(); |
5355 | 45 | auto NewCall = |
5356 | 45 | EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, NewName), {NewArg}); |
5357 | 45 | return RValue::get(Builder.CreateBitOrPointerCast(NewCall, |
5358 | 45 | ConvertType(E->getType()))); |
5359 | 32 | } |
5360 | | |
5361 | | // OpenCL v2.0, s6.13.17 - Enqueue kernel function. |
5362 | | // It contains four different overload formats specified in Table 6.13.17.1. |
5363 | 125 | case Builtin::BIenqueue_kernel: { |
5364 | 125 | StringRef Name; // Generated function call name |
5365 | 125 | unsigned NumArgs = E->getNumArgs(); |
5366 | | |
5367 | 125 | llvm::Type *QueueTy = ConvertType(getContext().OCLQueueTy); |
5368 | 125 | llvm::Type *GenericVoidPtrTy = Builder.getPtrTy( |
5369 | 125 | getContext().getTargetAddressSpace(LangAS::opencl_generic)); |
5370 | | |
5371 | 125 | llvm::Value *Queue = EmitScalarExpr(E->getArg(0)); |
5372 | 125 | llvm::Value *Flags = EmitScalarExpr(E->getArg(1)); |
5373 | 125 | LValue NDRangeL = EmitAggExprToLValue(E->getArg(2)); |
5374 | 125 | llvm::Value *Range = NDRangeL.getAddress(*this).getPointer(); |
5375 | 125 | llvm::Type *RangeTy = NDRangeL.getAddress(*this).getType(); |
5376 | | |
5377 | 125 | if (NumArgs == 4) { |
5378 | | // The most basic form of the call with parameters: |
5379 | | // queue_t, kernel_enqueue_flags_t, ndrange_t, block(void) |
5380 | 38 | Name = "__enqueue_kernel_basic"; |
5381 | 38 | llvm::Type *ArgTys[] = {QueueTy, Int32Ty, RangeTy, GenericVoidPtrTy, |
5382 | 38 | GenericVoidPtrTy}; |
5383 | 38 | llvm::FunctionType *FTy = llvm::FunctionType::get( |
5384 | 38 | Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false); |
5385 | | |
5386 | 38 | auto Info = |
5387 | 38 | CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(3)); |
5388 | 38 | llvm::Value *Kernel = |
5389 | 38 | Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy); |
5390 | 38 | llvm::Value *Block = |
5391 | 38 | Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy); |
5392 | | |
5393 | 38 | AttrBuilder B(Builder.getContext()); |
5394 | 38 | B.addByValAttr(NDRangeL.getAddress(*this).getElementType()); |
5395 | 38 | llvm::AttributeList ByValAttrSet = |
5396 | 38 | llvm::AttributeList::get(CGM.getModule().getContext(), 3U, B); |
5397 | | |
5398 | 38 | auto RTCall = |
5399 | 38 | EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name, ByValAttrSet), |
5400 | 38 | {Queue, Flags, Range, Kernel, Block}); |
5401 | 38 | RTCall->setAttributes(ByValAttrSet); |
5402 | 38 | return RValue::get(RTCall); |
5403 | 38 | } |
5404 | 87 | assert(NumArgs >= 5 && "Invalid enqueue_kernel signature"); |
5405 | | |
5406 | | // Create a temporary array to hold the sizes of local pointer arguments |
5407 | | // for the block. \p First is the position of the first size argument. |
5408 | 87 | auto CreateArrayForSizeVar = [=](unsigned First) |
5409 | 87 | -> std::tuple<llvm::Value *, llvm::Value *, llvm::Value *> { |
5410 | 69 | llvm::APInt ArraySize(32, NumArgs - First); |
5411 | 69 | QualType SizeArrayTy = getContext().getConstantArrayType( |
5412 | 69 | getContext().getSizeType(), ArraySize, nullptr, |
5413 | 69 | ArraySizeModifier::Normal, |
5414 | 69 | /*IndexTypeQuals=*/0); |
5415 | 69 | auto Tmp = CreateMemTemp(SizeArrayTy, "block_sizes"); |
5416 | 69 | llvm::Value *TmpPtr = Tmp.getPointer(); |
5417 | 69 | llvm::Value *TmpSize = EmitLifetimeStart( |
5418 | 69 | CGM.getDataLayout().getTypeAllocSize(Tmp.getElementType()), TmpPtr); |
5419 | 69 | llvm::Value *ElemPtr; |
5420 | | // Each of the following arguments specifies the size of the corresponding |
5421 | | // argument passed to the enqueued block. |
5422 | 69 | auto *Zero = llvm::ConstantInt::get(IntTy, 0); |
5423 | 156 | for (unsigned I = First; I < NumArgs; ++I) {
5424 | 87 | auto *Index = llvm::ConstantInt::get(IntTy, I - First); |
5425 | 87 | auto *GEP = Builder.CreateGEP(Tmp.getElementType(), TmpPtr, |
5426 | 87 | {Zero, Index}); |
5427 | 87 | if (I == First) |
5428 | 69 | ElemPtr = GEP; |
5429 | 87 | auto *V = |
5430 | 87 | Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(I)), SizeTy); |
5431 | 87 | Builder.CreateAlignedStore( |
5432 | 87 | V, GEP, CGM.getDataLayout().getPrefTypeAlign(SizeTy)); |
5433 | 87 | } |
5434 | 69 | return std::tie(ElemPtr, TmpSize, TmpPtr); |
5435 | 69 | }; |
5436 | | |
5437 | | // Could have events and/or varargs. |
5438 | 87 | if (E->getArg(3)->getType()->isBlockPointerType()) { |
5439 | | // No events passed, but has variadic arguments. |
5440 | 51 | Name = "__enqueue_kernel_varargs"; |
5441 | 51 | auto Info = |
5442 | 51 | CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(3)); |
5443 | 51 | llvm::Value *Kernel = |
5444 | 51 | Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy); |
5445 | 51 | auto *Block = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy); |
5446 | 51 | llvm::Value *ElemPtr, *TmpSize, *TmpPtr; |
5447 | 51 | std::tie(ElemPtr, TmpSize, TmpPtr) = CreateArrayForSizeVar(4); |
5448 | | |
5449 | | // Create a vector of the arguments, as well as a constant value to |
5450 | | // express to the runtime the number of variadic arguments. |
5451 | 51 | llvm::Value *const Args[] = {Queue, Flags, |
5452 | 51 | Range, Kernel, |
5453 | 51 | Block, ConstantInt::get(IntTy, NumArgs - 4), |
5454 | 51 | ElemPtr}; |
5455 | 51 | llvm::Type *const ArgTys[] = { |
5456 | 51 | QueueTy, IntTy, RangeTy, GenericVoidPtrTy, |
5457 | 51 | GenericVoidPtrTy, IntTy, ElemPtr->getType()}; |
5458 | | |
5459 | 51 | llvm::FunctionType *FTy = llvm::FunctionType::get(Int32Ty, ArgTys, false); |
5460 | 51 | auto Call = RValue::get( |
5461 | 51 | EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Args)); |
5462 | 51 | if (TmpSize) |
5463 | 16 | EmitLifetimeEnd(TmpSize, TmpPtr); |
5464 | 51 | return Call; |
5465 | 51 | } |
5466 | | // Any calls now have event arguments passed. |
5467 | 36 | if (NumArgs >= 7) { |
5468 | 36 | llvm::PointerType *PtrTy = llvm::PointerType::get( |
5469 | 36 | CGM.getLLVMContext(), |
5470 | 36 | CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic)); |
5471 | | |
5472 | 36 | llvm::Value *NumEvents = |
5473 | 36 | Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(3)), Int32Ty); |
5474 | | |
5475 | | // Since SemaOpenCLBuiltinEnqueueKernel allows fifth and sixth arguments |
5476 | | // to be a null pointer constant (including `0` literal), we can take it |
5477 | | // into account and emit null pointer directly. |
5478 | 36 | llvm::Value *EventWaitList = nullptr; |
5479 | 36 | if (E->getArg(4)->isNullPointerConstant( |
5480 | 36 | getContext(), Expr::NPC_ValueDependentIsNotNull)) { |
5481 | 9 | EventWaitList = llvm::ConstantPointerNull::get(PtrTy); |
5482 | 27 | } else { |
5483 | 27 | EventWaitList = E->getArg(4)->getType()->isArrayType() |
5484 | 27 | ? EmitArrayToPointerDecay(E->getArg(4)).getPointer()
5485 | 27 | : EmitScalarExpr(E->getArg(4));
5486 | | // Convert to generic address space. |
5487 | 27 | EventWaitList = Builder.CreatePointerCast(EventWaitList, PtrTy); |
5488 | 27 | } |
5489 | 36 | llvm::Value *EventRet = nullptr; |
5490 | 36 | if (E->getArg(5)->isNullPointerConstant( |
5491 | 36 | getContext(), Expr::NPC_ValueDependentIsNotNull)) { |
5492 | 9 | EventRet = llvm::ConstantPointerNull::get(PtrTy); |
5493 | 27 | } else { |
5494 | 27 | EventRet = |
5495 | 27 | Builder.CreatePointerCast(EmitScalarExpr(E->getArg(5)), PtrTy); |
5496 | 27 | } |
5497 | | |
5498 | 36 | auto Info = |
5499 | 36 | CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(6)); |
5500 | 36 | llvm::Value *Kernel = |
5501 | 36 | Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy); |
5502 | 36 | llvm::Value *Block = |
5503 | 36 | Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy); |
5504 | | |
5505 | 36 | std::vector<llvm::Type *> ArgTys = { |
5506 | 36 | QueueTy, Int32Ty, RangeTy, Int32Ty, |
5507 | 36 | PtrTy, PtrTy, GenericVoidPtrTy, GenericVoidPtrTy}; |
5508 | | |
5509 | 36 | std::vector<llvm::Value *> Args = {Queue, Flags, Range, |
5510 | 36 | NumEvents, EventWaitList, EventRet, |
5511 | 36 | Kernel, Block}; |
5512 | | |
5513 | 36 | if (NumArgs == 7) { |
5514 | | // Has events but no variadics. |
5515 | 18 | Name = "__enqueue_kernel_basic_events"; |
5516 | 18 | llvm::FunctionType *FTy = llvm::FunctionType::get( |
5517 | 18 | Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false); |
5518 | 18 | return RValue::get( |
5519 | 18 | EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), |
5520 | 18 | llvm::ArrayRef<llvm::Value *>(Args))); |
5521 | 18 | } |
5522 | | // Has event info and variadics |
5523 | | // Pass the number of variadics to the runtime function too. |
5524 | 18 | Args.push_back(ConstantInt::get(Int32Ty, NumArgs - 7)); |
5525 | 18 | ArgTys.push_back(Int32Ty); |
5526 | 18 | Name = "__enqueue_kernel_events_varargs"; |
5527 | | |
5528 | 18 | llvm::Value *ElemPtr, *TmpSize, *TmpPtr; |
5529 | 18 | std::tie(ElemPtr, TmpSize, TmpPtr) = CreateArrayForSizeVar(7); |
5530 | 18 | Args.push_back(ElemPtr); |
5531 | 18 | ArgTys.push_back(ElemPtr->getType()); |
5532 | | |
5533 | 18 | llvm::FunctionType *FTy = llvm::FunctionType::get( |
5534 | 18 | Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false); |
5535 | 18 | auto Call = |
5536 | 18 | RValue::get(EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), |
5537 | 18 | llvm::ArrayRef<llvm::Value *>(Args))); |
5538 | 18 | if (TmpSize) |
5539 | 6 | EmitLifetimeEnd(TmpSize, TmpPtr); |
5540 | 18 | return Call; |
5541 | 36 | } |
5542 | 36 | [[fallthrough]]; |
5543 | 0 | } |
5544 | | // OpenCL v2.0 s6.13.17.6 - Kernel query functions need bitcast of block |
5545 | | // parameter. |
5546 | 19 | case Builtin::BIget_kernel_work_group_size: { |
5547 | 19 | llvm::Type *GenericVoidPtrTy = Builder.getPtrTy( |
5548 | 19 | getContext().getTargetAddressSpace(LangAS::opencl_generic)); |
5549 | 19 | auto Info = |
5550 | 19 | CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(0)); |
5551 | 19 | Value *Kernel = |
5552 | 19 | Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy); |
5553 | 19 | Value *Arg = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy); |
5554 | 19 | return RValue::get(EmitRuntimeCall( |
5555 | 19 | CGM.CreateRuntimeFunction( |
5556 | 19 | llvm::FunctionType::get(IntTy, {GenericVoidPtrTy, GenericVoidPtrTy}, |
5557 | 19 | false), |
5558 | 19 | "__get_kernel_work_group_size_impl"), |
5559 | 19 | {Kernel, Arg})); |
5560 | 0 | } |
5561 | 28 | case Builtin::BIget_kernel_preferred_work_group_size_multiple: { |
5562 | 28 | llvm::Type *GenericVoidPtrTy = Builder.getPtrTy( |
5563 | 28 | getContext().getTargetAddressSpace(LangAS::opencl_generic)); |
5564 | 28 | auto Info = |
5565 | 28 | CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(0)); |
5566 | 28 | Value *Kernel = |
5567 | 28 | Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy); |
5568 | 28 | Value *Arg = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy); |
5569 | 28 | return RValue::get(EmitRuntimeCall( |
5570 | 28 | CGM.CreateRuntimeFunction( |
5571 | 28 | llvm::FunctionType::get(IntTy, {GenericVoidPtrTy, GenericVoidPtrTy}, |
5572 | 28 | false), |
5573 | 28 | "__get_kernel_preferred_work_group_size_multiple_impl"), |
5574 | 28 | {Kernel, Arg})); |
5575 | 0 | } |
5576 | 9 | case Builtin::BIget_kernel_max_sub_group_size_for_ndrange: |
5577 | 18 | case Builtin::BIget_kernel_sub_group_count_for_ndrange: { |
5578 | 18 | llvm::Type *GenericVoidPtrTy = Builder.getPtrTy( |
5579 | 18 | getContext().getTargetAddressSpace(LangAS::opencl_generic)); |
5580 | 18 | LValue NDRangeL = EmitAggExprToLValue(E->getArg(0)); |
5581 | 18 | llvm::Value *NDRange = NDRangeL.getAddress(*this).getPointer(); |
5582 | 18 | auto Info = |
5583 | 18 | CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(1)); |
5584 | 18 | Value *Kernel = |
5585 | 18 | Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy); |
5586 | 18 | Value *Block = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy); |
5587 | 18 | const char *Name = |
5588 | 18 | BuiltinID == Builtin::BIget_kernel_max_sub_group_size_for_ndrange |
5589 | 18 | ? "__get_kernel_max_sub_group_size_for_ndrange_impl"
5590 | 18 | : "__get_kernel_sub_group_count_for_ndrange_impl";
5591 | 18 | return RValue::get(EmitRuntimeCall( |
5592 | 18 | CGM.CreateRuntimeFunction( |
5593 | 18 | llvm::FunctionType::get( |
5594 | 18 | IntTy, {NDRange->getType(), GenericVoidPtrTy, GenericVoidPtrTy}, |
5595 | 18 | false), |
5596 | 18 | Name), |
5597 | 18 | {NDRange, Kernel, Block})); |
5598 | 9 | } |
5599 | | |
5600 | 3 | case Builtin::BI__builtin_store_half: |
5601 | 6 | case Builtin::BI__builtin_store_halff: { |
5602 | 6 | Value *Val = EmitScalarExpr(E->getArg(0)); |
5603 | 6 | Address Address = EmitPointerWithAlignment(E->getArg(1)); |
5604 | 6 | Value *HalfVal = Builder.CreateFPTrunc(Val, Builder.getHalfTy()); |
5605 | 6 | Builder.CreateStore(HalfVal, Address); |
5606 | 6 | return RValue::get(nullptr); |
5607 | 3 | } |
5608 | 3 | case Builtin::BI__builtin_load_half: { |
5609 | 3 | Address Address = EmitPointerWithAlignment(E->getArg(0)); |
5610 | 3 | Value *HalfVal = Builder.CreateLoad(Address); |
5611 | 3 | return RValue::get(Builder.CreateFPExt(HalfVal, Builder.getDoubleTy())); |
5612 | 3 | } |
5613 | 3 | case Builtin::BI__builtin_load_halff: { |
5614 | 3 | Address Address = EmitPointerWithAlignment(E->getArg(0)); |
5615 | 3 | Value *HalfVal = Builder.CreateLoad(Address); |
5616 | 3 | return RValue::get(Builder.CreateFPExt(HalfVal, Builder.getFloatTy())); |
5617 | 3 | } |
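// [Editor's sketch, added; hypothetical callers, assuming the target
// supports __fp16.] Stores truncate with fptrunc, loads widen with fpext,
// matching the three cases above:
void store_f(float V, __fp16 *P) { __builtin_store_halff(V, P); }
float load_f(const __fp16 *P) { return __builtin_load_halff(P); }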
5618 | 3.59k | case Builtin::BIprintf: |
5619 | 3.59k | getTarget().getTriple().isAMDGCN()) {
5620 | 3.59k | getTarget().getTriple().isAMDGCN()3.58k ) { |
5621 | 18 | if (getLangOpts().OpenMPIsTargetDevice) |
5622 | 6 | return EmitOpenMPDevicePrintfCallExpr(E); |
5623 | 12 | if (getTarget().getTriple().isNVPTX()) |
5624 | 0 | return EmitNVPTXDevicePrintfCallExpr(E); |
5625 | 12 | if (getTarget().getTriple().isAMDGCN() && getLangOpts().HIP) |
5626 | 12 | return EmitAMDGPUDevicePrintfCallExpr(E); |
5627 | 12 | } |
5628 | | |
5629 | 3.57k | break; |
5630 | 3.57k | case Builtin::BI__builtin_canonicalize: |
5631 | 4 | case Builtin::BI__builtin_canonicalizef: |
5632 | 5 | case Builtin::BI__builtin_canonicalizef16: |
5633 | 7 | case Builtin::BI__builtin_canonicalizel: |
5634 | 7 | return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::canonicalize)); |
5635 | | |
5636 | 4 | case Builtin::BI__builtin_thread_pointer: { |
5637 | 4 | if (!getContext().getTargetInfo().isTLSSupported()) |
5638 | 0 | CGM.ErrorUnsupported(E, "__builtin_thread_pointer"); |
5639 | | // Fall through - it's already mapped to the intrinsic by ClangBuiltin. |
5640 | 4 | break; |
5641 | 5 | } |
5642 | 59 | case Builtin::BI__builtin_os_log_format: |
5643 | 59 | return emitBuiltinOSLogFormat(*E); |
5644 | | |
5645 | 17 | case Builtin::BI__xray_customevent: { |
5646 | 17 | if (!ShouldXRayInstrumentFunction()) |
5647 | 0 | return RValue::getIgnored(); |
5648 | | |
5649 | 17 | if (!CGM.getCodeGenOpts().XRayInstrumentationBundle.has( |
5650 | 17 | XRayInstrKind::Custom)) |
5651 | 7 | return RValue::getIgnored(); |
5652 | | |
5653 | 10 | if (const auto *XRayAttr = CurFuncDecl->getAttr<XRayInstrumentAttr>()) |
5654 | 10 | if (XRayAttr->neverXRayInstrument() && !AlwaysEmitXRayCustomEvents())
5655 | 1 | return RValue::getIgnored(); |
5656 | | |
5657 | 9 | Function *F = CGM.getIntrinsic(Intrinsic::xray_customevent); |
5658 | 9 | auto FTy = F->getFunctionType(); |
5659 | 9 | auto Arg0 = E->getArg(0); |
5660 | 9 | auto Arg0Val = EmitScalarExpr(Arg0); |
5661 | 9 | auto Arg0Ty = Arg0->getType(); |
5662 | 9 | auto PTy0 = FTy->getParamType(0); |
5663 | 9 | if (PTy0 != Arg0Val->getType()) { |
5664 | 0 | if (Arg0Ty->isArrayType()) |
5665 | 0 | Arg0Val = EmitArrayToPointerDecay(Arg0).getPointer(); |
5666 | 0 | else |
5667 | 0 | Arg0Val = Builder.CreatePointerCast(Arg0Val, PTy0); |
5668 | 0 | } |
5669 | 9 | auto Arg1 = EmitScalarExpr(E->getArg(1)); |
5670 | 9 | auto PTy1 = FTy->getParamType(1); |
5671 | 9 | if (PTy1 != Arg1->getType()) |
5672 | 0 | Arg1 = Builder.CreateTruncOrBitCast(Arg1, PTy1); |
5673 | 9 | return RValue::get(Builder.CreateCall(F, {Arg0Val, Arg1})); |
5674 | 10 | } |
5675 | | |
5676 | 17 | case Builtin::BI__xray_typedevent: { |
5677 | | // TODO: There should be a way to always emit events even if the current |
5678 | | // function is not instrumented. Losing events in a stream can cripple |
5679 | | // a trace. |
5680 | 17 | if (!ShouldXRayInstrumentFunction()) |
5681 | 0 | return RValue::getIgnored(); |
5682 | | |
5683 | 17 | if (!CGM.getCodeGenOpts().XRayInstrumentationBundle.has( |
5684 | 17 | XRayInstrKind::Typed)) |
5685 | 7 | return RValue::getIgnored(); |
5686 | | |
5687 | 10 | if (const auto *XRayAttr = CurFuncDecl->getAttr<XRayInstrumentAttr>()) |
5688 | 10 | if (XRayAttr->neverXRayInstrument() && !AlwaysEmitXRayTypedEvents())
5689 | 1 | return RValue::getIgnored(); |
5690 | | |
5691 | 9 | Function *F = CGM.getIntrinsic(Intrinsic::xray_typedevent); |
5692 | 9 | auto FTy = F->getFunctionType(); |
5693 | 9 | auto Arg0 = EmitScalarExpr(E->getArg(0)); |
5694 | 9 | auto PTy0 = FTy->getParamType(0); |
5695 | 9 | if (PTy0 != Arg0->getType()) |
5696 | 0 | Arg0 = Builder.CreateTruncOrBitCast(Arg0, PTy0); |
5697 | 9 | auto Arg1 = E->getArg(1); |
5698 | 9 | auto Arg1Val = EmitScalarExpr(Arg1); |
5699 | 9 | auto Arg1Ty = Arg1->getType(); |
5700 | 9 | auto PTy1 = FTy->getParamType(1); |
5701 | 9 | if (PTy1 != Arg1Val->getType()) { |
5702 | 0 | if (Arg1Ty->isArrayType()) |
5703 | 0 | Arg1Val = EmitArrayToPointerDecay(Arg1).getPointer(); |
5704 | 0 | else |
5705 | 0 | Arg1Val = Builder.CreatePointerCast(Arg1Val, PTy1); |
5706 | 0 | } |
5707 | 9 | auto Arg2 = EmitScalarExpr(E->getArg(2)); |
5708 | 9 | auto PTy2 = FTy->getParamType(2); |
5709 | 9 | if (PTy2 != Arg2->getType()) |
5710 | 0 | Arg2 = Builder.CreateTruncOrBitCast(Arg2, PTy2); |
5711 | 9 | return RValue::get(Builder.CreateCall(F, {Arg0, Arg1Val, Arg2})); |
5712 | 10 | } |
5713 | | |
5714 | 8 | case Builtin::BI__builtin_ms_va_start: |
5715 | 16 | case Builtin::BI__builtin_ms_va_end: |
5716 | 16 | return RValue::get( |
5717 | 16 | EmitVAStartEnd(EmitMSVAListRef(E->getArg(0)).getPointer(), |
5718 | 16 | BuiltinID == Builtin::BI__builtin_ms_va_start)); |
5719 | | |
5720 | 6 | case Builtin::BI__builtin_ms_va_copy: { |
5721 | | // Lower this manually. We can't reliably determine whether or not any |
5722 | | // given va_copy() is for a Win64 va_list from the calling convention |
5723 | | // alone, because it's legal to do this from a System V ABI function. |
5724 | | // With opaque pointer types, we won't have enough information in LLVM |
5725 | | // IR to determine this from the argument types, either. Best to do it |
5726 | | // now, while we have enough information. |
5727 | 6 | Address DestAddr = EmitMSVAListRef(E->getArg(0)); |
5728 | 6 | Address SrcAddr = EmitMSVAListRef(E->getArg(1)); |
5729 | | |
5730 | 6 | llvm::Type *BPP = Int8PtrPtrTy; |
5731 | | |
5732 | 6 | DestAddr = Address(Builder.CreateBitCast(DestAddr.getPointer(), BPP, "cp"), |
5733 | 6 | Int8PtrTy, DestAddr.getAlignment()); |
5734 | 6 | SrcAddr = Address(Builder.CreateBitCast(SrcAddr.getPointer(), BPP, "ap"), |
5735 | 6 | Int8PtrTy, SrcAddr.getAlignment()); |
5736 | | |
5737 | 6 | Value *ArgPtr = Builder.CreateLoad(SrcAddr, "ap.val"); |
5738 | 6 | return RValue::get(Builder.CreateStore(ArgPtr, DestAddr)); |
5739 | 8 | } |
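A sketch of the situation the comment above describes: an ms_abi variadic function uses the Win64 va_list flavor even in a translation unit targeting the System V ABI, so the copy has to be lowered while the frontend still knows which flavor it is. Illustrative only:

__attribute__((ms_abi)) int sum_ms(int n, ...) {
  __builtin_ms_va_list ap, ap2;
  __builtin_ms_va_start(ap, n);
  __builtin_ms_va_copy(ap2, ap); // becomes the load/store emitted above
  int total = 0;
  for (int i = 0; i < n; ++i)
    total += __builtin_va_arg(ap2, int);
  __builtin_ms_va_end(ap2);
  __builtin_ms_va_end(ap);
  return total;
}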
5740 | | |
5741 | 4 | case Builtin::BI__builtin_get_device_side_mangled_name: { |
5742 | 4 | auto Name = CGM.getCUDARuntime().getDeviceSideName( |
5743 | 4 | cast<DeclRefExpr>(E->getArg(0)->IgnoreImpCasts())->getDecl()); |
5744 | 4 | auto Str = CGM.GetAddrOfConstantCString(Name, ""); |
5745 | 4 | llvm::Constant *Zeros[] = {llvm::ConstantInt::get(SizeTy, 0), |
5746 | 4 | llvm::ConstantInt::get(SizeTy, 0)}; |
5747 | 4 | auto *Ptr = llvm::ConstantExpr::getGetElementPtr(Str.getElementType(), |
5748 | 4 | Str.getPointer(), Zeros); |
5749 | 4 | return RValue::get(Ptr); |
5750 | 8 | } |
5751 | 112k | } |
5752 | | |
5753 | | // If this is an alias for a lib function (e.g. __builtin_sin), emit
5754 | | // the call using the normal call path, but with the unmangled
5755 | | // version of the function name.
5756 | 88.7k | if (getContext().BuiltinInfo.isLibFunction(BuiltinID)) |
5757 | 1.81k | return emitLibraryCall(*this, FD, E, |
5758 | 1.81k | CGM.getBuiltinLibFunction(FD, BuiltinID)); |
5759 | | |
5760 | | // If this is a predefined lib function (e.g. malloc), emit the call |
5761 | | // using exactly the normal call path. |
5762 | 86.9k | if (getContext().BuiltinInfo.isPredefinedLibFunction(BuiltinID)) |
5763 | 11.8k | return emitLibraryCall(*this, FD, E, |
5764 | 11.8k | cast<llvm::Constant>(EmitScalarExpr(E->getCallee()))); |
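For example, with math-errno enabled (the default on most hosted targets) the first branch turns the alias below into an ordinary call to the C library's sin; the exact lowering can differ under flags such as -fno-math-errno, which routes such builtins to intrinsics earlier in this function:

extern "C" double sin(double);

double f(double x) {
  return __builtin_sin(x); // emitted as a plain call to sin()
}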
5765 | | |
5766 | | // Check that a call to a target specific builtin has the correct target |
5767 | | // features. |
5768 | | // This check is placed down here so that non-target-specific builtins
5769 | | // skip it; if generic builtins ever start to require target features,
5770 | | // it can move up to the beginning of the function.
5771 | 75.1k | checkTargetFeatures(E, FD); |
5772 | | |
5773 | 75.1k | if (unsigned VectorWidth = getContext().BuiltinInfo.getRequiredVectorWidth(BuiltinID)) |
5774 | 13.4k | LargestVectorWidth = std::max(LargestVectorWidth, VectorWidth); |
5775 | | |
5776 | | // See if we have a target specific intrinsic. |
5777 | 75.1k | StringRef Name = getContext().BuiltinInfo.getName(BuiltinID); |
5778 | 75.1k | Intrinsic::ID IntrinsicID = Intrinsic::not_intrinsic; |
5779 | 75.1k | StringRef Prefix = |
5780 | 75.1k | llvm::Triple::getArchTypePrefix(getTarget().getTriple().getArch()); |
5781 | 75.1k | if (!Prefix.empty()) { |
5782 | 75.1k | IntrinsicID = Intrinsic::getIntrinsicForClangBuiltin(Prefix.data(), Name); |
5783 | | // NOTE: we don't need to perform a compatibility flag check here, since
5784 | | // these intrinsics are declared in Builtins*.def via LANGBUILTIN with
5785 | | // ALL_MS_LANGUAGES, so the MS builtins are filtered out earlier.
5786 | 75.1k | if (IntrinsicID == Intrinsic::not_intrinsic) |
5787 | 66.0k | IntrinsicID = Intrinsic::getIntrinsicForMSBuiltin(Prefix.data(), Name); |
5788 | 75.1k | } |
5789 | | |
5790 | 75.1k | if (IntrinsicID != Intrinsic::not_intrinsic) { |
5791 | 9.03k | SmallVector<Value*, 16> Args; |
5792 | | |
5793 | | // Find out if any arguments are required to be integer constant |
5794 | | // expressions. |
5795 | 9.03k | unsigned ICEArguments = 0; |
5796 | 9.03k | ASTContext::GetBuiltinTypeError Error; |
5797 | 9.03k | getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments); |
5798 | 9.03k | assert(Error == ASTContext::GE_None && "Should not codegen an error"); |
5799 | | |
5800 | 9.03k | Function *F = CGM.getIntrinsic(IntrinsicID); |
5801 | 9.03k | llvm::FunctionType *FTy = F->getFunctionType(); |
5802 | | |
5803 | 31.8k | for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i22.7k ) { |
5804 | 22.7k | Value *ArgValue = EmitScalarOrConstFoldImmArg(ICEArguments, i, E); |
5805 | | // If the intrinsic arg type is different from the builtin arg type |
5806 | | // we need to do a bit cast. |
5807 | 22.7k | llvm::Type *PTy = FTy->getParamType(i); |
5808 | 22.7k | if (PTy != ArgValue->getType()) { |
5809 | | // XXX - vector of pointers? |
5810 | 731 | if (auto *PtrTy = dyn_cast<llvm::PointerType>(PTy)) { |
5811 | 0 | if (PtrTy->getAddressSpace() != |
5812 | 0 | ArgValue->getType()->getPointerAddressSpace()) { |
5813 | 0 | ArgValue = Builder.CreateAddrSpaceCast( |
5814 | 0 | ArgValue, llvm::PointerType::get(getLLVMContext(), |
5815 | 0 | PtrTy->getAddressSpace())); |
5816 | 0 | } |
5817 | 0 | } |
5818 | | |
5819 | 731 | assert(ArgValue->getType()->canLosslesslyBitCastTo(PTy) &&
5820 | 731 | "Must be able to losslessly bit cast to param"); |
5821 | | // Cast vector type (e.g., v256i32) to x86_amx; this only happens
5822 | | // in AMX intrinsics.
5823 | 731 | if (PTy->isX86_AMXTy()) |
5824 | 25 | ArgValue = Builder.CreateIntrinsic(Intrinsic::x86_cast_vector_to_tile, |
5825 | 25 | {ArgValue->getType()}, {ArgValue}); |
5826 | 706 | else |
5827 | 706 | ArgValue = Builder.CreateBitCast(ArgValue, PTy); |
5828 | 731 | } |
5829 | | |
5830 | 22.7k | Args.push_back(ArgValue); |
5831 | 22.7k | } |
5832 | | |
5833 | 9.03k | Value *V = Builder.CreateCall(F, Args); |
5834 | 9.03k | QualType BuiltinRetType = E->getType(); |
5835 | | |
5836 | 9.03k | llvm::Type *RetTy = VoidTy; |
5837 | 9.03k | if (!BuiltinRetType->isVoidType()) |
5838 | 8.40k | RetTy = ConvertType(BuiltinRetType); |
5839 | | |
5840 | 9.03k | if (RetTy != V->getType()) { |
5841 | | // XXX - vector of pointers? |
5842 | 467 | if (auto *PtrTy = dyn_cast<llvm::PointerType>(RetTy)) { |
5843 | 0 | if (PtrTy->getAddressSpace() != V->getType()->getPointerAddressSpace()) { |
5844 | 0 | V = Builder.CreateAddrSpaceCast( |
5845 | 0 | V, llvm::PointerType::get(getLLVMContext(), |
5846 | 0 | PtrTy->getAddressSpace())); |
5847 | 0 | } |
5848 | 0 | } |
5849 | | |
5850 | 467 | assert(V->getType()->canLosslesslyBitCastTo(RetTy) && |
5851 | 467 | "Must be able to losslessly bit cast result type"); |
5852 | | // Cast x86_amx to vector type (e.g., v256i32); this only happens
5853 | | // in AMX intrinsics.
5854 | 467 | if (V->getType()->isX86_AMXTy()) |
5855 | 11 | V = Builder.CreateIntrinsic(Intrinsic::x86_cast_tile_to_vector, {RetTy}, |
5856 | 11 | {V}); |
5857 | 456 | else |
5858 | 456 | V = Builder.CreateBitCast(V, RetTy); |
5859 | 467 | } |
5860 | | |
5861 | 9.03k | if (RetTy->isVoidTy()) |
5862 | 636 | return RValue::get(nullptr); |
5863 | | |
5864 | 8.40k | return RValue::get(V); |
5865 | 9.03k | } |
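ICEArguments above is a bitmask computed by GetBuiltinType: bit i is set when argument i of the builtin must be an integer constant expression, which is why operands are emitted through EmitScalarOrConstFoldImmArg rather than plain scalar emission. A minimal sketch of the bit test involved, with a hypothetical helper name:

// Hypothetical helper; the real code consumes the mask inline.
static bool argMustBeConstant(unsigned ICEArguments, unsigned ArgIdx) {
  return (ICEArguments & (1u << ArgIdx)) != 0; // bit i <=> arg i is an ICE
}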
5866 | | |
5867 | | // Some target-specific builtins can have aggregate return values, e.g. |
5868 | | // __builtin_arm_mve_vld2q_u32. So if the result is an aggregate, force |
5869 | | // ReturnValue to be non-null, so that the target-specific emission code can |
5870 | | // always just emit into it. |
5871 | 66.0k | TypeEvaluationKind EvalKind = getEvaluationKind(E->getType()); |
5872 | 66.0k | if (EvalKind == TEK_Aggregate && ReturnValue.isNull()8 ) { |
5873 | 2 | Address DestPtr = CreateMemTemp(E->getType(), "agg.tmp"); |
5874 | 2 | ReturnValue = ReturnValueSlot(DestPtr, false); |
5875 | 2 | } |
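An example of a builtin that takes this path: on AArch64, vld2q_u32 returns uint32x4x2_t, an aggregate of two vectors, so its value is emitted into the ReturnValue slot created above. Assumes an AArch64 target:

#include <arm_neon.h>

uint32x4x2_t load_pairs(const uint32_t *p) {
  return vld2q_u32(p); // TEK_Aggregate result, written into the slot
}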
5876 | | |
5877 | | // Now see if we can emit a target-specific builtin. |
5878 | 66.0k | if (Value *V = EmitTargetBuiltinExpr(BuiltinID, E, ReturnValue)) { |
5879 | 66.0k | switch (EvalKind) { |
5880 | 66.0k | case TEK_Scalar: |
5881 | 66.0k | if (V->getType()->isVoidTy()) |
5882 | 5.95k | return RValue::get(nullptr); |
5883 | 60.1k | return RValue::get(V); |
5884 | 8 | case TEK_Aggregate: |
5885 | 8 | return RValue::getAggregate(ReturnValue.getValue(), |
5886 | 8 | ReturnValue.isVolatile()); |
5887 | 0 | case TEK_Complex: |
5888 | 0 | llvm_unreachable("No current target builtin returns complex"); |
5889 | 66.0k | } |
5890 | 0 | llvm_unreachable("Bad evaluation kind in EmitBuiltinExpr"); |
5891 | 0 | } |
5892 | | |
5893 | 1 | if (getLangOpts().HIPStdPar && getLangOpts().CUDAIsDevice) |
5894 | 1 | return EmitHipStdParUnsupportedBuiltin(this, FD); |
5895 | | |
5896 | 0 | ErrorUnsupported(E, "builtin function"); |
5897 | | |
5898 | | // Unknown builtin, for now just dump it out and return undef. |
5899 | 0 | return GetUndefRValue(E->getType()); |
5900 | 1 | } |
5901 | | |
5902 | | static Value *EmitTargetArchBuiltinExpr(CodeGenFunction *CGF, |
5903 | | unsigned BuiltinID, const CallExpr *E, |
5904 | | ReturnValueSlot ReturnValue, |
5905 | 66.0k | llvm::Triple::ArchType Arch) { |
5906 | | // When compiling in HipStdPar mode we have to be conservative in rejecting |
5907 | | // target specific features in the FE, and defer the possible error to the |
5908 | | // AcceleratorCodeSelection pass, wherein iff an unsupported target builtin is |
5909 | | // referenced by an accelerator executable function, we emit an error. |
5910 | | // Returning nullptr here leads to the builtin being handled in |
5911 | | // EmitHipStdParUnsupportedBuiltin.
5912 | 66.0k | if (CGF->getLangOpts().HIPStdPar && CGF->getLangOpts().CUDAIsDevice1 && |
5913 | 66.0k | Arch != CGF->getTarget().getTriple().getArch()1 ) |
5914 | 1 | return nullptr; |
5915 | | |
5916 | 66.0k | switch (Arch) { |
5917 | 640 | case llvm::Triple::arm: |
5918 | 640 | case llvm::Triple::armeb: |
5919 | 6.39k | case llvm::Triple::thumb: |
5920 | 6.67k | case llvm::Triple::thumbeb: |
5921 | 6.67k | return CGF->EmitARMBuiltinExpr(BuiltinID, E, ReturnValue, Arch); |
5922 | 47.8k | case llvm::Triple::aarch64: |
5923 | 47.9k | case llvm::Triple::aarch64_32: |
5924 | 47.9k | case llvm::Triple::aarch64_be: |
5925 | 47.9k | return CGF->EmitAArch64BuiltinExpr(BuiltinID, E, Arch); |
5926 | 0 | case llvm::Triple::bpfeb: |
5927 | 0 | case llvm::Triple::bpfel: |
5928 | 0 | return CGF->EmitBPFBuiltinExpr(BuiltinID, E); |
5929 | 959 | case llvm::Triple::x86: |
5930 | 9.30k | case llvm::Triple::x86_64: |
5931 | 9.30k | return CGF->EmitX86BuiltinExpr(BuiltinID, E); |
5932 | 5 | case llvm::Triple::ppc: |
5933 | 5 | case llvm::Triple::ppcle: |
5934 | 131 | case llvm::Triple::ppc64: |
5935 | 269 | case llvm::Triple::ppc64le: |
5936 | 269 | return CGF->EmitPPCBuiltinExpr(BuiltinID, E); |
5937 | 4 | case llvm::Triple::r600: |
5938 | 439 | case llvm::Triple::amdgcn: |
5939 | 439 | return CGF->EmitAMDGPUBuiltinExpr(BuiltinID, E); |
5940 | 0 | case llvm::Triple::systemz: |
5941 | 0 | return CGF->EmitSystemZBuiltinExpr(BuiltinID, E); |
5942 | 463 | case llvm::Triple::nvptx: |
5943 | 1.09k | case llvm::Triple::nvptx64: |
5944 | 1.09k | return CGF->EmitNVPTXBuiltinExpr(BuiltinID, E); |
5945 | 127 | case llvm::Triple::wasm32: |
5946 | 283 | case llvm::Triple::wasm64: |
5947 | 283 | return CGF->EmitWebAssemblyBuiltinExpr(BuiltinID, E); |
5948 | 0 | case llvm::Triple::hexagon: |
5949 | 0 | return CGF->EmitHexagonBuiltinExpr(BuiltinID, E); |
5950 | 23 | case llvm::Triple::riscv32: |
5951 | 52 | case llvm::Triple::riscv64: |
5952 | 52 | return CGF->EmitRISCVBuiltinExpr(BuiltinID, E, ReturnValue); |
5953 | 0 | default: |
5954 | 0 | return nullptr; |
5955 | 66.0k | } |
5956 | 66.0k | } |
5957 | | |
5958 | | Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID, |
5959 | | const CallExpr *E, |
5960 | 66.0k | ReturnValueSlot ReturnValue) { |
5961 | 66.0k | if (getContext().BuiltinInfo.isAuxBuiltinID(BuiltinID)) { |
5962 | 1 | assert(getContext().getAuxTargetInfo() && "Missing aux target info"); |
5963 | 1 | return EmitTargetArchBuiltinExpr( |
5964 | 1 | this, getContext().BuiltinInfo.getAuxBuiltinID(BuiltinID), E, |
5965 | 1 | ReturnValue, getContext().getAuxTargetInfo()->getTriple().getArch()); |
5966 | 1 | } |
5967 | | |
5968 | 66.0k | return EmitTargetArchBuiltinExpr(this, BuiltinID, E, ReturnValue, |
5969 | 66.0k | getTarget().getTriple().getArch()); |
5970 | 66.0k | } |
5971 | | |
5972 | | static llvm::FixedVectorType *GetNeonType(CodeGenFunction *CGF, |
5973 | | NeonTypeFlags TypeFlags, |
5974 | | bool HasLegalHalfType = true, |
5975 | | bool V1Ty = false, |
5976 | 7.47k | bool AllowBFloatArgsAndRet = true) { |
5977 | 7.47k | int IsQuad = TypeFlags.isQuad(); |
5978 | 7.47k | switch (TypeFlags.getEltType()) { |
5979 | 1.15k | case NeonTypeFlags::Int8: |
5980 | 1.43k | case NeonTypeFlags::Poly8: |
5981 | 1.43k | return llvm::FixedVectorType::get(CGF->Int8Ty, V1Ty ? 1 : (8 << IsQuad));
5982 | 1.58k | case NeonTypeFlags::Int16:
5983 | 1.80k | case NeonTypeFlags::Poly16:
5984 | 1.80k | return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
5985 | 210 | case NeonTypeFlags::BFloat16:
5986 | 210 | if (AllowBFloatArgsAndRet)
5987 | 205 | return llvm::FixedVectorType::get(CGF->BFloatTy, V1Ty ? 1 : (4 << IsQuad));
5988 | 5 | else
5989 | 5 | return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
5990 | 477 | case NeonTypeFlags::Float16:
5991 | 477 | if (HasLegalHalfType)
5992 | 441 | return llvm::FixedVectorType::get(CGF->HalfTy, V1Ty ? 1 : (4 << IsQuad));
5993 | 36 | else
5994 | 36 | return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
5995 | 1.74k | case NeonTypeFlags::Int32:
5996 | 1.74k | return llvm::FixedVectorType::get(CGF->Int32Ty, V1Ty ? 1 : (2 << IsQuad));
5997 | 770 | case NeonTypeFlags::Int64:
5998 | 862 | case NeonTypeFlags::Poly64:
5999 | 862 | return llvm::FixedVectorType::get(CGF->Int64Ty, V1Ty ? 1 : (1 << IsQuad));
6000 | 12 | case NeonTypeFlags::Poly128: |
6001 | | // FIXME: i128 and f128 don't get full support in Clang and LLVM;
6002 | | // a lot of the i128 and f128 API is still missing,
6003 | | // so we use v16i8 to represent poly128 and get pattern matched.
6004 | 12 | return llvm::FixedVectorType::get(CGF->Int8Ty, 16); |
6005 | 627 | case NeonTypeFlags::Float32: |
6006 | 627 | return llvm::FixedVectorType::get(CGF->FloatTy, V1Ty ? 1 : (2 << IsQuad));
6007 | 300 | case NeonTypeFlags::Float64:
6008 | 300 | return llvm::FixedVectorType::get(CGF->DoubleTy, V1Ty ? 1 : (1 << IsQuad));
6009 | 7.47k | } |
6010 | 0 | llvm_unreachable("Unknown vector element type!"); |
6011 | 0 | } |
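Worked examples of the lane-count arithmetic above: the element count is a base value shifted left by the quad bit, so quad (Q-register) types carry twice as many lanes as the 64-bit D-register forms, and V1Ty forces a single-lane vector:

static_assert((8 << 0) == 8, "v8i8: 8 lanes in a 64-bit D register");
static_assert((8 << 1) == 16, "v16i8: the quad bit doubles it to 16");
static_assert((2 << 1) == 4, "v4i32: quad form of the 2-lane i32 vector");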
6012 | | |
6013 | | static llvm::VectorType *GetFloatNeonType(CodeGenFunction *CGF, |
6014 | 142 | NeonTypeFlags IntTypeFlags) { |
6015 | 142 | int IsQuad = IntTypeFlags.isQuad(); |
6016 | 142 | switch (IntTypeFlags.getEltType()) { |
6017 | 54 | case NeonTypeFlags::Int16: |
6018 | 54 | return llvm::FixedVectorType::get(CGF->HalfTy, (4 << IsQuad)); |
6019 | 56 | case NeonTypeFlags::Int32: |
6020 | 56 | return llvm::FixedVectorType::get(CGF->FloatTy, (2 << IsQuad)); |
6021 | 32 | case NeonTypeFlags::Int64: |
6022 | 32 | return llvm::FixedVectorType::get(CGF->DoubleTy, (1 << IsQuad)); |
6023 | 0 | default: |
6024 | 0 | llvm_unreachable("Type can't be converted to floating-point!"); |
6025 | 142 | } |
6026 | 142 | } |
6027 | | |
6028 | | Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C, |
6029 | 536 | const ElementCount &Count) { |
6030 | 536 | Value *SV = llvm::ConstantVector::getSplat(Count, C); |
6031 | 536 | return Builder.CreateShuffleVector(V, V, SV, "lane"); |
6032 | 536 | } |
6033 | | |
6034 | 75 | Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) { |
6035 | 75 | ElementCount EC = cast<llvm::VectorType>(V->getType())->getElementCount(); |
6036 | 75 | return EmitNeonSplat(V, C, EC); |
6037 | 75 | } |
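The splat above is a shufflevector whose constant mask repeats one lane index, e.g. <4 x i32> <i32 1, i32 1, i32 1, i32 1> to broadcast lane 1. A standalone sketch of the same pattern built directly with IRBuilder (names are illustrative; this uses the ArrayRef<int> mask overload rather than a constant vector):

#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/IRBuilder.h"

static llvm::Value *splatLane(llvm::IRBuilder<> &B, llvm::Value *V,
                              int Lane, unsigned NumElts) {
  llvm::SmallVector<int, 16> Mask(NumElts, Lane); // every lane reads Lane
  return B.CreateShuffleVector(V, V, Mask, "lane");
}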
6038 | | |
6039 | | Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value*> &Ops, |
6040 | | const char *name, |
6041 | 2.73k | unsigned shift, bool rightshift) { |
6042 | 2.73k | unsigned j = 0; |
6043 | 2.73k | for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end(); |
6044 | 9.39k | ai != ae; ++ai, ++j6.65k ) { |
6045 | 6.65k | if (F->isConstrainedFPIntrinsic()) |
6046 | 172 | if (ai->getType()->isMetadataTy()) |
6047 | 92 | continue; |
6049 | 6.56k | if (shift > 0 && shift == j)
6049 | 121 | Ops[j] = EmitNeonShiftVector(Ops[j], ai->getType(), rightshift); |
6050 | 6.44k | else |
6051 | 6.44k | Ops[j] = Builder.CreateBitCast(Ops[j], ai->getType(), name); |
6052 | 6.56k | } |
6053 | | |
6054 | 2.73k | if (F->isConstrainedFPIntrinsic()) |
6055 | 56 | return Builder.CreateConstrainedFPCall(F, Ops, name); |
6056 | 2.68k | else |
6057 | 2.68k | return Builder.CreateCall(F, Ops, name); |
6058 | 2.73k | } |
6059 | | |
6060 | | Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty, |
6061 | 338 | bool neg) { |
6062 | 338 | int SV = cast<ConstantInt>(V)->getSExtValue(); |
6063 | 338 | return ConstantInt::get(Ty, neg ? -SV : SV);
6064 | 338 | } |
6065 | | |
6066 | | // Right-shift a vector by a constant. |
6067 | | Value *CodeGenFunction::EmitNeonRShiftImm(Value *Vec, Value *Shift, |
6068 | | llvm::Type *Ty, bool usgn, |
6069 | 76 | const char *name) { |
6070 | 76 | llvm::VectorType *VTy = cast<llvm::VectorType>(Ty); |
6071 | | |
6072 | 76 | int ShiftAmt = cast<ConstantInt>(Shift)->getSExtValue(); |
6073 | 76 | int EltSize = VTy->getScalarSizeInBits(); |
6074 | | |
6075 | 76 | Vec = Builder.CreateBitCast(Vec, Ty); |
6076 | | |
6077 | | // lshr/ashr are undefined when the shift amount is equal to the vector |
6078 | | // element size. |
6079 | 76 | if (ShiftAmt == EltSize) { |
6080 | 8 | if (usgn) { |
6081 | | // Right-shifting an unsigned value by its size yields 0. |
6082 | 4 | return llvm::ConstantAggregateZero::get(VTy); |
6083 | 4 | } else { |
6084 | | // Right-shifting a signed value by its size is equivalent |
6085 | | // to a shift of size-1. |
6086 | 4 | --ShiftAmt; |
6087 | 4 | Shift = ConstantInt::get(VTy->getElementType(), ShiftAmt); |
6088 | 4 | } |
6089 | 8 | } |
6090 | | |
6091 | 72 | Shift = EmitNeonShiftVector(Shift, Ty, false); |
6092 | 72 | if (usgn) |
6093 | 36 | return Builder.CreateLShr(Vec, Shift, name); |
6094 | 36 | else |
6095 | 36 | return Builder.CreateAShr(Vec, Shift, name); |
6096 | 72 | } |
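A worked check of the element-size special case, for an i8 lane: an unsigned shift by 8 is mathematically zero (hence the ConstantAggregateZero), while clamping a signed shift to 7 still replicates the sign bit across the lane, matching a full-width arithmetic shift. Assumes arithmetic right shift of negative values (implementation-defined before C++20, guaranteed from C++20 on):

#include <cstdint>

static_assert((int8_t(-5) >> 7) == -1, "sign bit replicated into all bits");
static_assert((int8_t(5) >> 7) == 0, "non-negative lanes shift to zero");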
6097 | | |
6098 | | enum { |
6099 | | AddRetType = (1 << 0), |
6100 | | Add1ArgType = (1 << 1), |
6101 | | Add2ArgTypes = (1 << 2), |
6102 | | |
6103 | | VectorizeRetType = (1 << 3), |
6104 | | VectorizeArgTypes = (1 << 4), |
6105 | | |
6106 | | InventFloatType = (1 << 5), |
6107 | | UnsignedAlts = (1 << 6), |
6108 | | |
6109 | | Use64BitVectors = (1 << 7), |
6110 | | Use128BitVectors = (1 << 8), |
6111 | | |
6112 | | Vectorize1ArgType = Add1ArgType | VectorizeArgTypes, |
6113 | | VectorRet = AddRetType | VectorizeRetType, |
6114 | | VectorRetGetArgs01 = |
6115 | | AddRetType | Add2ArgTypes | VectorizeRetType | VectorizeArgTypes, |
6116 | | FpCmpzModifiers = |
6117 | | AddRetType | VectorizeRetType | Add1ArgType | InventFloatType |
6118 | | }; |
6119 | | |
6120 | | namespace { |
6121 | | struct ARMVectorIntrinsicInfo { |
6122 | | const char *NameHint; |
6123 | | unsigned BuiltinID; |
6124 | | unsigned LLVMIntrinsic; |
6125 | | unsigned AltLLVMIntrinsic; |
6126 | | uint64_t TypeModifier; |
6127 | | |
6128 | 579k | bool operator<(unsigned RHSBuiltinID) const { |
6129 | 579k | return BuiltinID < RHSBuiltinID; |
6130 | 579k | } |
6131 | 12.2M | bool operator<(const ARMVectorIntrinsicInfo &TE) const { |
6132 | 12.2M | return BuiltinID < TE.BuiltinID; |
6133 | 12.2M | } |
6134 | | }; |
6135 | | } // end anonymous namespace |
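These asymmetric comparators exist so the sorted tables below can be searched by builtin ID with std::lower_bound. A minimal sketch of such a lookup; the real helper further down this file additionally checks, in asserts builds, that the map is sorted:

#include "llvm/ADT/ArrayRef.h"
#include <algorithm>

static const ARMVectorIntrinsicInfo *
findInfo(llvm::ArrayRef<ARMVectorIntrinsicInfo> Map, unsigned BuiltinID) {
  const ARMVectorIntrinsicInfo *I =
      std::lower_bound(Map.begin(), Map.end(), BuiltinID); // operator< above
  return (I != Map.end() && I->BuiltinID == BuiltinID) ? I : nullptr;
}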
6136 | | |
6137 | | #define NEONMAP0(NameBase) \ |
6138 | | { #NameBase, NEON::BI__builtin_neon_ ## NameBase, 0, 0, 0 } |
6139 | | |
6140 | | #define NEONMAP1(NameBase, LLVMIntrinsic, TypeModifier) \ |
6141 | | { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \ |
6142 | | Intrinsic::LLVMIntrinsic, 0, TypeModifier } |
6143 | | |
6144 | | #define NEONMAP2(NameBase, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier) \ |
6145 | | { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \ |
6146 | | Intrinsic::LLVMIntrinsic, Intrinsic::AltLLVMIntrinsic, \ |
6147 | | TypeModifier } |
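Spelled out, each macro produces one aggregate initializer. For instance, NEONMAP1(vabs_v, arm_neon_vabs, 0) expands to the entry below (reproduced purely for illustration):

// { NameHint, BuiltinID, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier }
static const ARMVectorIntrinsicInfo ExpandedExample = {
    "vabs_v", NEON::BI__builtin_neon_vabs_v, Intrinsic::arm_neon_vabs, 0, 0};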
6148 | | |
6149 | | static const ARMVectorIntrinsicInfo ARMSIMDIntrinsicMap[] = {
6150 | | NEONMAP1(__a32_vcvt_bf16_f32, arm_neon_vcvtfp2bf, 0), |
6151 | | NEONMAP0(splat_lane_v), |
6152 | | NEONMAP0(splat_laneq_v), |
6153 | | NEONMAP0(splatq_lane_v), |
6154 | | NEONMAP0(splatq_laneq_v), |
6155 | | NEONMAP2(vabd_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts), |
6156 | | NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts), |
6157 | | NEONMAP1(vabs_v, arm_neon_vabs, 0), |
6158 | | NEONMAP1(vabsq_v, arm_neon_vabs, 0), |
6159 | | NEONMAP0(vadd_v), |
6160 | | NEONMAP0(vaddhn_v), |
6161 | | NEONMAP0(vaddq_v), |
6162 | | NEONMAP1(vaesdq_u8, arm_neon_aesd, 0), |
6163 | | NEONMAP1(vaeseq_u8, arm_neon_aese, 0), |
6164 | | NEONMAP1(vaesimcq_u8, arm_neon_aesimc, 0), |
6165 | | NEONMAP1(vaesmcq_u8, arm_neon_aesmc, 0), |
6166 | | NEONMAP1(vbfdot_f32, arm_neon_bfdot, 0), |
6167 | | NEONMAP1(vbfdotq_f32, arm_neon_bfdot, 0), |
6168 | | NEONMAP1(vbfmlalbq_f32, arm_neon_bfmlalb, 0), |
6169 | | NEONMAP1(vbfmlaltq_f32, arm_neon_bfmlalt, 0), |
6170 | | NEONMAP1(vbfmmlaq_f32, arm_neon_bfmmla, 0), |
6171 | | NEONMAP1(vbsl_v, arm_neon_vbsl, AddRetType), |
6172 | | NEONMAP1(vbslq_v, arm_neon_vbsl, AddRetType), |
6173 | | NEONMAP1(vcadd_rot270_f16, arm_neon_vcadd_rot270, Add1ArgType), |
6174 | | NEONMAP1(vcadd_rot270_f32, arm_neon_vcadd_rot270, Add1ArgType), |
6175 | | NEONMAP1(vcadd_rot90_f16, arm_neon_vcadd_rot90, Add1ArgType), |
6176 | | NEONMAP1(vcadd_rot90_f32, arm_neon_vcadd_rot90, Add1ArgType), |
6177 | | NEONMAP1(vcaddq_rot270_f16, arm_neon_vcadd_rot270, Add1ArgType), |
6178 | | NEONMAP1(vcaddq_rot270_f32, arm_neon_vcadd_rot270, Add1ArgType), |
6179 | | NEONMAP1(vcaddq_rot270_f64, arm_neon_vcadd_rot270, Add1ArgType), |
6180 | | NEONMAP1(vcaddq_rot90_f16, arm_neon_vcadd_rot90, Add1ArgType), |
6181 | | NEONMAP1(vcaddq_rot90_f32, arm_neon_vcadd_rot90, Add1ArgType), |
6182 | | NEONMAP1(vcaddq_rot90_f64, arm_neon_vcadd_rot90, Add1ArgType), |
6183 | | NEONMAP1(vcage_v, arm_neon_vacge, 0), |
6184 | | NEONMAP1(vcageq_v, arm_neon_vacge, 0), |
6185 | | NEONMAP1(vcagt_v, arm_neon_vacgt, 0), |
6186 | | NEONMAP1(vcagtq_v, arm_neon_vacgt, 0), |
6187 | | NEONMAP1(vcale_v, arm_neon_vacge, 0), |
6188 | | NEONMAP1(vcaleq_v, arm_neon_vacge, 0), |
6189 | | NEONMAP1(vcalt_v, arm_neon_vacgt, 0), |
6190 | | NEONMAP1(vcaltq_v, arm_neon_vacgt, 0), |
6191 | | NEONMAP0(vceqz_v), |
6192 | | NEONMAP0(vceqzq_v), |
6193 | | NEONMAP0(vcgez_v), |
6194 | | NEONMAP0(vcgezq_v), |
6195 | | NEONMAP0(vcgtz_v), |
6196 | | NEONMAP0(vcgtzq_v), |
6197 | | NEONMAP0(vclez_v), |
6198 | | NEONMAP0(vclezq_v), |
6199 | | NEONMAP1(vcls_v, arm_neon_vcls, Add1ArgType), |
6200 | | NEONMAP1(vclsq_v, arm_neon_vcls, Add1ArgType), |
6201 | | NEONMAP0(vcltz_v), |
6202 | | NEONMAP0(vcltzq_v), |
6203 | | NEONMAP1(vclz_v, ctlz, Add1ArgType), |
6204 | | NEONMAP1(vclzq_v, ctlz, Add1ArgType), |
6205 | | NEONMAP1(vcnt_v, ctpop, Add1ArgType), |
6206 | | NEONMAP1(vcntq_v, ctpop, Add1ArgType), |
6207 | | NEONMAP1(vcvt_f16_f32, arm_neon_vcvtfp2hf, 0), |
6208 | | NEONMAP0(vcvt_f16_s16), |
6209 | | NEONMAP0(vcvt_f16_u16), |
6210 | | NEONMAP1(vcvt_f32_f16, arm_neon_vcvthf2fp, 0), |
6211 | | NEONMAP0(vcvt_f32_v), |
6212 | | NEONMAP1(vcvt_n_f16_s16, arm_neon_vcvtfxs2fp, 0), |
6213 | | NEONMAP1(vcvt_n_f16_u16, arm_neon_vcvtfxu2fp, 0), |
6214 | | NEONMAP2(vcvt_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0), |
6215 | | NEONMAP1(vcvt_n_s16_f16, arm_neon_vcvtfp2fxs, 0), |
6216 | | NEONMAP1(vcvt_n_s32_v, arm_neon_vcvtfp2fxs, 0), |
6217 | | NEONMAP1(vcvt_n_s64_v, arm_neon_vcvtfp2fxs, 0), |
6218 | | NEONMAP1(vcvt_n_u16_f16, arm_neon_vcvtfp2fxu, 0), |
6219 | | NEONMAP1(vcvt_n_u32_v, arm_neon_vcvtfp2fxu, 0), |
6220 | | NEONMAP1(vcvt_n_u64_v, arm_neon_vcvtfp2fxu, 0), |
6221 | | NEONMAP0(vcvt_s16_f16), |
6222 | | NEONMAP0(vcvt_s32_v), |
6223 | | NEONMAP0(vcvt_s64_v), |
6224 | | NEONMAP0(vcvt_u16_f16), |
6225 | | NEONMAP0(vcvt_u32_v), |
6226 | | NEONMAP0(vcvt_u64_v), |
6227 | | NEONMAP1(vcvta_s16_f16, arm_neon_vcvtas, 0), |
6228 | | NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0), |
6229 | | NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0), |
6230 | | NEONMAP1(vcvta_u16_f16, arm_neon_vcvtau, 0), |
6231 | | NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0), |
6232 | | NEONMAP1(vcvta_u64_v, arm_neon_vcvtau, 0), |
6233 | | NEONMAP1(vcvtaq_s16_f16, arm_neon_vcvtas, 0), |
6234 | | NEONMAP1(vcvtaq_s32_v, arm_neon_vcvtas, 0), |
6235 | | NEONMAP1(vcvtaq_s64_v, arm_neon_vcvtas, 0), |
6236 | | NEONMAP1(vcvtaq_u16_f16, arm_neon_vcvtau, 0), |
6237 | | NEONMAP1(vcvtaq_u32_v, arm_neon_vcvtau, 0), |
6238 | | NEONMAP1(vcvtaq_u64_v, arm_neon_vcvtau, 0), |
6239 | | NEONMAP1(vcvth_bf16_f32, arm_neon_vcvtbfp2bf, 0), |
6240 | | NEONMAP1(vcvtm_s16_f16, arm_neon_vcvtms, 0), |
6241 | | NEONMAP1(vcvtm_s32_v, arm_neon_vcvtms, 0), |
6242 | | NEONMAP1(vcvtm_s64_v, arm_neon_vcvtms, 0), |
6243 | | NEONMAP1(vcvtm_u16_f16, arm_neon_vcvtmu, 0), |
6244 | | NEONMAP1(vcvtm_u32_v, arm_neon_vcvtmu, 0), |
6245 | | NEONMAP1(vcvtm_u64_v, arm_neon_vcvtmu, 0), |
6246 | | NEONMAP1(vcvtmq_s16_f16, arm_neon_vcvtms, 0), |
6247 | | NEONMAP1(vcvtmq_s32_v, arm_neon_vcvtms, 0), |
6248 | | NEONMAP1(vcvtmq_s64_v, arm_neon_vcvtms, 0), |
6249 | | NEONMAP1(vcvtmq_u16_f16, arm_neon_vcvtmu, 0), |
6250 | | NEONMAP1(vcvtmq_u32_v, arm_neon_vcvtmu, 0), |
6251 | | NEONMAP1(vcvtmq_u64_v, arm_neon_vcvtmu, 0), |
6252 | | NEONMAP1(vcvtn_s16_f16, arm_neon_vcvtns, 0), |
6253 | | NEONMAP1(vcvtn_s32_v, arm_neon_vcvtns, 0), |
6254 | | NEONMAP1(vcvtn_s64_v, arm_neon_vcvtns, 0), |
6255 | | NEONMAP1(vcvtn_u16_f16, arm_neon_vcvtnu, 0), |
6256 | | NEONMAP1(vcvtn_u32_v, arm_neon_vcvtnu, 0), |
6257 | | NEONMAP1(vcvtn_u64_v, arm_neon_vcvtnu, 0), |
6258 | | NEONMAP1(vcvtnq_s16_f16, arm_neon_vcvtns, 0), |
6259 | | NEONMAP1(vcvtnq_s32_v, arm_neon_vcvtns, 0), |
6260 | | NEONMAP1(vcvtnq_s64_v, arm_neon_vcvtns, 0), |
6261 | | NEONMAP1(vcvtnq_u16_f16, arm_neon_vcvtnu, 0), |
6262 | | NEONMAP1(vcvtnq_u32_v, arm_neon_vcvtnu, 0), |
6263 | | NEONMAP1(vcvtnq_u64_v, arm_neon_vcvtnu, 0), |
6264 | | NEONMAP1(vcvtp_s16_f16, arm_neon_vcvtps, 0), |
6265 | | NEONMAP1(vcvtp_s32_v, arm_neon_vcvtps, 0), |
6266 | | NEONMAP1(vcvtp_s64_v, arm_neon_vcvtps, 0), |
6267 | | NEONMAP1(vcvtp_u16_f16, arm_neon_vcvtpu, 0), |
6268 | | NEONMAP1(vcvtp_u32_v, arm_neon_vcvtpu, 0), |
6269 | | NEONMAP1(vcvtp_u64_v, arm_neon_vcvtpu, 0), |
6270 | | NEONMAP1(vcvtpq_s16_f16, arm_neon_vcvtps, 0), |
6271 | | NEONMAP1(vcvtpq_s32_v, arm_neon_vcvtps, 0), |
6272 | | NEONMAP1(vcvtpq_s64_v, arm_neon_vcvtps, 0), |
6273 | | NEONMAP1(vcvtpq_u16_f16, arm_neon_vcvtpu, 0), |
6274 | | NEONMAP1(vcvtpq_u32_v, arm_neon_vcvtpu, 0), |
6275 | | NEONMAP1(vcvtpq_u64_v, arm_neon_vcvtpu, 0), |
6276 | | NEONMAP0(vcvtq_f16_s16), |
6277 | | NEONMAP0(vcvtq_f16_u16), |
6278 | | NEONMAP0(vcvtq_f32_v), |
6279 | | NEONMAP1(vcvtq_n_f16_s16, arm_neon_vcvtfxs2fp, 0), |
6280 | | NEONMAP1(vcvtq_n_f16_u16, arm_neon_vcvtfxu2fp, 0), |
6281 | | NEONMAP2(vcvtq_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0), |
6282 | | NEONMAP1(vcvtq_n_s16_f16, arm_neon_vcvtfp2fxs, 0), |
6283 | | NEONMAP1(vcvtq_n_s32_v, arm_neon_vcvtfp2fxs, 0), |
6284 | | NEONMAP1(vcvtq_n_s64_v, arm_neon_vcvtfp2fxs, 0), |
6285 | | NEONMAP1(vcvtq_n_u16_f16, arm_neon_vcvtfp2fxu, 0), |
6286 | | NEONMAP1(vcvtq_n_u32_v, arm_neon_vcvtfp2fxu, 0), |
6287 | | NEONMAP1(vcvtq_n_u64_v, arm_neon_vcvtfp2fxu, 0), |
6288 | | NEONMAP0(vcvtq_s16_f16), |
6289 | | NEONMAP0(vcvtq_s32_v), |
6290 | | NEONMAP0(vcvtq_s64_v), |
6291 | | NEONMAP0(vcvtq_u16_f16), |
6292 | | NEONMAP0(vcvtq_u32_v), |
6293 | | NEONMAP0(vcvtq_u64_v), |
6294 | | NEONMAP1(vdot_s32, arm_neon_sdot, 0), |
6295 | | NEONMAP1(vdot_u32, arm_neon_udot, 0), |
6296 | | NEONMAP1(vdotq_s32, arm_neon_sdot, 0), |
6297 | | NEONMAP1(vdotq_u32, arm_neon_udot, 0), |
6298 | | NEONMAP0(vext_v), |
6299 | | NEONMAP0(vextq_v), |
6300 | | NEONMAP0(vfma_v), |
6301 | | NEONMAP0(vfmaq_v), |
6302 | | NEONMAP2(vhadd_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts), |
6303 | | NEONMAP2(vhaddq_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts), |
6304 | | NEONMAP2(vhsub_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts), |
6305 | | NEONMAP2(vhsubq_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts), |
6306 | | NEONMAP0(vld1_dup_v), |
6307 | | NEONMAP1(vld1_v, arm_neon_vld1, 0), |
6308 | | NEONMAP1(vld1_x2_v, arm_neon_vld1x2, 0), |
6309 | | NEONMAP1(vld1_x3_v, arm_neon_vld1x3, 0), |
6310 | | NEONMAP1(vld1_x4_v, arm_neon_vld1x4, 0), |
6311 | | NEONMAP0(vld1q_dup_v), |
6312 | | NEONMAP1(vld1q_v, arm_neon_vld1, 0), |
6313 | | NEONMAP1(vld1q_x2_v, arm_neon_vld1x2, 0), |
6314 | | NEONMAP1(vld1q_x3_v, arm_neon_vld1x3, 0), |
6315 | | NEONMAP1(vld1q_x4_v, arm_neon_vld1x4, 0), |
6316 | | NEONMAP1(vld2_dup_v, arm_neon_vld2dup, 0), |
6317 | | NEONMAP1(vld2_lane_v, arm_neon_vld2lane, 0), |
6318 | | NEONMAP1(vld2_v, arm_neon_vld2, 0), |
6319 | | NEONMAP1(vld2q_dup_v, arm_neon_vld2dup, 0), |
6320 | | NEONMAP1(vld2q_lane_v, arm_neon_vld2lane, 0), |
6321 | | NEONMAP1(vld2q_v, arm_neon_vld2, 0), |
6322 | | NEONMAP1(vld3_dup_v, arm_neon_vld3dup, 0), |
6323 | | NEONMAP1(vld3_lane_v, arm_neon_vld3lane, 0), |
6324 | | NEONMAP1(vld3_v, arm_neon_vld3, 0), |
6325 | | NEONMAP1(vld3q_dup_v, arm_neon_vld3dup, 0), |
6326 | | NEONMAP1(vld3q_lane_v, arm_neon_vld3lane, 0), |
6327 | | NEONMAP1(vld3q_v, arm_neon_vld3, 0), |
6328 | | NEONMAP1(vld4_dup_v, arm_neon_vld4dup, 0), |
6329 | | NEONMAP1(vld4_lane_v, arm_neon_vld4lane, 0), |
6330 | | NEONMAP1(vld4_v, arm_neon_vld4, 0), |
6331 | | NEONMAP1(vld4q_dup_v, arm_neon_vld4dup, 0), |
6332 | | NEONMAP1(vld4q_lane_v, arm_neon_vld4lane, 0), |
6333 | | NEONMAP1(vld4q_v, arm_neon_vld4, 0), |
6334 | | NEONMAP2(vmax_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts), |
6335 | | NEONMAP1(vmaxnm_v, arm_neon_vmaxnm, Add1ArgType), |
6336 | | NEONMAP1(vmaxnmq_v, arm_neon_vmaxnm, Add1ArgType), |
6337 | | NEONMAP2(vmaxq_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts), |
6338 | | NEONMAP2(vmin_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts), |
6339 | | NEONMAP1(vminnm_v, arm_neon_vminnm, Add1ArgType), |
6340 | | NEONMAP1(vminnmq_v, arm_neon_vminnm, Add1ArgType), |
6341 | | NEONMAP2(vminq_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts), |
6342 | | NEONMAP1(vmmlaq_s32, arm_neon_smmla, 0), |
6343 | | NEONMAP1(vmmlaq_u32, arm_neon_ummla, 0), |
6344 | | NEONMAP0(vmovl_v), |
6345 | | NEONMAP0(vmovn_v), |
6346 | | NEONMAP1(vmul_v, arm_neon_vmulp, Add1ArgType), |
6347 | | NEONMAP0(vmull_v), |
6348 | | NEONMAP1(vmulq_v, arm_neon_vmulp, Add1ArgType), |
6349 | | NEONMAP2(vpadal_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts), |
6350 | | NEONMAP2(vpadalq_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts), |
6351 | | NEONMAP1(vpadd_v, arm_neon_vpadd, Add1ArgType), |
6352 | | NEONMAP2(vpaddl_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts), |
6353 | | NEONMAP2(vpaddlq_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts), |
6354 | | NEONMAP1(vpaddq_v, arm_neon_vpadd, Add1ArgType), |
6355 | | NEONMAP2(vpmax_v, arm_neon_vpmaxu, arm_neon_vpmaxs, Add1ArgType | UnsignedAlts), |
6356 | | NEONMAP2(vpmin_v, arm_neon_vpminu, arm_neon_vpmins, Add1ArgType | UnsignedAlts), |
6357 | | NEONMAP1(vqabs_v, arm_neon_vqabs, Add1ArgType), |
6358 | | NEONMAP1(vqabsq_v, arm_neon_vqabs, Add1ArgType), |
6359 | | NEONMAP2(vqadd_v, uadd_sat, sadd_sat, Add1ArgType | UnsignedAlts), |
6360 | | NEONMAP2(vqaddq_v, uadd_sat, sadd_sat, Add1ArgType | UnsignedAlts), |
6361 | | NEONMAP2(vqdmlal_v, arm_neon_vqdmull, sadd_sat, 0), |
6362 | | NEONMAP2(vqdmlsl_v, arm_neon_vqdmull, ssub_sat, 0), |
6363 | | NEONMAP1(vqdmulh_v, arm_neon_vqdmulh, Add1ArgType), |
6364 | | NEONMAP1(vqdmulhq_v, arm_neon_vqdmulh, Add1ArgType), |
6365 | | NEONMAP1(vqdmull_v, arm_neon_vqdmull, Add1ArgType), |
6366 | | NEONMAP2(vqmovn_v, arm_neon_vqmovnu, arm_neon_vqmovns, Add1ArgType | UnsignedAlts), |
6367 | | NEONMAP1(vqmovun_v, arm_neon_vqmovnsu, Add1ArgType), |
6368 | | NEONMAP1(vqneg_v, arm_neon_vqneg, Add1ArgType), |
6369 | | NEONMAP1(vqnegq_v, arm_neon_vqneg, Add1ArgType), |
6370 | | NEONMAP1(vqrdmlah_s16, arm_neon_vqrdmlah, Add1ArgType), |
6371 | | NEONMAP1(vqrdmlah_s32, arm_neon_vqrdmlah, Add1ArgType), |
6372 | | NEONMAP1(vqrdmlahq_s16, arm_neon_vqrdmlah, Add1ArgType), |
6373 | | NEONMAP1(vqrdmlahq_s32, arm_neon_vqrdmlah, Add1ArgType), |
6374 | | NEONMAP1(vqrdmlsh_s16, arm_neon_vqrdmlsh, Add1ArgType), |
6375 | | NEONMAP1(vqrdmlsh_s32, arm_neon_vqrdmlsh, Add1ArgType), |
6376 | | NEONMAP1(vqrdmlshq_s16, arm_neon_vqrdmlsh, Add1ArgType), |
6377 | | NEONMAP1(vqrdmlshq_s32, arm_neon_vqrdmlsh, Add1ArgType), |
6378 | | NEONMAP1(vqrdmulh_v, arm_neon_vqrdmulh, Add1ArgType), |
6379 | | NEONMAP1(vqrdmulhq_v, arm_neon_vqrdmulh, Add1ArgType), |
6380 | | NEONMAP2(vqrshl_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts), |
6381 | | NEONMAP2(vqrshlq_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts), |
6382 | | NEONMAP2(vqshl_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts), |
6383 | | NEONMAP2(vqshl_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts), |
6384 | | NEONMAP2(vqshlq_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts), |
6385 | | NEONMAP2(vqshlq_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts), |
6386 | | NEONMAP1(vqshlu_n_v, arm_neon_vqshiftsu, 0), |
6387 | | NEONMAP1(vqshluq_n_v, arm_neon_vqshiftsu, 0), |
6388 | | NEONMAP2(vqsub_v, usub_sat, ssub_sat, Add1ArgType | UnsignedAlts), |
6389 | | NEONMAP2(vqsubq_v, usub_sat, ssub_sat, Add1ArgType | UnsignedAlts), |
6390 | | NEONMAP1(vraddhn_v, arm_neon_vraddhn, Add1ArgType), |
6391 | | NEONMAP2(vrecpe_v, arm_neon_vrecpe, arm_neon_vrecpe, 0), |
6392 | | NEONMAP2(vrecpeq_v, arm_neon_vrecpe, arm_neon_vrecpe, 0), |
6393 | | NEONMAP1(vrecps_v, arm_neon_vrecps, Add1ArgType), |
6394 | | NEONMAP1(vrecpsq_v, arm_neon_vrecps, Add1ArgType), |
6395 | | NEONMAP2(vrhadd_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts), |
6396 | | NEONMAP2(vrhaddq_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts), |
6397 | | NEONMAP1(vrnd_v, arm_neon_vrintz, Add1ArgType), |
6398 | | NEONMAP1(vrnda_v, arm_neon_vrinta, Add1ArgType), |
6399 | | NEONMAP1(vrndaq_v, arm_neon_vrinta, Add1ArgType), |
6400 | | NEONMAP0(vrndi_v), |
6401 | | NEONMAP0(vrndiq_v), |
6402 | | NEONMAP1(vrndm_v, arm_neon_vrintm, Add1ArgType), |
6403 | | NEONMAP1(vrndmq_v, arm_neon_vrintm, Add1ArgType), |
6404 | | NEONMAP1(vrndn_v, arm_neon_vrintn, Add1ArgType), |
6405 | | NEONMAP1(vrndnq_v, arm_neon_vrintn, Add1ArgType), |
6406 | | NEONMAP1(vrndp_v, arm_neon_vrintp, Add1ArgType), |
6407 | | NEONMAP1(vrndpq_v, arm_neon_vrintp, Add1ArgType), |
6408 | | NEONMAP1(vrndq_v, arm_neon_vrintz, Add1ArgType), |
6409 | | NEONMAP1(vrndx_v, arm_neon_vrintx, Add1ArgType), |
6410 | | NEONMAP1(vrndxq_v, arm_neon_vrintx, Add1ArgType), |
6411 | | NEONMAP2(vrshl_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts), |
6412 | | NEONMAP2(vrshlq_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts), |
6413 | | NEONMAP2(vrshr_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts), |
6414 | | NEONMAP2(vrshrq_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts), |
6415 | | NEONMAP2(vrsqrte_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0), |
6416 | | NEONMAP2(vrsqrteq_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0), |
6417 | | NEONMAP1(vrsqrts_v, arm_neon_vrsqrts, Add1ArgType), |
6418 | | NEONMAP1(vrsqrtsq_v, arm_neon_vrsqrts, Add1ArgType), |
6419 | | NEONMAP1(vrsubhn_v, arm_neon_vrsubhn, Add1ArgType), |
6420 | | NEONMAP1(vsha1su0q_u32, arm_neon_sha1su0, 0), |
6421 | | NEONMAP1(vsha1su1q_u32, arm_neon_sha1su1, 0), |
6422 | | NEONMAP1(vsha256h2q_u32, arm_neon_sha256h2, 0), |
6423 | | NEONMAP1(vsha256hq_u32, arm_neon_sha256h, 0), |
6424 | | NEONMAP1(vsha256su0q_u32, arm_neon_sha256su0, 0), |
6425 | | NEONMAP1(vsha256su1q_u32, arm_neon_sha256su1, 0), |
6426 | | NEONMAP0(vshl_n_v), |
6427 | | NEONMAP2(vshl_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts), |
6428 | | NEONMAP0(vshll_n_v), |
6429 | | NEONMAP0(vshlq_n_v), |
6430 | | NEONMAP2(vshlq_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts), |
6431 | | NEONMAP0(vshr_n_v), |
6432 | | NEONMAP0(vshrn_n_v), |
6433 | | NEONMAP0(vshrq_n_v), |
6434 | | NEONMAP1(vst1_v, arm_neon_vst1, 0), |
6435 | | NEONMAP1(vst1_x2_v, arm_neon_vst1x2, 0), |
6436 | | NEONMAP1(vst1_x3_v, arm_neon_vst1x3, 0), |
6437 | | NEONMAP1(vst1_x4_v, arm_neon_vst1x4, 0), |
6438 | | NEONMAP1(vst1q_v, arm_neon_vst1, 0), |
6439 | | NEONMAP1(vst1q_x2_v, arm_neon_vst1x2, 0), |
6440 | | NEONMAP1(vst1q_x3_v, arm_neon_vst1x3, 0), |
6441 | | NEONMAP1(vst1q_x4_v, arm_neon_vst1x4, 0), |
6442 | | NEONMAP1(vst2_lane_v, arm_neon_vst2lane, 0), |
6443 | | NEONMAP1(vst2_v, arm_neon_vst2, 0), |
6444 | | NEONMAP1(vst2q_lane_v, arm_neon_vst2lane, 0), |
6445 | | NEONMAP1(vst2q_v, arm_neon_vst2, 0), |
6446 | | NEONMAP1(vst3_lane_v, arm_neon_vst3lane, 0), |
6447 | | NEONMAP1(vst3_v, arm_neon_vst3, 0), |
6448 | | NEONMAP1(vst3q_lane_v, arm_neon_vst3lane, 0), |
6449 | | NEONMAP1(vst3q_v, arm_neon_vst3, 0), |
6450 | | NEONMAP1(vst4_lane_v, arm_neon_vst4lane, 0), |
6451 | | NEONMAP1(vst4_v, arm_neon_vst4, 0), |
6452 | | NEONMAP1(vst4q_lane_v, arm_neon_vst4lane, 0), |
6453 | | NEONMAP1(vst4q_v, arm_neon_vst4, 0), |
6454 | | NEONMAP0(vsubhn_v), |
6455 | | NEONMAP0(vtrn_v), |
6456 | | NEONMAP0(vtrnq_v), |
6457 | | NEONMAP0(vtst_v), |
6458 | | NEONMAP0(vtstq_v), |
6459 | | NEONMAP1(vusdot_s32, arm_neon_usdot, 0), |
6460 | | NEONMAP1(vusdotq_s32, arm_neon_usdot, 0), |
6461 | | NEONMAP1(vusmmlaq_s32, arm_neon_usmmla, 0), |
6462 | | NEONMAP0(vuzp_v), |
6463 | | NEONMAP0(vuzpq_v), |
6464 | | NEONMAP0(vzip_v), |
6465 | | NEONMAP0(vzipq_v) |
6466 | | }; |
6467 | | |
6468 | | static const ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap[] = { |
6469 | | NEONMAP1(__a64_vcvtq_low_bf16_f32, aarch64_neon_bfcvtn, 0), |
6470 | | NEONMAP0(splat_lane_v), |
6471 | | NEONMAP0(splat_laneq_v), |
6472 | | NEONMAP0(splatq_lane_v), |
6473 | | NEONMAP0(splatq_laneq_v), |
6474 | | NEONMAP1(vabs_v, aarch64_neon_abs, 0), |
6475 | | NEONMAP1(vabsq_v, aarch64_neon_abs, 0), |
6476 | | NEONMAP0(vadd_v), |
6477 | | NEONMAP0(vaddhn_v), |
6478 | | NEONMAP0(vaddq_p128), |
6479 | | NEONMAP0(vaddq_v), |
6480 | | NEONMAP1(vaesdq_u8, aarch64_crypto_aesd, 0), |
6481 | | NEONMAP1(vaeseq_u8, aarch64_crypto_aese, 0), |
6482 | | NEONMAP1(vaesimcq_u8, aarch64_crypto_aesimc, 0), |
6483 | | NEONMAP1(vaesmcq_u8, aarch64_crypto_aesmc, 0), |
6484 | | NEONMAP2(vbcaxq_s16, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts), |
6485 | | NEONMAP2(vbcaxq_s32, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts), |
6486 | | NEONMAP2(vbcaxq_s64, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts), |
6487 | | NEONMAP2(vbcaxq_s8, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts), |
6488 | | NEONMAP2(vbcaxq_u16, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts), |
6489 | | NEONMAP2(vbcaxq_u32, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts), |
6490 | | NEONMAP2(vbcaxq_u64, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts), |
6491 | | NEONMAP2(vbcaxq_u8, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts), |
6492 | | NEONMAP1(vbfdot_f32, aarch64_neon_bfdot, 0), |
6493 | | NEONMAP1(vbfdotq_f32, aarch64_neon_bfdot, 0), |
6494 | | NEONMAP1(vbfmlalbq_f32, aarch64_neon_bfmlalb, 0), |
6495 | | NEONMAP1(vbfmlaltq_f32, aarch64_neon_bfmlalt, 0), |
6496 | | NEONMAP1(vbfmmlaq_f32, aarch64_neon_bfmmla, 0), |
6497 | | NEONMAP1(vcadd_rot270_f16, aarch64_neon_vcadd_rot270, Add1ArgType), |
6498 | | NEONMAP1(vcadd_rot270_f32, aarch64_neon_vcadd_rot270, Add1ArgType), |
6499 | | NEONMAP1(vcadd_rot90_f16, aarch64_neon_vcadd_rot90, Add1ArgType), |
6500 | | NEONMAP1(vcadd_rot90_f32, aarch64_neon_vcadd_rot90, Add1ArgType), |
6501 | | NEONMAP1(vcaddq_rot270_f16, aarch64_neon_vcadd_rot270, Add1ArgType), |
6502 | | NEONMAP1(vcaddq_rot270_f32, aarch64_neon_vcadd_rot270, Add1ArgType), |
6503 | | NEONMAP1(vcaddq_rot270_f64, aarch64_neon_vcadd_rot270, Add1ArgType), |
6504 | | NEONMAP1(vcaddq_rot90_f16, aarch64_neon_vcadd_rot90, Add1ArgType), |
6505 | | NEONMAP1(vcaddq_rot90_f32, aarch64_neon_vcadd_rot90, Add1ArgType), |
6506 | | NEONMAP1(vcaddq_rot90_f64, aarch64_neon_vcadd_rot90, Add1ArgType), |
6507 | | NEONMAP1(vcage_v, aarch64_neon_facge, 0), |
6508 | | NEONMAP1(vcageq_v, aarch64_neon_facge, 0), |
6509 | | NEONMAP1(vcagt_v, aarch64_neon_facgt, 0), |
6510 | | NEONMAP1(vcagtq_v, aarch64_neon_facgt, 0), |
6511 | | NEONMAP1(vcale_v, aarch64_neon_facge, 0), |
6512 | | NEONMAP1(vcaleq_v, aarch64_neon_facge, 0), |
6513 | | NEONMAP1(vcalt_v, aarch64_neon_facgt, 0), |
6514 | | NEONMAP1(vcaltq_v, aarch64_neon_facgt, 0), |
6515 | | NEONMAP0(vceqz_v), |
6516 | | NEONMAP0(vceqzq_v), |
6517 | | NEONMAP0(vcgez_v), |
6518 | | NEONMAP0(vcgezq_v), |
6519 | | NEONMAP0(vcgtz_v), |
6520 | | NEONMAP0(vcgtzq_v), |
6521 | | NEONMAP0(vclez_v), |
6522 | | NEONMAP0(vclezq_v), |
6523 | | NEONMAP1(vcls_v, aarch64_neon_cls, Add1ArgType), |
6524 | | NEONMAP1(vclsq_v, aarch64_neon_cls, Add1ArgType), |
6525 | | NEONMAP0(vcltz_v), |
6526 | | NEONMAP0(vcltzq_v), |
6527 | | NEONMAP1(vclz_v, ctlz, Add1ArgType), |
6528 | | NEONMAP1(vclzq_v, ctlz, Add1ArgType), |
6529 | | NEONMAP1(vcmla_f16, aarch64_neon_vcmla_rot0, Add1ArgType), |
6530 | | NEONMAP1(vcmla_f32, aarch64_neon_vcmla_rot0, Add1ArgType), |
6531 | | NEONMAP1(vcmla_rot180_f16, aarch64_neon_vcmla_rot180, Add1ArgType), |
6532 | | NEONMAP1(vcmla_rot180_f32, aarch64_neon_vcmla_rot180, Add1ArgType), |
6533 | | NEONMAP1(vcmla_rot270_f16, aarch64_neon_vcmla_rot270, Add1ArgType), |
6534 | | NEONMAP1(vcmla_rot270_f32, aarch64_neon_vcmla_rot270, Add1ArgType), |
6535 | | NEONMAP1(vcmla_rot90_f16, aarch64_neon_vcmla_rot90, Add1ArgType), |
6536 | | NEONMAP1(vcmla_rot90_f32, aarch64_neon_vcmla_rot90, Add1ArgType), |
6537 | | NEONMAP1(vcmlaq_f16, aarch64_neon_vcmla_rot0, Add1ArgType), |
6538 | | NEONMAP1(vcmlaq_f32, aarch64_neon_vcmla_rot0, Add1ArgType), |
6539 | | NEONMAP1(vcmlaq_f64, aarch64_neon_vcmla_rot0, Add1ArgType), |
6540 | | NEONMAP1(vcmlaq_rot180_f16, aarch64_neon_vcmla_rot180, Add1ArgType), |
6541 | | NEONMAP1(vcmlaq_rot180_f32, aarch64_neon_vcmla_rot180, Add1ArgType), |
6542 | | NEONMAP1(vcmlaq_rot180_f64, aarch64_neon_vcmla_rot180, Add1ArgType), |
6543 | | NEONMAP1(vcmlaq_rot270_f16, aarch64_neon_vcmla_rot270, Add1ArgType), |
6544 | | NEONMAP1(vcmlaq_rot270_f32, aarch64_neon_vcmla_rot270, Add1ArgType), |
6545 | | NEONMAP1(vcmlaq_rot270_f64, aarch64_neon_vcmla_rot270, Add1ArgType), |
6546 | | NEONMAP1(vcmlaq_rot90_f16, aarch64_neon_vcmla_rot90, Add1ArgType), |
6547 | | NEONMAP1(vcmlaq_rot90_f32, aarch64_neon_vcmla_rot90, Add1ArgType), |
6548 | | NEONMAP1(vcmlaq_rot90_f64, aarch64_neon_vcmla_rot90, Add1ArgType), |
6549 | | NEONMAP1(vcnt_v, ctpop, Add1ArgType), |
6550 | | NEONMAP1(vcntq_v, ctpop, Add1ArgType), |
6551 | | NEONMAP1(vcvt_f16_f32, aarch64_neon_vcvtfp2hf, 0), |
6552 | | NEONMAP0(vcvt_f16_s16), |
6553 | | NEONMAP0(vcvt_f16_u16), |
6554 | | NEONMAP1(vcvt_f32_f16, aarch64_neon_vcvthf2fp, 0), |
6555 | | NEONMAP0(vcvt_f32_v), |
6556 | | NEONMAP1(vcvt_n_f16_s16, aarch64_neon_vcvtfxs2fp, 0), |
6557 | | NEONMAP1(vcvt_n_f16_u16, aarch64_neon_vcvtfxu2fp, 0), |
6558 | | NEONMAP2(vcvt_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0), |
6559 | | NEONMAP2(vcvt_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0), |
6560 | | NEONMAP1(vcvt_n_s16_f16, aarch64_neon_vcvtfp2fxs, 0), |
6561 | | NEONMAP1(vcvt_n_s32_v, aarch64_neon_vcvtfp2fxs, 0), |
6562 | | NEONMAP1(vcvt_n_s64_v, aarch64_neon_vcvtfp2fxs, 0), |
6563 | | NEONMAP1(vcvt_n_u16_f16, aarch64_neon_vcvtfp2fxu, 0), |
6564 | | NEONMAP1(vcvt_n_u32_v, aarch64_neon_vcvtfp2fxu, 0), |
6565 | | NEONMAP1(vcvt_n_u64_v, aarch64_neon_vcvtfp2fxu, 0), |
6566 | | NEONMAP0(vcvtq_f16_s16), |
6567 | | NEONMAP0(vcvtq_f16_u16), |
6568 | | NEONMAP0(vcvtq_f32_v), |
6569 | | NEONMAP1(vcvtq_high_bf16_f32, aarch64_neon_bfcvtn2, 0), |
6570 | | NEONMAP1(vcvtq_n_f16_s16, aarch64_neon_vcvtfxs2fp, 0), |
6571 | | NEONMAP1(vcvtq_n_f16_u16, aarch64_neon_vcvtfxu2fp, 0), |
6572 | | NEONMAP2(vcvtq_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0), |
6573 | | NEONMAP2(vcvtq_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0), |
6574 | | NEONMAP1(vcvtq_n_s16_f16, aarch64_neon_vcvtfp2fxs, 0), |
6575 | | NEONMAP1(vcvtq_n_s32_v, aarch64_neon_vcvtfp2fxs, 0), |
6576 | | NEONMAP1(vcvtq_n_s64_v, aarch64_neon_vcvtfp2fxs, 0), |
6577 | | NEONMAP1(vcvtq_n_u16_f16, aarch64_neon_vcvtfp2fxu, 0), |
6578 | | NEONMAP1(vcvtq_n_u32_v, aarch64_neon_vcvtfp2fxu, 0), |
6579 | | NEONMAP1(vcvtq_n_u64_v, aarch64_neon_vcvtfp2fxu, 0), |
6580 | | NEONMAP1(vcvtx_f32_v, aarch64_neon_fcvtxn, AddRetType | Add1ArgType), |
6581 | | NEONMAP1(vdot_s32, aarch64_neon_sdot, 0), |
6582 | | NEONMAP1(vdot_u32, aarch64_neon_udot, 0), |
6583 | | NEONMAP1(vdotq_s32, aarch64_neon_sdot, 0), |
6584 | | NEONMAP1(vdotq_u32, aarch64_neon_udot, 0), |
6585 | | NEONMAP2(veor3q_s16, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts), |
6586 | | NEONMAP2(veor3q_s32, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts), |
6587 | | NEONMAP2(veor3q_s64, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts), |
6588 | | NEONMAP2(veor3q_s8, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts), |
6589 | | NEONMAP2(veor3q_u16, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts), |
6590 | | NEONMAP2(veor3q_u32, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts), |
6591 | | NEONMAP2(veor3q_u64, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts), |
6592 | | NEONMAP2(veor3q_u8, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts), |
6593 | | NEONMAP0(vext_v), |
6594 | | NEONMAP0(vextq_v), |
6595 | | NEONMAP0(vfma_v), |
6596 | | NEONMAP0(vfmaq_v), |
6597 | | NEONMAP1(vfmlal_high_f16, aarch64_neon_fmlal2, 0), |
6598 | | NEONMAP1(vfmlal_low_f16, aarch64_neon_fmlal, 0), |
6599 | | NEONMAP1(vfmlalq_high_f16, aarch64_neon_fmlal2, 0), |
6600 | | NEONMAP1(vfmlalq_low_f16, aarch64_neon_fmlal, 0), |
6601 | | NEONMAP1(vfmlsl_high_f16, aarch64_neon_fmlsl2, 0), |
6602 | | NEONMAP1(vfmlsl_low_f16, aarch64_neon_fmlsl, 0), |
6603 | | NEONMAP1(vfmlslq_high_f16, aarch64_neon_fmlsl2, 0), |
6604 | | NEONMAP1(vfmlslq_low_f16, aarch64_neon_fmlsl, 0), |
6605 | | NEONMAP2(vhadd_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts), |
6606 | | NEONMAP2(vhaddq_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts), |
6607 | | NEONMAP2(vhsub_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts), |
6608 | | NEONMAP2(vhsubq_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts), |
6609 | | NEONMAP1(vld1_x2_v, aarch64_neon_ld1x2, 0), |
6610 | | NEONMAP1(vld1_x3_v, aarch64_neon_ld1x3, 0), |
6611 | | NEONMAP1(vld1_x4_v, aarch64_neon_ld1x4, 0), |
6612 | | NEONMAP1(vld1q_x2_v, aarch64_neon_ld1x2, 0), |
6613 | | NEONMAP1(vld1q_x3_v, aarch64_neon_ld1x3, 0), |
6614 | | NEONMAP1(vld1q_x4_v, aarch64_neon_ld1x4, 0), |
6615 | | NEONMAP1(vmmlaq_s32, aarch64_neon_smmla, 0), |
6616 | | NEONMAP1(vmmlaq_u32, aarch64_neon_ummla, 0), |
6617 | | NEONMAP0(vmovl_v), |
6618 | | NEONMAP0(vmovn_v), |
6619 | | NEONMAP1(vmul_v, aarch64_neon_pmul, Add1ArgType), |
6620 | | NEONMAP1(vmulq_v, aarch64_neon_pmul, Add1ArgType), |
6621 | | NEONMAP1(vpadd_v, aarch64_neon_addp, Add1ArgType), |
6622 | | NEONMAP2(vpaddl_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts), |
6623 | | NEONMAP2(vpaddlq_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts), |
6624 | | NEONMAP1(vpaddq_v, aarch64_neon_addp, Add1ArgType), |
6625 | | NEONMAP1(vqabs_v, aarch64_neon_sqabs, Add1ArgType), |
6626 | | NEONMAP1(vqabsq_v, aarch64_neon_sqabs, Add1ArgType), |
6627 | | NEONMAP2(vqadd_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts), |
6628 | | NEONMAP2(vqaddq_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts), |
6629 | | NEONMAP2(vqdmlal_v, aarch64_neon_sqdmull, aarch64_neon_sqadd, 0), |
6630 | | NEONMAP2(vqdmlsl_v, aarch64_neon_sqdmull, aarch64_neon_sqsub, 0), |
6631 | | NEONMAP1(vqdmulh_lane_v, aarch64_neon_sqdmulh_lane, 0), |
6632 | | NEONMAP1(vqdmulh_laneq_v, aarch64_neon_sqdmulh_laneq, 0), |
6633 | | NEONMAP1(vqdmulh_v, aarch64_neon_sqdmulh, Add1ArgType), |
6634 | | NEONMAP1(vqdmulhq_lane_v, aarch64_neon_sqdmulh_lane, 0), |
6635 | | NEONMAP1(vqdmulhq_laneq_v, aarch64_neon_sqdmulh_laneq, 0), |
6636 | | NEONMAP1(vqdmulhq_v, aarch64_neon_sqdmulh, Add1ArgType), |
6637 | | NEONMAP1(vqdmull_v, aarch64_neon_sqdmull, Add1ArgType), |
6638 | | NEONMAP2(vqmovn_v, aarch64_neon_uqxtn, aarch64_neon_sqxtn, Add1ArgType | UnsignedAlts), |
6639 | | NEONMAP1(vqmovun_v, aarch64_neon_sqxtun, Add1ArgType), |
6640 | | NEONMAP1(vqneg_v, aarch64_neon_sqneg, Add1ArgType), |
6641 | | NEONMAP1(vqnegq_v, aarch64_neon_sqneg, Add1ArgType), |
6642 | | NEONMAP1(vqrdmlah_s16, aarch64_neon_sqrdmlah, Add1ArgType), |
6643 | | NEONMAP1(vqrdmlah_s32, aarch64_neon_sqrdmlah, Add1ArgType), |
6644 | | NEONMAP1(vqrdmlahq_s16, aarch64_neon_sqrdmlah, Add1ArgType), |
6645 | | NEONMAP1(vqrdmlahq_s32, aarch64_neon_sqrdmlah, Add1ArgType), |
6646 | | NEONMAP1(vqrdmlsh_s16, aarch64_neon_sqrdmlsh, Add1ArgType), |
6647 | | NEONMAP1(vqrdmlsh_s32, aarch64_neon_sqrdmlsh, Add1ArgType), |
6648 | | NEONMAP1(vqrdmlshq_s16, aarch64_neon_sqrdmlsh, Add1ArgType), |
6649 | | NEONMAP1(vqrdmlshq_s32, aarch64_neon_sqrdmlsh, Add1ArgType), |
6650 | | NEONMAP1(vqrdmulh_lane_v, aarch64_neon_sqrdmulh_lane, 0), |
6651 | | NEONMAP1(vqrdmulh_laneq_v, aarch64_neon_sqrdmulh_laneq, 0), |
6652 | | NEONMAP1(vqrdmulh_v, aarch64_neon_sqrdmulh, Add1ArgType), |
6653 | | NEONMAP1(vqrdmulhq_lane_v, aarch64_neon_sqrdmulh_lane, 0), |
6654 | | NEONMAP1(vqrdmulhq_laneq_v, aarch64_neon_sqrdmulh_laneq, 0), |
6655 | | NEONMAP1(vqrdmulhq_v, aarch64_neon_sqrdmulh, Add1ArgType), |
6656 | | NEONMAP2(vqrshl_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts), |
6657 | | NEONMAP2(vqrshlq_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts), |
6658 | | NEONMAP2(vqshl_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts), |
6659 | | NEONMAP2(vqshl_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts), |
6660 | | NEONMAP2(vqshlq_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),
6661 | | NEONMAP2(vqshlq_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts), |
6662 | | NEONMAP1(vqshlu_n_v, aarch64_neon_sqshlu, 0), |
6663 | | NEONMAP1(vqshluq_n_v, aarch64_neon_sqshlu, 0), |
6664 | | NEONMAP2(vqsub_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts), |
6665 | | NEONMAP2(vqsubq_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts), |
6666 | | NEONMAP1(vraddhn_v, aarch64_neon_raddhn, Add1ArgType), |
6667 | | NEONMAP1(vrax1q_u64, aarch64_crypto_rax1, 0), |
6668 | | NEONMAP2(vrecpe_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0), |
6669 | | NEONMAP2(vrecpeq_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0), |
6670 | | NEONMAP1(vrecps_v, aarch64_neon_frecps, Add1ArgType), |
6671 | | NEONMAP1(vrecpsq_v, aarch64_neon_frecps, Add1ArgType), |
6672 | | NEONMAP2(vrhadd_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts), |
6673 | | NEONMAP2(vrhaddq_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts), |
6674 | | NEONMAP1(vrnd32x_f32, aarch64_neon_frint32x, Add1ArgType), |
6675 | | NEONMAP1(vrnd32x_f64, aarch64_neon_frint32x, Add1ArgType), |
6676 | | NEONMAP1(vrnd32xq_f32, aarch64_neon_frint32x, Add1ArgType), |
6677 | | NEONMAP1(vrnd32xq_f64, aarch64_neon_frint32x, Add1ArgType), |
6678 | | NEONMAP1(vrnd32z_f32, aarch64_neon_frint32z, Add1ArgType), |
6679 | | NEONMAP1(vrnd32z_f64, aarch64_neon_frint32z, Add1ArgType), |
6680 | | NEONMAP1(vrnd32zq_f32, aarch64_neon_frint32z, Add1ArgType), |
6681 | | NEONMAP1(vrnd32zq_f64, aarch64_neon_frint32z, Add1ArgType), |
6682 | | NEONMAP1(vrnd64x_f32, aarch64_neon_frint64x, Add1ArgType), |
6683 | | NEONMAP1(vrnd64x_f64, aarch64_neon_frint64x, Add1ArgType), |
6684 | | NEONMAP1(vrnd64xq_f32, aarch64_neon_frint64x, Add1ArgType), |
6685 | | NEONMAP1(vrnd64xq_f64, aarch64_neon_frint64x, Add1ArgType), |
6686 | | NEONMAP1(vrnd64z_f32, aarch64_neon_frint64z, Add1ArgType), |
6687 | | NEONMAP1(vrnd64z_f64, aarch64_neon_frint64z, Add1ArgType), |
6688 | | NEONMAP1(vrnd64zq_f32, aarch64_neon_frint64z, Add1ArgType), |
6689 | | NEONMAP1(vrnd64zq_f64, aarch64_neon_frint64z, Add1ArgType), |
6690 | | NEONMAP0(vrndi_v), |
6691 | | NEONMAP0(vrndiq_v), |
6692 | | NEONMAP2(vrshl_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts), |
6693 | | NEONMAP2(vrshlq_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts), |
6694 | | NEONMAP2(vrshr_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts), |
6695 | | NEONMAP2(vrshrq_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts), |
6696 | | NEONMAP2(vrsqrte_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0), |
6697 | | NEONMAP2(vrsqrteq_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0), |
6698 | | NEONMAP1(vrsqrts_v, aarch64_neon_frsqrts, Add1ArgType), |
6699 | | NEONMAP1(vrsqrtsq_v, aarch64_neon_frsqrts, Add1ArgType), |
6700 | | NEONMAP1(vrsubhn_v, aarch64_neon_rsubhn, Add1ArgType), |
6701 | | NEONMAP1(vsha1su0q_u32, aarch64_crypto_sha1su0, 0), |
6702 | | NEONMAP1(vsha1su1q_u32, aarch64_crypto_sha1su1, 0), |
6703 | | NEONMAP1(vsha256h2q_u32, aarch64_crypto_sha256h2, 0), |
6704 | | NEONMAP1(vsha256hq_u32, aarch64_crypto_sha256h, 0), |
6705 | | NEONMAP1(vsha256su0q_u32, aarch64_crypto_sha256su0, 0), |
6706 | | NEONMAP1(vsha256su1q_u32, aarch64_crypto_sha256su1, 0), |
6707 | | NEONMAP1(vsha512h2q_u64, aarch64_crypto_sha512h2, 0), |
6708 | | NEONMAP1(vsha512hq_u64, aarch64_crypto_sha512h, 0), |
6709 | | NEONMAP1(vsha512su0q_u64, aarch64_crypto_sha512su0, 0), |
6710 | | NEONMAP1(vsha512su1q_u64, aarch64_crypto_sha512su1, 0), |
6711 | | NEONMAP0(vshl_n_v), |
6712 | | NEONMAP2(vshl_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts), |
6713 | | NEONMAP0(vshll_n_v), |
6714 | | NEONMAP0(vshlq_n_v), |
6715 | | NEONMAP2(vshlq_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts), |
6716 | | NEONMAP0(vshr_n_v), |
6717 | | NEONMAP0(vshrn_n_v), |
6718 | | NEONMAP0(vshrq_n_v), |
6719 | | NEONMAP1(vsm3partw1q_u32, aarch64_crypto_sm3partw1, 0), |
6720 | | NEONMAP1(vsm3partw2q_u32, aarch64_crypto_sm3partw2, 0), |
6721 | | NEONMAP1(vsm3ss1q_u32, aarch64_crypto_sm3ss1, 0), |
6722 | | NEONMAP1(vsm3tt1aq_u32, aarch64_crypto_sm3tt1a, 0), |
6723 | | NEONMAP1(vsm3tt1bq_u32, aarch64_crypto_sm3tt1b, 0), |
6724 | | NEONMAP1(vsm3tt2aq_u32, aarch64_crypto_sm3tt2a, 0), |
6725 | | NEONMAP1(vsm3tt2bq_u32, aarch64_crypto_sm3tt2b, 0), |
6726 | | NEONMAP1(vsm4ekeyq_u32, aarch64_crypto_sm4ekey, 0), |
6727 | | NEONMAP1(vsm4eq_u32, aarch64_crypto_sm4e, 0), |
6728 | | NEONMAP1(vst1_x2_v, aarch64_neon_st1x2, 0), |
6729 | | NEONMAP1(vst1_x3_v, aarch64_neon_st1x3, 0), |
6730 | | NEONMAP1(vst1_x4_v, aarch64_neon_st1x4, 0), |
6731 | | NEONMAP1(vst1q_x2_v, aarch64_neon_st1x2, 0), |
6732 | | NEONMAP1(vst1q_x3_v, aarch64_neon_st1x3, 0), |
6733 | | NEONMAP1(vst1q_x4_v, aarch64_neon_st1x4, 0), |
6734 | | NEONMAP0(vsubhn_v), |
6735 | | NEONMAP0(vtst_v), |
6736 | | NEONMAP0(vtstq_v), |
6737 | | NEONMAP1(vusdot_s32, aarch64_neon_usdot, 0), |
6738 | | NEONMAP1(vusdotq_s32, aarch64_neon_usdot, 0), |
6739 | | NEONMAP1(vusmmlaq_s32, aarch64_neon_usmmla, 0), |
6740 | | NEONMAP1(vxarq_u64, aarch64_crypto_xar, 0), |
6741 | | }; |
6742 | | |
6743 | | static const ARMVectorIntrinsicInfo AArch64SISDIntrinsicMap[] = { |
6744 | | NEONMAP1(vabdd_f64, aarch64_sisd_fabd, Add1ArgType), |
6745 | | NEONMAP1(vabds_f32, aarch64_sisd_fabd, Add1ArgType), |
6746 | | NEONMAP1(vabsd_s64, aarch64_neon_abs, Add1ArgType), |
6747 | | NEONMAP1(vaddlv_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType), |
6748 | | NEONMAP1(vaddlv_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType), |
6749 | | NEONMAP1(vaddlvq_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType), |
6750 | | NEONMAP1(vaddlvq_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType), |
6751 | | NEONMAP1(vaddv_f32, aarch64_neon_faddv, AddRetType | Add1ArgType), |
6752 | | NEONMAP1(vaddv_s32, aarch64_neon_saddv, AddRetType | Add1ArgType), |
6753 | | NEONMAP1(vaddv_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType), |
6754 | | NEONMAP1(vaddvq_f32, aarch64_neon_faddv, AddRetType | Add1ArgType), |
6755 | | NEONMAP1(vaddvq_f64, aarch64_neon_faddv, AddRetType | Add1ArgType), |
6756 | | NEONMAP1(vaddvq_s32, aarch64_neon_saddv, AddRetType | Add1ArgType), |
6757 | | NEONMAP1(vaddvq_s64, aarch64_neon_saddv, AddRetType | Add1ArgType), |
6758 | | NEONMAP1(vaddvq_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType), |
6759 | | NEONMAP1(vaddvq_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType), |
6760 | | NEONMAP1(vcaged_f64, aarch64_neon_facge, AddRetType | Add1ArgType), |
6761 | | NEONMAP1(vcages_f32, aarch64_neon_facge, AddRetType | Add1ArgType), |
6762 | | NEONMAP1(vcagtd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType), |
6763 | | NEONMAP1(vcagts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType), |
6764 | | NEONMAP1(vcaled_f64, aarch64_neon_facge, AddRetType | Add1ArgType), |
6765 | | NEONMAP1(vcales_f32, aarch64_neon_facge, AddRetType | Add1ArgType), |
6766 | | NEONMAP1(vcaltd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType), |
6767 | | NEONMAP1(vcalts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType), |
6768 | | NEONMAP1(vcvtad_s64_f64, aarch64_neon_fcvtas, AddRetType | Add1ArgType), |
6769 | | NEONMAP1(vcvtad_u64_f64, aarch64_neon_fcvtau, AddRetType | Add1ArgType), |
6770 | | NEONMAP1(vcvtas_s32_f32, aarch64_neon_fcvtas, AddRetType | Add1ArgType), |
6771 | | NEONMAP1(vcvtas_u32_f32, aarch64_neon_fcvtau, AddRetType | Add1ArgType), |
6772 | | NEONMAP1(vcvtd_n_f64_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType), |
6773 | | NEONMAP1(vcvtd_n_f64_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType), |
6774 | | NEONMAP1(vcvtd_n_s64_f64, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType), |
6775 | | NEONMAP1(vcvtd_n_u64_f64, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType), |
6776 | | NEONMAP1(vcvtd_s64_f64, aarch64_neon_fcvtzs, AddRetType | Add1ArgType), |
6777 | | NEONMAP1(vcvtd_u64_f64, aarch64_neon_fcvtzu, AddRetType | Add1ArgType), |
6778 | | NEONMAP1(vcvth_bf16_f32, aarch64_neon_bfcvt, 0), |
6779 | | NEONMAP1(vcvtmd_s64_f64, aarch64_neon_fcvtms, AddRetType | Add1ArgType), |
6780 | | NEONMAP1(vcvtmd_u64_f64, aarch64_neon_fcvtmu, AddRetType | Add1ArgType), |
6781 | | NEONMAP1(vcvtms_s32_f32, aarch64_neon_fcvtms, AddRetType | Add1ArgType), |
6782 | | NEONMAP1(vcvtms_u32_f32, aarch64_neon_fcvtmu, AddRetType | Add1ArgType), |
6783 | | NEONMAP1(vcvtnd_s64_f64, aarch64_neon_fcvtns, AddRetType | Add1ArgType), |
6784 | | NEONMAP1(vcvtnd_u64_f64, aarch64_neon_fcvtnu, AddRetType | Add1ArgType), |
6785 | | NEONMAP1(vcvtns_s32_f32, aarch64_neon_fcvtns, AddRetType | Add1ArgType), |
6786 | | NEONMAP1(vcvtns_u32_f32, aarch64_neon_fcvtnu, AddRetType | Add1ArgType), |
6787 | | NEONMAP1(vcvtpd_s64_f64, aarch64_neon_fcvtps, AddRetType | Add1ArgType), |
6788 | | NEONMAP1(vcvtpd_u64_f64, aarch64_neon_fcvtpu, AddRetType | Add1ArgType), |
6789 | | NEONMAP1(vcvtps_s32_f32, aarch64_neon_fcvtps, AddRetType | Add1ArgType), |
6790 | | NEONMAP1(vcvtps_u32_f32, aarch64_neon_fcvtpu, AddRetType | Add1ArgType), |
6791 | | NEONMAP1(vcvts_n_f32_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType), |
6792 | | NEONMAP1(vcvts_n_f32_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType), |
6793 | | NEONMAP1(vcvts_n_s32_f32, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType), |
6794 | | NEONMAP1(vcvts_n_u32_f32, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType), |
6795 | | NEONMAP1(vcvts_s32_f32, aarch64_neon_fcvtzs, AddRetType | Add1ArgType), |
6796 | | NEONMAP1(vcvts_u32_f32, aarch64_neon_fcvtzu, AddRetType | Add1ArgType), |
6797 | | NEONMAP1(vcvtxd_f32_f64, aarch64_sisd_fcvtxn, 0), |
6798 | | NEONMAP1(vmaxnmv_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType), |
6799 | | NEONMAP1(vmaxnmvq_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType), |
6800 | | NEONMAP1(vmaxnmvq_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType), |
6801 | | NEONMAP1(vmaxv_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType), |
6802 | | NEONMAP1(vmaxv_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType), |
6803 | | NEONMAP1(vmaxv_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType), |
6804 | | NEONMAP1(vmaxvq_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType), |
6805 | | NEONMAP1(vmaxvq_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType), |
6806 | | NEONMAP1(vmaxvq_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType), |
6807 | | NEONMAP1(vmaxvq_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType), |
6808 | | NEONMAP1(vminnmv_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType), |
6809 | | NEONMAP1(vminnmvq_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType), |
6810 | | NEONMAP1(vminnmvq_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType), |
6811 | | NEONMAP1(vminv_f32, aarch64_neon_fminv, AddRetType | Add1ArgType), |
6812 | | NEONMAP1(vminv_s32, aarch64_neon_sminv, AddRetType | Add1ArgType), |
6813 | | NEONMAP1(vminv_u32, aarch64_neon_uminv, AddRetType | Add1ArgType), |
6814 | | NEONMAP1(vminvq_f32, aarch64_neon_fminv, AddRetType | Add1ArgType), |
6815 | | NEONMAP1(vminvq_f64, aarch64_neon_fminv, AddRetType | Add1ArgType), |
6816 | | NEONMAP1(vminvq_s32, aarch64_neon_sminv, AddRetType | Add1ArgType), |
6817 | | NEONMAP1(vminvq_u32, aarch64_neon_uminv, AddRetType | Add1ArgType), |
6818 | | NEONMAP1(vmull_p64, aarch64_neon_pmull64, 0), |
6819 | | NEONMAP1(vmulxd_f64, aarch64_neon_fmulx, Add1ArgType), |
6820 | | NEONMAP1(vmulxs_f32, aarch64_neon_fmulx, Add1ArgType), |
6821 | | NEONMAP1(vpaddd_s64, aarch64_neon_uaddv, AddRetType | Add1ArgType), |
6822 | | NEONMAP1(vpaddd_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType), |
6823 | | NEONMAP1(vpmaxnmqd_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType), |
6824 | | NEONMAP1(vpmaxnms_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType), |
6825 | | NEONMAP1(vpmaxqd_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType), |
6826 | | NEONMAP1(vpmaxs_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType), |
6827 | | NEONMAP1(vpminnmqd_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType), |
6828 | | NEONMAP1(vpminnms_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType), |
6829 | | NEONMAP1(vpminqd_f64, aarch64_neon_fminv, AddRetType | Add1ArgType), |
6830 | | NEONMAP1(vpmins_f32, aarch64_neon_fminv, AddRetType | Add1ArgType), |
6831 | | NEONMAP1(vqabsb_s8, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors), |
6832 | | NEONMAP1(vqabsd_s64, aarch64_neon_sqabs, Add1ArgType), |
6833 | | NEONMAP1(vqabsh_s16, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors), |
6834 | | NEONMAP1(vqabss_s32, aarch64_neon_sqabs, Add1ArgType), |
6835 | | NEONMAP1(vqaddb_s8, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors), |
6836 | | NEONMAP1(vqaddb_u8, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors), |
6837 | | NEONMAP1(vqaddd_s64, aarch64_neon_sqadd, Add1ArgType), |
6838 | | NEONMAP1(vqaddd_u64, aarch64_neon_uqadd, Add1ArgType), |
6839 | | NEONMAP1(vqaddh_s16, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors), |
6840 | | NEONMAP1(vqaddh_u16, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors), |
6841 | | NEONMAP1(vqadds_s32, aarch64_neon_sqadd, Add1ArgType), |
6842 | | NEONMAP1(vqadds_u32, aarch64_neon_uqadd, Add1ArgType), |
6843 | | NEONMAP1(vqdmulhh_s16, aarch64_neon_sqdmulh, Vectorize1ArgType | Use64BitVectors), |
6844 | | NEONMAP1(vqdmulhs_s32, aarch64_neon_sqdmulh, Add1ArgType), |
6845 | | NEONMAP1(vqdmullh_s16, aarch64_neon_sqdmull, VectorRet | Use128BitVectors), |
6846 | | NEONMAP1(vqdmulls_s32, aarch64_neon_sqdmulls_scalar, 0), |
6847 | | NEONMAP1(vqmovnd_s64, aarch64_neon_scalar_sqxtn, AddRetType | Add1ArgType), |
6848 | | NEONMAP1(vqmovnd_u64, aarch64_neon_scalar_uqxtn, AddRetType | Add1ArgType), |
6849 | | NEONMAP1(vqmovnh_s16, aarch64_neon_sqxtn, VectorRet | Use64BitVectors), |
6850 | | NEONMAP1(vqmovnh_u16, aarch64_neon_uqxtn, VectorRet | Use64BitVectors), |
6851 | | NEONMAP1(vqmovns_s32, aarch64_neon_sqxtn, VectorRet | Use64BitVectors), |
6852 | | NEONMAP1(vqmovns_u32, aarch64_neon_uqxtn, VectorRet | Use64BitVectors), |
6853 | | NEONMAP1(vqmovund_s64, aarch64_neon_scalar_sqxtun, AddRetType | Add1ArgType), |
6854 | | NEONMAP1(vqmovunh_s16, aarch64_neon_sqxtun, VectorRet | Use64BitVectors), |
6855 | | NEONMAP1(vqmovuns_s32, aarch64_neon_sqxtun, VectorRet | Use64BitVectors), |
6856 | | NEONMAP1(vqnegb_s8, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors), |
6857 | | NEONMAP1(vqnegd_s64, aarch64_neon_sqneg, Add1ArgType), |
6858 | | NEONMAP1(vqnegh_s16, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors), |
6859 | | NEONMAP1(vqnegs_s32, aarch64_neon_sqneg, Add1ArgType), |
6860 | | NEONMAP1(vqrdmlahh_s16, aarch64_neon_sqrdmlah, Vectorize1ArgType | Use64BitVectors), |
6861 | | NEONMAP1(vqrdmlahs_s32, aarch64_neon_sqrdmlah, Add1ArgType), |
6862 | | NEONMAP1(vqrdmlshh_s16, aarch64_neon_sqrdmlsh, Vectorize1ArgType | Use64BitVectors), |
6863 | | NEONMAP1(vqrdmlshs_s32, aarch64_neon_sqrdmlsh, Add1ArgType), |
6864 | | NEONMAP1(vqrdmulhh_s16, aarch64_neon_sqrdmulh, Vectorize1ArgType | Use64BitVectors), |
6865 | | NEONMAP1(vqrdmulhs_s32, aarch64_neon_sqrdmulh, Add1ArgType), |
6866 | | NEONMAP1(vqrshlb_s8, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors), |
6867 | | NEONMAP1(vqrshlb_u8, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors), |
6868 | | NEONMAP1(vqrshld_s64, aarch64_neon_sqrshl, Add1ArgType), |
6869 | | NEONMAP1(vqrshld_u64, aarch64_neon_uqrshl, Add1ArgType), |
6870 | | NEONMAP1(vqrshlh_s16, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors), |
6871 | | NEONMAP1(vqrshlh_u16, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors), |
6872 | | NEONMAP1(vqrshls_s32, aarch64_neon_sqrshl, Add1ArgType), |
6873 | | NEONMAP1(vqrshls_u32, aarch64_neon_uqrshl, Add1ArgType), |
6874 | | NEONMAP1(vqrshrnd_n_s64, aarch64_neon_sqrshrn, AddRetType), |
6875 | | NEONMAP1(vqrshrnd_n_u64, aarch64_neon_uqrshrn, AddRetType), |
6876 | | NEONMAP1(vqrshrnh_n_s16, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors), |
6877 | | NEONMAP1(vqrshrnh_n_u16, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors), |
6878 | | NEONMAP1(vqrshrns_n_s32, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors), |
6879 | | NEONMAP1(vqrshrns_n_u32, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors), |
6880 | | NEONMAP1(vqrshrund_n_s64, aarch64_neon_sqrshrun, AddRetType), |
6881 | | NEONMAP1(vqrshrunh_n_s16, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors), |
6882 | | NEONMAP1(vqrshruns_n_s32, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors), |
6883 | | NEONMAP1(vqshlb_n_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors), |
6884 | | NEONMAP1(vqshlb_n_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors), |
6885 | | NEONMAP1(vqshlb_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors), |
6886 | | NEONMAP1(vqshlb_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors), |
6887 | | NEONMAP1(vqshld_s64, aarch64_neon_sqshl, Add1ArgType), |
6888 | | NEONMAP1(vqshld_u64, aarch64_neon_uqshl, Add1ArgType), |
6889 | | NEONMAP1(vqshlh_n_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors), |
6890 | | NEONMAP1(vqshlh_n_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors), |
6891 | | NEONMAP1(vqshlh_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors), |
6892 | | NEONMAP1(vqshlh_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors), |
6893 | | NEONMAP1(vqshls_n_s32, aarch64_neon_sqshl, Add1ArgType), |
6894 | | NEONMAP1(vqshls_n_u32, aarch64_neon_uqshl, Add1ArgType), |
6895 | | NEONMAP1(vqshls_s32, aarch64_neon_sqshl, Add1ArgType), |
6896 | | NEONMAP1(vqshls_u32, aarch64_neon_uqshl, Add1ArgType), |
6897 | | NEONMAP1(vqshlub_n_s8, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors), |
6898 | | NEONMAP1(vqshluh_n_s16, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors), |
6899 | | NEONMAP1(vqshlus_n_s32, aarch64_neon_sqshlu, Add1ArgType), |
6900 | | NEONMAP1(vqshrnd_n_s64, aarch64_neon_sqshrn, AddRetType), |
6901 | | NEONMAP1(vqshrnd_n_u64, aarch64_neon_uqshrn, AddRetType), |
6902 | | NEONMAP1(vqshrnh_n_s16, aarch64_neon_sqshrn, VectorRet | Use64BitVectors), |
6903 | | NEONMAP1(vqshrnh_n_u16, aarch64_neon_uqshrn, VectorRet | Use64BitVectors), |
6904 | | NEONMAP1(vqshrns_n_s32, aarch64_neon_sqshrn, VectorRet | Use64BitVectors), |
6905 | | NEONMAP1(vqshrns_n_u32, aarch64_neon_uqshrn, VectorRet | Use64BitVectors), |
6906 | | NEONMAP1(vqshrund_n_s64, aarch64_neon_sqshrun, AddRetType), |
6907 | | NEONMAP1(vqshrunh_n_s16, aarch64_neon_sqshrun, VectorRet | Use64BitVectors), |
6908 | | NEONMAP1(vqshruns_n_s32, aarch64_neon_sqshrun, VectorRet | Use64BitVectors), |
6909 | | NEONMAP1(vqsubb_s8, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors), |
6910 | | NEONMAP1(vqsubb_u8, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors), |
6911 | | NEONMAP1(vqsubd_s64, aarch64_neon_sqsub, Add1ArgType), |
6912 | | NEONMAP1(vqsubd_u64, aarch64_neon_uqsub, Add1ArgType), |
6913 | | NEONMAP1(vqsubh_s16, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors), |
6914 | | NEONMAP1(vqsubh_u16, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors), |
6915 | | NEONMAP1(vqsubs_s32, aarch64_neon_sqsub, Add1ArgType), |
6916 | | NEONMAP1(vqsubs_u32, aarch64_neon_uqsub, Add1ArgType), |
6917 | | NEONMAP1(vrecped_f64, aarch64_neon_frecpe, Add1ArgType), |
6918 | | NEONMAP1(vrecpes_f32, aarch64_neon_frecpe, Add1ArgType), |
6919 | | NEONMAP1(vrecpxd_f64, aarch64_neon_frecpx, Add1ArgType), |
6920 | | NEONMAP1(vrecpxs_f32, aarch64_neon_frecpx, Add1ArgType), |
6921 | | NEONMAP1(vrshld_s64, aarch64_neon_srshl, Add1ArgType), |
6922 | | NEONMAP1(vrshld_u64, aarch64_neon_urshl, Add1ArgType), |
6923 | | NEONMAP1(vrsqrted_f64, aarch64_neon_frsqrte, Add1ArgType), |
6924 | | NEONMAP1(vrsqrtes_f32, aarch64_neon_frsqrte, Add1ArgType), |
6925 | | NEONMAP1(vrsqrtsd_f64, aarch64_neon_frsqrts, Add1ArgType), |
6926 | | NEONMAP1(vrsqrtss_f32, aarch64_neon_frsqrts, Add1ArgType), |
6927 | | NEONMAP1(vsha1cq_u32, aarch64_crypto_sha1c, 0), |
6928 | | NEONMAP1(vsha1h_u32, aarch64_crypto_sha1h, 0), |
6929 | | NEONMAP1(vsha1mq_u32, aarch64_crypto_sha1m, 0), |
6930 | | NEONMAP1(vsha1pq_u32, aarch64_crypto_sha1p, 0), |
6931 | | NEONMAP1(vshld_s64, aarch64_neon_sshl, Add1ArgType), |
6932 | | NEONMAP1(vshld_u64, aarch64_neon_ushl, Add1ArgType), |
6933 | | NEONMAP1(vslid_n_s64, aarch64_neon_vsli, Vectorize1ArgType), |
6934 | | NEONMAP1(vslid_n_u64, aarch64_neon_vsli, Vectorize1ArgType), |
6935 | | NEONMAP1(vsqaddb_u8, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors), |
6936 | | NEONMAP1(vsqaddd_u64, aarch64_neon_usqadd, Add1ArgType), |
6937 | | NEONMAP1(vsqaddh_u16, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors), |
6938 | | NEONMAP1(vsqadds_u32, aarch64_neon_usqadd, Add1ArgType), |
6939 | | NEONMAP1(vsrid_n_s64, aarch64_neon_vsri, Vectorize1ArgType), |
6940 | | NEONMAP1(vsrid_n_u64, aarch64_neon_vsri, Vectorize1ArgType), |
6941 | | NEONMAP1(vuqaddb_s8, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors), |
6942 | | NEONMAP1(vuqaddd_s64, aarch64_neon_suqadd, Add1ArgType), |
6943 | | NEONMAP1(vuqaddh_s16, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors), |
6944 | | NEONMAP1(vuqadds_s32, aarch64_neon_suqadd, Add1ArgType), |
6945 | | // FP16 scalar intrinsics go here.
6946 | | NEONMAP1(vabdh_f16, aarch64_sisd_fabd, Add1ArgType), |
6947 | | NEONMAP1(vcvtah_s32_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType), |
6948 | | NEONMAP1(vcvtah_s64_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType), |
6949 | | NEONMAP1(vcvtah_u32_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType), |
6950 | | NEONMAP1(vcvtah_u64_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType), |
6951 | | NEONMAP1(vcvth_n_f16_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType), |
6952 | | NEONMAP1(vcvth_n_f16_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType), |
6953 | | NEONMAP1(vcvth_n_f16_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType), |
6954 | | NEONMAP1(vcvth_n_f16_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType), |
6955 | | NEONMAP1(vcvth_n_s32_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType), |
6956 | | NEONMAP1(vcvth_n_s64_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType), |
6957 | | NEONMAP1(vcvth_n_u32_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType), |
6958 | | NEONMAP1(vcvth_n_u64_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType), |
6959 | | NEONMAP1(vcvth_s32_f16, aarch64_neon_fcvtzs, AddRetType | Add1ArgType), |
6960 | | NEONMAP1(vcvth_s64_f16, aarch64_neon_fcvtzs, AddRetType | Add1ArgType), |
6961 | | NEONMAP1(vcvth_u32_f16, aarch64_neon_fcvtzu, AddRetType | Add1ArgType), |
6962 | | NEONMAP1(vcvth_u64_f16, aarch64_neon_fcvtzu, AddRetType | Add1ArgType), |
6963 | | NEONMAP1(vcvtmh_s32_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType), |
6964 | | NEONMAP1(vcvtmh_s64_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType), |
6965 | | NEONMAP1(vcvtmh_u32_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType), |
6966 | | NEONMAP1(vcvtmh_u64_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType), |
6967 | | NEONMAP1(vcvtnh_s32_f16, aarch64_neon_fcvtns, AddRetType | Add1ArgType), |
6968 | | NEONMAP1(vcvtnh_s64_f16, aarch64_neon_fcvtns, AddRetType | Add1ArgType), |
6969 | | NEONMAP1(vcvtnh_u32_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType), |
6970 | | NEONMAP1(vcvtnh_u64_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType), |
6971 | | NEONMAP1(vcvtph_s32_f16, aarch64_neon_fcvtps, AddRetType | Add1ArgType), |
6972 | | NEONMAP1(vcvtph_s64_f16, aarch64_neon_fcvtps, AddRetType | Add1ArgType), |
6973 | | NEONMAP1(vcvtph_u32_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType), |
6974 | | NEONMAP1(vcvtph_u64_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType), |
6975 | | NEONMAP1(vmulxh_f16, aarch64_neon_fmulx, Add1ArgType), |
6976 | | NEONMAP1(vrecpeh_f16, aarch64_neon_frecpe, Add1ArgType), |
6977 | | NEONMAP1(vrecpxh_f16, aarch64_neon_frecpx, Add1ArgType), |
6978 | | NEONMAP1(vrsqrteh_f16, aarch64_neon_frsqrte, Add1ArgType), |
6979 | | NEONMAP1(vrsqrtsh_f16, aarch64_neon_frsqrts, Add1ArgType), |
6980 | | }; |
6981 | | |
6982 | | // Some intrinsics are equivalent for codegen. |
6983 | | static const std::pair<unsigned, unsigned> NEONEquivalentIntrinsicMap[] = { |
6984 | | { NEON::BI__builtin_neon_splat_lane_bf16, NEON::BI__builtin_neon_splat_lane_v, }, |
6985 | | { NEON::BI__builtin_neon_splat_laneq_bf16, NEON::BI__builtin_neon_splat_laneq_v, }, |
6986 | | { NEON::BI__builtin_neon_splatq_lane_bf16, NEON::BI__builtin_neon_splatq_lane_v, }, |
6987 | | { NEON::BI__builtin_neon_splatq_laneq_bf16, NEON::BI__builtin_neon_splatq_laneq_v, }, |
6988 | | { NEON::BI__builtin_neon_vabd_f16, NEON::BI__builtin_neon_vabd_v, }, |
6989 | | { NEON::BI__builtin_neon_vabdq_f16, NEON::BI__builtin_neon_vabdq_v, }, |
6990 | | { NEON::BI__builtin_neon_vabs_f16, NEON::BI__builtin_neon_vabs_v, }, |
6991 | | { NEON::BI__builtin_neon_vabsq_f16, NEON::BI__builtin_neon_vabsq_v, }, |
6992 | | { NEON::BI__builtin_neon_vbsl_f16, NEON::BI__builtin_neon_vbsl_v, }, |
6993 | | { NEON::BI__builtin_neon_vbslq_f16, NEON::BI__builtin_neon_vbslq_v, }, |
6994 | | { NEON::BI__builtin_neon_vcage_f16, NEON::BI__builtin_neon_vcage_v, }, |
6995 | | { NEON::BI__builtin_neon_vcageq_f16, NEON::BI__builtin_neon_vcageq_v, }, |
6996 | | { NEON::BI__builtin_neon_vcagt_f16, NEON::BI__builtin_neon_vcagt_v, }, |
6997 | | { NEON::BI__builtin_neon_vcagtq_f16, NEON::BI__builtin_neon_vcagtq_v, }, |
6998 | | { NEON::BI__builtin_neon_vcale_f16, NEON::BI__builtin_neon_vcale_v, }, |
6999 | | { NEON::BI__builtin_neon_vcaleq_f16, NEON::BI__builtin_neon_vcaleq_v, }, |
7000 | | { NEON::BI__builtin_neon_vcalt_f16, NEON::BI__builtin_neon_vcalt_v, }, |
7001 | | { NEON::BI__builtin_neon_vcaltq_f16, NEON::BI__builtin_neon_vcaltq_v, }, |
7002 | | { NEON::BI__builtin_neon_vceqz_f16, NEON::BI__builtin_neon_vceqz_v, }, |
7003 | | { NEON::BI__builtin_neon_vceqzq_f16, NEON::BI__builtin_neon_vceqzq_v, }, |
7004 | | { NEON::BI__builtin_neon_vcgez_f16, NEON::BI__builtin_neon_vcgez_v, }, |
7005 | | { NEON::BI__builtin_neon_vcgezq_f16, NEON::BI__builtin_neon_vcgezq_v, }, |
7006 | | { NEON::BI__builtin_neon_vcgtz_f16, NEON::BI__builtin_neon_vcgtz_v, }, |
7007 | | { NEON::BI__builtin_neon_vcgtzq_f16, NEON::BI__builtin_neon_vcgtzq_v, }, |
7008 | | { NEON::BI__builtin_neon_vclez_f16, NEON::BI__builtin_neon_vclez_v, }, |
7009 | | { NEON::BI__builtin_neon_vclezq_f16, NEON::BI__builtin_neon_vclezq_v, }, |
7010 | | { NEON::BI__builtin_neon_vcltz_f16, NEON::BI__builtin_neon_vcltz_v, }, |
7011 | | { NEON::BI__builtin_neon_vcltzq_f16, NEON::BI__builtin_neon_vcltzq_v, }, |
7012 | | { NEON::BI__builtin_neon_vext_f16, NEON::BI__builtin_neon_vext_v, }, |
7013 | | { NEON::BI__builtin_neon_vextq_f16, NEON::BI__builtin_neon_vextq_v, }, |
7014 | | { NEON::BI__builtin_neon_vfma_f16, NEON::BI__builtin_neon_vfma_v, }, |
7015 | | { NEON::BI__builtin_neon_vfma_lane_f16, NEON::BI__builtin_neon_vfma_lane_v, }, |
7016 | | { NEON::BI__builtin_neon_vfma_laneq_f16, NEON::BI__builtin_neon_vfma_laneq_v, }, |
7017 | | { NEON::BI__builtin_neon_vfmaq_f16, NEON::BI__builtin_neon_vfmaq_v, }, |
7018 | | { NEON::BI__builtin_neon_vfmaq_lane_f16, NEON::BI__builtin_neon_vfmaq_lane_v, }, |
7019 | | { NEON::BI__builtin_neon_vfmaq_laneq_f16, NEON::BI__builtin_neon_vfmaq_laneq_v, }, |
7020 | | { NEON::BI__builtin_neon_vld1_bf16_x2, NEON::BI__builtin_neon_vld1_x2_v }, |
7021 | | { NEON::BI__builtin_neon_vld1_bf16_x3, NEON::BI__builtin_neon_vld1_x3_v }, |
7022 | | { NEON::BI__builtin_neon_vld1_bf16_x4, NEON::BI__builtin_neon_vld1_x4_v }, |
7023 | | { NEON::BI__builtin_neon_vld1_bf16, NEON::BI__builtin_neon_vld1_v }, |
7024 | | { NEON::BI__builtin_neon_vld1_dup_bf16, NEON::BI__builtin_neon_vld1_dup_v }, |
7025 | | { NEON::BI__builtin_neon_vld1_lane_bf16, NEON::BI__builtin_neon_vld1_lane_v }, |
7026 | | { NEON::BI__builtin_neon_vld1q_bf16_x2, NEON::BI__builtin_neon_vld1q_x2_v }, |
7027 | | { NEON::BI__builtin_neon_vld1q_bf16_x3, NEON::BI__builtin_neon_vld1q_x3_v }, |
7028 | | { NEON::BI__builtin_neon_vld1q_bf16_x4, NEON::BI__builtin_neon_vld1q_x4_v }, |
7029 | | { NEON::BI__builtin_neon_vld1q_bf16, NEON::BI__builtin_neon_vld1q_v }, |
7030 | | { NEON::BI__builtin_neon_vld1q_dup_bf16, NEON::BI__builtin_neon_vld1q_dup_v }, |
7031 | | { NEON::BI__builtin_neon_vld1q_lane_bf16, NEON::BI__builtin_neon_vld1q_lane_v }, |
7032 | | { NEON::BI__builtin_neon_vld2_bf16, NEON::BI__builtin_neon_vld2_v }, |
7033 | | { NEON::BI__builtin_neon_vld2_dup_bf16, NEON::BI__builtin_neon_vld2_dup_v }, |
7034 | | { NEON::BI__builtin_neon_vld2_lane_bf16, NEON::BI__builtin_neon_vld2_lane_v }, |
7035 | | { NEON::BI__builtin_neon_vld2q_bf16, NEON::BI__builtin_neon_vld2q_v }, |
7036 | | { NEON::BI__builtin_neon_vld2q_dup_bf16, NEON::BI__builtin_neon_vld2q_dup_v }, |
7037 | | { NEON::BI__builtin_neon_vld2q_lane_bf16, NEON::BI__builtin_neon_vld2q_lane_v }, |
7038 | | { NEON::BI__builtin_neon_vld3_bf16, NEON::BI__builtin_neon_vld3_v }, |
7039 | | { NEON::BI__builtin_neon_vld3_dup_bf16, NEON::BI__builtin_neon_vld3_dup_v }, |
7040 | | { NEON::BI__builtin_neon_vld3_lane_bf16, NEON::BI__builtin_neon_vld3_lane_v }, |
7041 | | { NEON::BI__builtin_neon_vld3q_bf16, NEON::BI__builtin_neon_vld3q_v }, |
7042 | | { NEON::BI__builtin_neon_vld3q_dup_bf16, NEON::BI__builtin_neon_vld3q_dup_v }, |
7043 | | { NEON::BI__builtin_neon_vld3q_lane_bf16, NEON::BI__builtin_neon_vld3q_lane_v }, |
7044 | | { NEON::BI__builtin_neon_vld4_bf16, NEON::BI__builtin_neon_vld4_v }, |
7045 | | { NEON::BI__builtin_neon_vld4_dup_bf16, NEON::BI__builtin_neon_vld4_dup_v }, |
7046 | | { NEON::BI__builtin_neon_vld4_lane_bf16, NEON::BI__builtin_neon_vld4_lane_v }, |
7047 | | { NEON::BI__builtin_neon_vld4q_bf16, NEON::BI__builtin_neon_vld4q_v }, |
7048 | | { NEON::BI__builtin_neon_vld4q_dup_bf16, NEON::BI__builtin_neon_vld4q_dup_v }, |
7049 | | { NEON::BI__builtin_neon_vld4q_lane_bf16, NEON::BI__builtin_neon_vld4q_lane_v }, |
7050 | | { NEON::BI__builtin_neon_vmax_f16, NEON::BI__builtin_neon_vmax_v, }, |
7051 | | { NEON::BI__builtin_neon_vmaxnm_f16, NEON::BI__builtin_neon_vmaxnm_v, }, |
7052 | | { NEON::BI__builtin_neon_vmaxnmq_f16, NEON::BI__builtin_neon_vmaxnmq_v, }, |
7053 | | { NEON::BI__builtin_neon_vmaxq_f16, NEON::BI__builtin_neon_vmaxq_v, }, |
7054 | | { NEON::BI__builtin_neon_vmin_f16, NEON::BI__builtin_neon_vmin_v, }, |
7055 | | { NEON::BI__builtin_neon_vminnm_f16, NEON::BI__builtin_neon_vminnm_v, }, |
7056 | | { NEON::BI__builtin_neon_vminnmq_f16, NEON::BI__builtin_neon_vminnmq_v, }, |
7057 | | { NEON::BI__builtin_neon_vminq_f16, NEON::BI__builtin_neon_vminq_v, }, |
7058 | | { NEON::BI__builtin_neon_vmulx_f16, NEON::BI__builtin_neon_vmulx_v, }, |
7059 | | { NEON::BI__builtin_neon_vmulxq_f16, NEON::BI__builtin_neon_vmulxq_v, }, |
7060 | | { NEON::BI__builtin_neon_vpadd_f16, NEON::BI__builtin_neon_vpadd_v, }, |
7061 | | { NEON::BI__builtin_neon_vpaddq_f16, NEON::BI__builtin_neon_vpaddq_v, }, |
7062 | | { NEON::BI__builtin_neon_vpmax_f16, NEON::BI__builtin_neon_vpmax_v, }, |
7063 | | { NEON::BI__builtin_neon_vpmaxnm_f16, NEON::BI__builtin_neon_vpmaxnm_v, }, |
7064 | | { NEON::BI__builtin_neon_vpmaxnmq_f16, NEON::BI__builtin_neon_vpmaxnmq_v, }, |
7065 | | { NEON::BI__builtin_neon_vpmaxq_f16, NEON::BI__builtin_neon_vpmaxq_v, }, |
7066 | | { NEON::BI__builtin_neon_vpmin_f16, NEON::BI__builtin_neon_vpmin_v, }, |
7067 | | { NEON::BI__builtin_neon_vpminnm_f16, NEON::BI__builtin_neon_vpminnm_v, }, |
7068 | | { NEON::BI__builtin_neon_vpminnmq_f16, NEON::BI__builtin_neon_vpminnmq_v, }, |
7069 | | { NEON::BI__builtin_neon_vpminq_f16, NEON::BI__builtin_neon_vpminq_v, }, |
7070 | | { NEON::BI__builtin_neon_vrecpe_f16, NEON::BI__builtin_neon_vrecpe_v, }, |
7071 | | { NEON::BI__builtin_neon_vrecpeq_f16, NEON::BI__builtin_neon_vrecpeq_v, }, |
7072 | | { NEON::BI__builtin_neon_vrecps_f16, NEON::BI__builtin_neon_vrecps_v, }, |
7073 | | { NEON::BI__builtin_neon_vrecpsq_f16, NEON::BI__builtin_neon_vrecpsq_v, }, |
7074 | | { NEON::BI__builtin_neon_vrnd_f16, NEON::BI__builtin_neon_vrnd_v, }, |
7075 | | { NEON::BI__builtin_neon_vrnda_f16, NEON::BI__builtin_neon_vrnda_v, }, |
7076 | | { NEON::BI__builtin_neon_vrndaq_f16, NEON::BI__builtin_neon_vrndaq_v, }, |
7077 | | { NEON::BI__builtin_neon_vrndi_f16, NEON::BI__builtin_neon_vrndi_v, }, |
7078 | | { NEON::BI__builtin_neon_vrndiq_f16, NEON::BI__builtin_neon_vrndiq_v, }, |
7079 | | { NEON::BI__builtin_neon_vrndm_f16, NEON::BI__builtin_neon_vrndm_v, }, |
7080 | | { NEON::BI__builtin_neon_vrndmq_f16, NEON::BI__builtin_neon_vrndmq_v, }, |
7081 | | { NEON::BI__builtin_neon_vrndn_f16, NEON::BI__builtin_neon_vrndn_v, }, |
7082 | | { NEON::BI__builtin_neon_vrndnq_f16, NEON::BI__builtin_neon_vrndnq_v, }, |
7083 | | { NEON::BI__builtin_neon_vrndp_f16, NEON::BI__builtin_neon_vrndp_v, }, |
7084 | | { NEON::BI__builtin_neon_vrndpq_f16, NEON::BI__builtin_neon_vrndpq_v, }, |
7085 | | { NEON::BI__builtin_neon_vrndq_f16, NEON::BI__builtin_neon_vrndq_v, }, |
7086 | | { NEON::BI__builtin_neon_vrndx_f16, NEON::BI__builtin_neon_vrndx_v, }, |
7087 | | { NEON::BI__builtin_neon_vrndxq_f16, NEON::BI__builtin_neon_vrndxq_v, }, |
7088 | | { NEON::BI__builtin_neon_vrsqrte_f16, NEON::BI__builtin_neon_vrsqrte_v, }, |
7089 | | { NEON::BI__builtin_neon_vrsqrteq_f16, NEON::BI__builtin_neon_vrsqrteq_v, }, |
7090 | | { NEON::BI__builtin_neon_vrsqrts_f16, NEON::BI__builtin_neon_vrsqrts_v, }, |
7091 | | { NEON::BI__builtin_neon_vrsqrtsq_f16, NEON::BI__builtin_neon_vrsqrtsq_v, }, |
7092 | | { NEON::BI__builtin_neon_vsqrt_f16, NEON::BI__builtin_neon_vsqrt_v, }, |
7093 | | { NEON::BI__builtin_neon_vsqrtq_f16, NEON::BI__builtin_neon_vsqrtq_v, }, |
7094 | | { NEON::BI__builtin_neon_vst1_bf16_x2, NEON::BI__builtin_neon_vst1_x2_v }, |
7095 | | { NEON::BI__builtin_neon_vst1_bf16_x3, NEON::BI__builtin_neon_vst1_x3_v }, |
7096 | | { NEON::BI__builtin_neon_vst1_bf16_x4, NEON::BI__builtin_neon_vst1_x4_v }, |
7097 | | { NEON::BI__builtin_neon_vst1_bf16, NEON::BI__builtin_neon_vst1_v }, |
7098 | | { NEON::BI__builtin_neon_vst1_lane_bf16, NEON::BI__builtin_neon_vst1_lane_v }, |
7099 | | { NEON::BI__builtin_neon_vst1q_bf16_x2, NEON::BI__builtin_neon_vst1q_x2_v }, |
7100 | | { NEON::BI__builtin_neon_vst1q_bf16_x3, NEON::BI__builtin_neon_vst1q_x3_v }, |
7101 | | { NEON::BI__builtin_neon_vst1q_bf16_x4, NEON::BI__builtin_neon_vst1q_x4_v }, |
7102 | | { NEON::BI__builtin_neon_vst1q_bf16, NEON::BI__builtin_neon_vst1q_v }, |
7103 | | { NEON::BI__builtin_neon_vst1q_lane_bf16, NEON::BI__builtin_neon_vst1q_lane_v }, |
7104 | | { NEON::BI__builtin_neon_vst2_bf16, NEON::BI__builtin_neon_vst2_v }, |
7105 | | { NEON::BI__builtin_neon_vst2_lane_bf16, NEON::BI__builtin_neon_vst2_lane_v }, |
7106 | | { NEON::BI__builtin_neon_vst2q_bf16, NEON::BI__builtin_neon_vst2q_v }, |
7107 | | { NEON::BI__builtin_neon_vst2q_lane_bf16, NEON::BI__builtin_neon_vst2q_lane_v }, |
7108 | | { NEON::BI__builtin_neon_vst3_bf16, NEON::BI__builtin_neon_vst3_v }, |
7109 | | { NEON::BI__builtin_neon_vst3_lane_bf16, NEON::BI__builtin_neon_vst3_lane_v }, |
7110 | | { NEON::BI__builtin_neon_vst3q_bf16, NEON::BI__builtin_neon_vst3q_v }, |
7111 | | { NEON::BI__builtin_neon_vst3q_lane_bf16, NEON::BI__builtin_neon_vst3q_lane_v }, |
7112 | | { NEON::BI__builtin_neon_vst4_bf16, NEON::BI__builtin_neon_vst4_v }, |
7113 | | { NEON::BI__builtin_neon_vst4_lane_bf16, NEON::BI__builtin_neon_vst4_lane_v }, |
7114 | | { NEON::BI__builtin_neon_vst4q_bf16, NEON::BI__builtin_neon_vst4q_v }, |
7115 | | { NEON::BI__builtin_neon_vst4q_lane_bf16, NEON::BI__builtin_neon_vst4q_lane_v }, |
7116 | | { NEON::BI__builtin_neon_vtrn_f16, NEON::BI__builtin_neon_vtrn_v, }, |
7117 | | { NEON::BI__builtin_neon_vtrnq_f16, NEON::BI__builtin_neon_vtrnq_v, }, |
7118 | | { NEON::BI__builtin_neon_vuzp_f16, NEON::BI__builtin_neon_vuzp_v, }, |
7119 | | { NEON::BI__builtin_neon_vuzpq_f16, NEON::BI__builtin_neon_vuzpq_v, }, |
7120 | | { NEON::BI__builtin_neon_vzip_f16, NEON::BI__builtin_neon_vzip_v, }, |
7121 | | { NEON::BI__builtin_neon_vzipq_f16, NEON::BI__builtin_neon_vzipq_v, }, |
7122 | | // The mangling rules cause us to have one ID for each type for vldap1(q)_lane |
7123 | | // and vstl1(q)_lane, but codegen is equivalent for all of them. Choose an |
7124 | | // arbitrary one to be handled as the canonical variation.
7125 | | { NEON::BI__builtin_neon_vldap1_lane_u64, NEON::BI__builtin_neon_vldap1_lane_s64 }, |
7126 | | { NEON::BI__builtin_neon_vldap1_lane_f64, NEON::BI__builtin_neon_vldap1_lane_s64 }, |
7127 | | { NEON::BI__builtin_neon_vldap1_lane_p64, NEON::BI__builtin_neon_vldap1_lane_s64 }, |
7128 | | { NEON::BI__builtin_neon_vldap1q_lane_u64, NEON::BI__builtin_neon_vldap1q_lane_s64 }, |
7129 | | { NEON::BI__builtin_neon_vldap1q_lane_f64, NEON::BI__builtin_neon_vldap1q_lane_s64 }, |
7130 | | { NEON::BI__builtin_neon_vldap1q_lane_p64, NEON::BI__builtin_neon_vldap1q_lane_s64 }, |
7131 | | { NEON::BI__builtin_neon_vstl1_lane_u64, NEON::BI__builtin_neon_vstl1_lane_s64 }, |
7132 | | { NEON::BI__builtin_neon_vstl1_lane_f64, NEON::BI__builtin_neon_vstl1_lane_s64 }, |
7133 | | { NEON::BI__builtin_neon_vstl1_lane_p64, NEON::BI__builtin_neon_vstl1_lane_s64 }, |
7134 | | { NEON::BI__builtin_neon_vstl1q_lane_u64, NEON::BI__builtin_neon_vstl1q_lane_s64 }, |
7135 | | { NEON::BI__builtin_neon_vstl1q_lane_f64, NEON::BI__builtin_neon_vstl1q_lane_s64 }, |
7136 | | { NEON::BI__builtin_neon_vstl1q_lane_p64, NEON::BI__builtin_neon_vstl1q_lane_s64 }, |
7137 | | }; |
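
This equivalence table lets several builtin IDs share one codegen path: before the main table lookup, the caller rewrites an aliased ID to its canonical partner. A minimal self-contained sketch of that remap step (hypothetical IDs and a plain linear scan for illustration; not the real clang types):

#include <algorithm>
#include <iterator>
#include <utility>

// Hypothetical enumerators standing in for NEON::BI__builtin_neon_* IDs.
enum : unsigned { ID_vabs_f16 = 10, ID_vabs_v = 11 };

static const std::pair<unsigned, unsigned> EquivMap[] = {
    {ID_vabs_f16, ID_vabs_v}, // the f16 variant lowers exactly like the generic one
};

static unsigned canonicalize(unsigned BuiltinID) {
  auto *It = std::find_if(std::begin(EquivMap), std::end(EquivMap),
                          [&](const auto &P) { return P.first == BuiltinID; });
  return It != std::end(EquivMap) ? It->second : BuiltinID;
}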
7138 | | |
7139 | | #undef NEONMAP0 |
7140 | | #undef NEONMAP1 |
7141 | | #undef NEONMAP2 |
7142 | | |
7143 | | #define SVEMAP1(NameBase, LLVMIntrinsic, TypeModifier) \ |
7144 | | { \ |
7145 | | #NameBase, SVE::BI__builtin_sve_##NameBase, Intrinsic::LLVMIntrinsic, 0, \ |
7146 | | TypeModifier \ |
7147 | | } |
7148 | | |
7149 | | #define SVEMAP2(NameBase, TypeModifier) \ |
7150 | | { #NameBase, SVE::BI__builtin_sve_##NameBase, 0, 0, TypeModifier } |
7151 | | static const ARMVectorIntrinsicInfo AArch64SVEIntrinsicMap[] = { |
7152 | | #define GET_SVE_LLVM_INTRINSIC_MAP |
7153 | | #include "clang/Basic/arm_sve_builtin_cg.inc" |
7154 | | #include "clang/Basic/BuiltinsAArch64NeonSVEBridge_cg.def" |
7155 | | #undef GET_SVE_LLVM_INTRINSIC_MAP |
7156 | | }; |
7157 | | |
7158 | | #undef SVEMAP1 |
7159 | | #undef SVEMAP2 |
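
SVEMAP1 leans on the preprocessor's stringize (#) and token-paste (##) operators so a single NameBase argument yields both the name string and the builtin enumerator; SMEMAP1/SMEMAP2 below use the identical pattern. A tiny self-contained demonstration of the idiom (hypothetical names, not the generated SVE tables):

#include <cstdio>

struct Entry {
  const char *Name;
  unsigned BuiltinID;
};

// Hypothetical enumerator standing in for SVE::BI__builtin_sve_svabs_m.
enum : unsigned { BI_svabs_m = 7 };

#define MAP1(NameBase) { #NameBase, BI_##NameBase }

static const Entry Table[] = {
    MAP1(svabs_m), // expands to { "svabs_m", BI_svabs_m }
};

int main() {
  std::printf("%s -> %u\n", Table[0].Name, Table[0].BuiltinID);
}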
7160 | | |
7161 | | #define SMEMAP1(NameBase, LLVMIntrinsic, TypeModifier) \ |
7162 | | { \ |
7163 | | #NameBase, SME::BI__builtin_sme_##NameBase, Intrinsic::LLVMIntrinsic, 0, \ |
7164 | | TypeModifier \ |
7165 | | } |
7166 | | |
7167 | | #define SMEMAP2(NameBase, TypeModifier) \ |
7168 | | { #NameBase, SME::BI__builtin_sme_##NameBase, 0, 0, TypeModifier } |
7169 | | static const ARMVectorIntrinsicInfo AArch64SMEIntrinsicMap[] = { |
7170 | | #define GET_SME_LLVM_INTRINSIC_MAP |
7171 | | #include "clang/Basic/arm_sme_builtin_cg.inc" |
7172 | | #undef GET_SME_LLVM_INTRINSIC_MAP |
7173 | | }; |
7174 | | |
7175 | | #undef SMEMAP1 |
7176 | | #undef SMEMAP2 |
7177 | | |
7178 | | static bool NEONSIMDIntrinsicsProvenSorted = false; |
7179 | | |
7180 | | static bool AArch64SIMDIntrinsicsProvenSorted = false; |
7181 | | static bool AArch64SISDIntrinsicsProvenSorted = false; |
7182 | | static bool AArch64SVEIntrinsicsProvenSorted = false; |
7183 | | static bool AArch64SMEIntrinsicsProvenSorted = false; |
7184 | | |
7185 | | static const ARMVectorIntrinsicInfo * |
7186 | | findARMVectorIntrinsicInMap(ArrayRef<ARMVectorIntrinsicInfo> IntrinsicMap, |
7187 | 49.2k | unsigned BuiltinID, bool &MapProvenSorted) { |
7188 | | |
7189 | 49.2k | #ifndef NDEBUG |
7190 | 49.2k | if (!MapProvenSorted) { |
7191 | 2.18k | assert(llvm::is_sorted(IntrinsicMap)); |
7192 | 2.18k | MapProvenSorted = true; |
7193 | 2.18k | } |
7194 | 49.2k | #endif |
7195 | | |
7196 | 49.2k | const ARMVectorIntrinsicInfo *Builtin = |
7197 | 49.2k | llvm::lower_bound(IntrinsicMap, BuiltinID); |
7198 | | |
7199 | 49.2k | if (Builtin != IntrinsicMap.end() && Builtin->BuiltinID == BuiltinID)
7200 | 44.1k | return Builtin; |
7201 | | |
7202 | 5.14k | return nullptr; |
7203 | 49.2k | } |
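
The lookup is a plain binary search, so every table must stay sorted by BuiltinID; the is_sorted check runs once per table (guarded by the ProvenSorted flag) and only in asserts builds. A self-contained sketch of the same pattern over a toy table (hypothetical Info type, not the real ARMVectorIntrinsicInfo):

#include <algorithm>
#include <cassert>
#include <iterator>

struct Info {
  unsigned BuiltinID;
  const char *Name;
  // Order by BuiltinID so is_sorted and lower_bound agree.
  bool operator<(const Info &O) const { return BuiltinID < O.BuiltinID; }
  bool operator<(unsigned ID) const { return BuiltinID < ID; }
};

static const Info Map[] = {{1, "vabs"}, {4, "vadd"}, {9, "vmull"}};

static const Info *findInMap(unsigned ID) {
  assert(std::is_sorted(std::begin(Map), std::end(Map))); // checked once in the real code
  const Info *I = std::lower_bound(std::begin(Map), std::end(Map), ID);
  return (I != std::end(Map) && I->BuiltinID == ID) ? I : nullptr;
}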
7204 | | |
7205 | | Function *CodeGenFunction::LookupNeonLLVMIntrinsic(unsigned IntrinsicID, |
7206 | | unsigned Modifier, |
7207 | | llvm::Type *ArgType, |
7208 | 1.00k | const CallExpr *E) { |
7209 | 1.00k | int VectorSize = 0; |
7210 | 1.00k | if (Modifier & Use64BitVectors) |
7211 | 58 | VectorSize = 64; |
7212 | 948 | else if (Modifier & Use128BitVectors) |
7213 | 2 | VectorSize = 128; |
7214 | | |
7215 | | // Return type. |
7216 | 1.00k | SmallVector<llvm::Type *, 3> Tys; |
7217 | 1.00k | if (Modifier & AddRetType) { |
7218 | 168 | llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext())); |
7219 | 168 | if (Modifier & VectorizeRetType) |
7220 | 20 | Ty = llvm::FixedVectorType::get( |
7221 | 20 | Ty, VectorSize ? VectorSize / Ty->getPrimitiveSizeInBits() : 1);
7222 | | |
7223 | 168 | Tys.push_back(Ty); |
7224 | 168 | } |
7225 | | |
7226 | | // Arguments. |
7227 | 1.00k | if (Modifier & VectorizeArgTypes) { |
7228 | 44 | int Elts = VectorSize ? VectorSize / ArgType->getPrimitiveSizeInBits() : 1;
7229 | 44 | ArgType = llvm::FixedVectorType::get(ArgType, Elts); |
7230 | 44 | } |
7231 | | |
7232 | 1.00k | if (Modifier & (Add1ArgType | Add2ArgTypes)) |
7233 | 897 | Tys.push_back(ArgType); |
7234 | | |
7235 | 1.00k | if (Modifier & Add2ArgTypes) |
7236 | 0 | Tys.push_back(ArgType); |
7237 | | |
7238 | 1.00k | if (Modifier & InventFloatType) |
7239 | 0 | Tys.push_back(FloatTy); |
7240 | | |
7241 | 1.00k | return CGM.getIntrinsic(IntrinsicID, Tys); |
7242 | 1.00k | } |
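
The TypeModifier bits drive which types parameterize the overloaded intrinsic: AddRetType prepends the call's return type, Add1ArgType/Add2ArgTypes append the argument type once or twice, and the Vectorize* bits first widen a scalar into a 64- or 128-bit fixed vector. A hedged sketch of the same flag-driven assembly over type names instead of llvm::Type* (simplified flag values chosen for illustration):

#include <string>
#include <vector>

enum : unsigned { // simplified stand-ins for the real modifier bits
  AddRetType = 1u << 0,
  Add1ArgType = 1u << 1,
  VectorizeArgTypes = 1u << 2,
};

static std::vector<std::string> overloadTypes(unsigned Modifier,
                                              const std::string &RetTy,
                                              std::string ArgTy,
                                              int VectorSize, int ArgBits) {
  std::vector<std::string> Tys;
  if (Modifier & AddRetType)
    Tys.push_back(RetTy);
  if (Modifier & VectorizeArgTypes) {
    int Elts = VectorSize ? VectorSize / ArgBits : 1; // mirrors the code above
    ArgTy = "<" + std::to_string(Elts) + " x " + ArgTy + ">";
  }
  if (Modifier & Add1ArgType)
    Tys.push_back(ArgTy);
  return Tys;
}
// overloadTypes(AddRetType | Add1ArgType, "i32", "half", 0, 16) -> {"i32", "half"};
// adding VectorizeArgTypes with VectorSize = 64 turns "half" into "<4 x half>".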
7243 | | |
7244 | | static Value *EmitCommonNeonSISDBuiltinExpr( |
7245 | | CodeGenFunction &CGF, const ARMVectorIntrinsicInfo &SISDInfo, |
7246 | 271 | SmallVectorImpl<Value *> &Ops, const CallExpr *E) { |
7247 | 271 | unsigned BuiltinID = SISDInfo.BuiltinID; |
7248 | 271 | unsigned int Int = SISDInfo.LLVMIntrinsic; |
7249 | 271 | unsigned Modifier = SISDInfo.TypeModifier; |
7250 | 271 | const char *s = SISDInfo.NameHint; |
7251 | | |
7252 | 271 | switch (BuiltinID) { |
7253 | 0 | case NEON::BI__builtin_neon_vcled_s64: |
7254 | 0 | case NEON::BI__builtin_neon_vcled_u64: |
7255 | 0 | case NEON::BI__builtin_neon_vcles_f32: |
7256 | 0 | case NEON::BI__builtin_neon_vcled_f64: |
7257 | 0 | case NEON::BI__builtin_neon_vcltd_s64: |
7258 | 0 | case NEON::BI__builtin_neon_vcltd_u64: |
7259 | 0 | case NEON::BI__builtin_neon_vclts_f32: |
7260 | 0 | case NEON::BI__builtin_neon_vcltd_f64: |
7261 | 1 | case NEON::BI__builtin_neon_vcales_f32: |
7262 | 2 | case NEON::BI__builtin_neon_vcaled_f64: |
7263 | 3 | case NEON::BI__builtin_neon_vcalts_f32: |
7264 | 4 | case NEON::BI__builtin_neon_vcaltd_f64: |
7265 | | // Only one direction of comparisons actually exists; cmle is actually a cmge
7266 | | // with swapped operands. The table gives us the right intrinsic but we |
7267 | | // still need to do the swap. |
7268 | 4 | std::swap(Ops[0], Ops[1]); |
7269 | 4 | break; |
7270 | 271 | } |
7271 | | |
7272 | 271 | assert(Int && "Generic code assumes a valid intrinsic"); |
7273 | | |
7274 | | // Determine the type(s) of this overloaded AArch64 intrinsic. |
7275 | 271 | const Expr *Arg = E->getArg(0); |
7276 | 271 | llvm::Type *ArgTy = CGF.ConvertType(Arg->getType()); |
7277 | 271 | Function *F = CGF.LookupNeonLLVMIntrinsic(Int, Modifier, ArgTy, E); |
7278 | | |
7279 | 271 | int j = 0; |
7280 | 271 | ConstantInt *C0 = ConstantInt::get(CGF.SizeTy, 0); |
7281 | 271 | for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end(); |
7282 | 705 | ai != ae; ++ai, ++j) {
7283 | 434 | llvm::Type *ArgTy = ai->getType(); |
7284 | 434 | if (Ops[j]->getType()->getPrimitiveSizeInBits() == |
7285 | 434 | ArgTy->getPrimitiveSizeInBits()) |
7286 | 334 | continue; |
7287 | | |
7288 | 100 | assert(ArgTy->isVectorTy() && !Ops[j]->getType()->isVectorTy()); |
7289 | | // The constant argument to an _n_ intrinsic always has Int32Ty, so truncate |
7290 | | // it before inserting. |
7291 | 100 | Ops[j] = CGF.Builder.CreateTruncOrBitCast( |
7292 | 100 | Ops[j], cast<llvm::VectorType>(ArgTy)->getElementType()); |
7293 | 100 | Ops[j] = |
7294 | 100 | CGF.Builder.CreateInsertElement(PoisonValue::get(ArgTy), Ops[j], C0); |
7295 | 100 | } |
7296 | | |
7297 | 271 | Value *Result = CGF.EmitNeonCall(F, Ops, s); |
7298 | 271 | llvm::Type *ResultType = CGF.ConvertType(E->getType()); |
7299 | 271 | if (ResultType->getPrimitiveSizeInBits().getFixedValue() < |
7300 | 271 | Result->getType()->getPrimitiveSizeInBits().getFixedValue()) |
7301 | 60 | return CGF.Builder.CreateExtractElement(Result, C0); |
7302 | | |
7303 | 211 | return CGF.Builder.CreateBitCast(Result, ResultType, s); |
7304 | 271 | } |
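
The operand swap at the top of this function reflects a hardware asymmetry: only the greater-than(-or-equal) direction of the absolute compares exists, so a "less" builtin is emitted as the "greater" intrinsic with its inputs exchanged. A scalar model of that identity (facge returns an all-ones mask on true, per the NEON definition):

#include <cmath>
#include <cstdint>

static uint32_t facge(float X, float Y) { // |X| >= |Y|
  return std::fabs(X) >= std::fabs(Y) ? 0xFFFFFFFFu : 0u;
}

static uint32_t vcales_f32(float A, float B) { // |A| <= |B|
  return facge(B, A); // the same swap the switch above performs on Ops[0]/Ops[1]
}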
7305 | | |
7306 | | Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( |
7307 | | unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic, |
7308 | | const char *NameHint, unsigned Modifier, const CallExpr *E, |
7309 | | SmallVectorImpl<llvm::Value *> &Ops, Address PtrOp0, Address PtrOp1, |
7310 | 2.84k | llvm::Triple::ArchType Arch) { |
7311 | | // Get the last argument, which specifies the vector type. |
7312 | 2.84k | const Expr *Arg = E->getArg(E->getNumArgs() - 1); |
7313 | 2.84k | std::optional<llvm::APSInt> NeonTypeConst = |
7314 | 2.84k | Arg->getIntegerConstantExpr(getContext()); |
7315 | 2.84k | if (!NeonTypeConst) |
7316 | 0 | return nullptr; |
7317 | | |
7318 | | // Determine the type of this overloaded NEON intrinsic. |
7319 | 2.84k | NeonTypeFlags Type(NeonTypeConst->getZExtValue()); |
7320 | 2.84k | bool Usgn = Type.isUnsigned(); |
7321 | 2.84k | bool Quad = Type.isQuad(); |
7322 | 2.84k | const bool HasLegalHalfType = getTarget().hasLegalHalfType(); |
7323 | 2.84k | const bool AllowBFloatArgsAndRet = |
7324 | 2.84k | getTargetHooks().getABIInfo().allowBFloatArgsAndRet(); |
7325 | | |
7326 | 2.84k | llvm::FixedVectorType *VTy = |
7327 | 2.84k | GetNeonType(this, Type, HasLegalHalfType, false, AllowBFloatArgsAndRet); |
7328 | 2.84k | llvm::Type *Ty = VTy; |
7329 | 2.84k | if (!Ty) |
7330 | 0 | return nullptr; |
7331 | | |
7332 | 2.84k | auto getAlignmentValue32 = [&](Address addr) -> Value* { |
7333 | 390 | return Builder.getInt32(addr.getAlignment().getQuantity()); |
7334 | 390 | }; |
7335 | | |
7336 | 2.84k | unsigned Int = LLVMIntrinsic; |
7337 | 2.84k | if ((Modifier & UnsignedAlts) && !Usgn)
7338 | 236 | Int = AltLLVMIntrinsic; |
7339 | | |
7340 | 2.84k | switch (BuiltinID) { |
7341 | 684 | default: break; |
7342 | 684 | case NEON::BI__builtin_neon_splat_lane_v: |
7343 | 313 | case NEON::BI__builtin_neon_splat_laneq_v: |
7344 | 407 | case NEON::BI__builtin_neon_splatq_lane_v: |
7345 | 461 | case NEON::BI__builtin_neon_splatq_laneq_v: { |
7346 | 461 | auto NumElements = VTy->getElementCount(); |
7347 | 461 | if (BuiltinID == NEON::BI__builtin_neon_splatq_lane_v) |
7348 | 94 | NumElements = NumElements * 2; |
7349 | 461 | if (BuiltinID == NEON::BI__builtin_neon_splat_laneq_v) |
7350 | 127 | NumElements = NumElements.divideCoefficientBy(2); |
7351 | | |
7352 | 461 | Ops[0] = Builder.CreateBitCast(Ops[0], VTy); |
7353 | 461 | return EmitNeonSplat(Ops[0], cast<ConstantInt>(Ops[1]), NumElements); |
7354 | 407 | } |
7355 | 16 | case NEON::BI__builtin_neon_vpadd_v: |
7356 | 27 | case NEON::BI__builtin_neon_vpaddq_v: |
7357 | | // We don't allow fp/int overloading of intrinsics. |
7358 | 27 | if (VTy->getElementType()->isFloatingPointTy() && |
7359 | 27 | Int == Intrinsic::aarch64_neon_addp)
7360 | 5 | Int = Intrinsic::aarch64_neon_faddp; |
7361 | 27 | break; |
7362 | 12 | case NEON::BI__builtin_neon_vabs_v: |
7363 | 24 | case NEON::BI__builtin_neon_vabsq_v: |
7364 | 24 | if (VTy->getElementType()->isFloatingPointTy()) |
7365 | 10 | return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, Ty), Ops, "vabs"); |
7366 | 14 | return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops, "vabs"); |
7367 | 6 | case NEON::BI__builtin_neon_vadd_v: |
7368 | 12 | case NEON::BI__builtin_neon_vaddq_v: { |
7369 | 12 | llvm::Type *VTy = llvm::FixedVectorType::get(Int8Ty, Quad ? 16 : 8);
7370 | 12 | Ops[0] = Builder.CreateBitCast(Ops[0], VTy); |
7371 | 12 | Ops[1] = Builder.CreateBitCast(Ops[1], VTy); |
7372 | 12 | Ops[0] = Builder.CreateXor(Ops[0], Ops[1]); |
7373 | 12 | return Builder.CreateBitCast(Ops[0], Ty); |
7374 | 6 | } |
7375 | 12 | case NEON::BI__builtin_neon_vaddhn_v: { |
7376 | 12 | llvm::FixedVectorType *SrcTy = |
7377 | 12 | llvm::FixedVectorType::getExtendedElementVectorType(VTy); |
7378 | | |
7379 | | // %sum = add <4 x i32> %lhs, %rhs |
7380 | 12 | Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy); |
7381 | 12 | Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy); |
7382 | 12 | Ops[0] = Builder.CreateAdd(Ops[0], Ops[1], "vaddhn"); |
7383 | | |
7384 | | // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16> |
7385 | 12 | Constant *ShiftAmt = |
7386 | 12 | ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2); |
7387 | 12 | Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vaddhn"); |
7388 | | |
7389 | | // %res = trunc <4 x i32> %high to <4 x i16> |
7390 | 12 | return Builder.CreateTrunc(Ops[0], VTy, "vaddhn"); |
7391 | 6 | } |
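
The three IR steps above implement "add, then keep the high half of each lane". A worked scalar model for one lane (a sketch, assuming 32-bit source elements narrowing to 16 bits):

#include <cstdint>

static uint16_t vaddhn_lane(uint32_t L, uint32_t R) {
  uint32_t Sum = L + R;               // %sum  = add i32 %lhs, %rhs
  uint32_t High = Sum >> 16;          // %high = lshr i32 %sum, 16
  return static_cast<uint16_t>(High); // %res  = trunc i32 %high to i16
}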
7392 | 5 | case NEON::BI__builtin_neon_vcale_v: |
7393 | 10 | case NEON::BI__builtin_neon_vcaleq_v: |
7394 | 15 | case NEON::BI__builtin_neon_vcalt_v: |
7395 | 20 | case NEON::BI__builtin_neon_vcaltq_v: |
7396 | 20 | std::swap(Ops[0], Ops[1]); |
7397 | 20 | [[fallthrough]]; |
7398 | 25 | case NEON::BI__builtin_neon_vcage_v: |
7399 | 30 | case NEON::BI__builtin_neon_vcageq_v: |
7400 | 35 | case NEON::BI__builtin_neon_vcagt_v: |
7401 | 40 | case NEON::BI__builtin_neon_vcagtq_v: { |
7402 | 40 | llvm::Type *Ty; |
7403 | 40 | switch (VTy->getScalarSizeInBits()) { |
7404 | 0 | default: llvm_unreachable("unexpected type"); |
7405 | 16 | case 32: |
7406 | 16 | Ty = FloatTy; |
7407 | 16 | break; |
7408 | 8 | case 64: |
7409 | 8 | Ty = DoubleTy; |
7410 | 8 | break; |
7411 | 16 | case 16: |
7412 | 16 | Ty = HalfTy; |
7413 | 16 | break; |
7414 | 40 | } |
7415 | 40 | auto *VecFlt = llvm::FixedVectorType::get(Ty, VTy->getNumElements()); |
7416 | 40 | llvm::Type *Tys[] = { VTy, VecFlt }; |
7417 | 40 | Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys); |
7418 | 40 | return EmitNeonCall(F, Ops, NameHint); |
7419 | 40 | } |
7420 | 14 | case NEON::BI__builtin_neon_vceqz_v: |
7421 | 28 | case NEON::BI__builtin_neon_vceqzq_v: |
7422 | 28 | return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OEQ, |
7423 | 28 | ICmpInst::ICMP_EQ, "vceqz"); |
7424 | 8 | case NEON::BI__builtin_neon_vcgez_v: |
7425 | 16 | case NEON::BI__builtin_neon_vcgezq_v: |
7426 | 16 | return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGE, |
7427 | 16 | ICmpInst::ICMP_SGE, "vcgez"); |
7428 | 8 | case NEON::BI__builtin_neon_vclez_v: |
7429 | 16 | case NEON::BI__builtin_neon_vclezq_v: |
7430 | 16 | return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLE, |
7431 | 16 | ICmpInst::ICMP_SLE, "vclez"); |
7432 | 8 | case NEON::BI__builtin_neon_vcgtz_v: |
7433 | 16 | case NEON::BI__builtin_neon_vcgtzq_v: |
7434 | 16 | return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGT, |
7435 | 16 | ICmpInst::ICMP_SGT, "vcgtz"); |
7436 | 8 | case NEON::BI__builtin_neon_vcltz_v: |
7437 | 16 | case NEON::BI__builtin_neon_vcltzq_v: |
7438 | 16 | return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLT, |
7439 | 16 | ICmpInst::ICMP_SLT, "vcltz"); |
7440 | 12 | case NEON::BI__builtin_neon_vclz_v: |
7441 | 24 | case NEON::BI__builtin_neon_vclzq_v: |
7442 | | // We generate a target-independent intrinsic, which needs a second argument
7443 | | // for whether or not clz of zero is undefined; on ARM it isn't. |
7444 | 24 | Ops.push_back(Builder.getInt1(getTarget().isCLZForZeroUndef())); |
7445 | 24 | break; |
7446 | 4 | case NEON::BI__builtin_neon_vcvt_f32_v: |
7447 | 8 | case NEON::BI__builtin_neon_vcvtq_f32_v: |
7448 | 8 | Ops[0] = Builder.CreateBitCast(Ops[0], Ty); |
7449 | 8 | Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, Quad), |
7450 | 8 | HasLegalHalfType); |
7451 | 8 | return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
7452 | 8 | : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
7453 | 2 | case NEON::BI__builtin_neon_vcvt_f16_s16: |
7454 | 4 | case NEON::BI__builtin_neon_vcvt_f16_u16: |
7455 | 6 | case NEON::BI__builtin_neon_vcvtq_f16_s16: |
7456 | 8 | case NEON::BI__builtin_neon_vcvtq_f16_u16: |
7457 | 8 | Ops[0] = Builder.CreateBitCast(Ops[0], Ty); |
7458 | 8 | Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float16, false, Quad), |
7459 | 8 | HasLegalHalfType); |
7460 | 8 | return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
7461 | 8 | : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
7462 | 2 | case NEON::BI__builtin_neon_vcvt_n_f16_s16: |
7463 | 4 | case NEON::BI__builtin_neon_vcvt_n_f16_u16: |
7464 | 6 | case NEON::BI__builtin_neon_vcvtq_n_f16_s16: |
7465 | 8 | case NEON::BI__builtin_neon_vcvtq_n_f16_u16: { |
7466 | 8 | llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty }; |
7467 | 8 | Function *F = CGM.getIntrinsic(Int, Tys); |
7468 | 8 | return EmitNeonCall(F, Ops, "vcvt_n"); |
7469 | 6 | } |
7470 | 4 | case NEON::BI__builtin_neon_vcvt_n_f32_v: |
7471 | 6 | case NEON::BI__builtin_neon_vcvt_n_f64_v: |
7472 | 10 | case NEON::BI__builtin_neon_vcvtq_n_f32_v: |
7473 | 12 | case NEON::BI__builtin_neon_vcvtq_n_f64_v: { |
7474 | 12 | llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty }; |
7475 | 12 | Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic;
7476 | 12 | Function *F = CGM.getIntrinsic(Int, Tys); |
7477 | 12 | return EmitNeonCall(F, Ops, "vcvt_n"); |
7478 | 10 | } |
7479 | 2 | case NEON::BI__builtin_neon_vcvt_n_s16_f16: |
7480 | 4 | case NEON::BI__builtin_neon_vcvt_n_s32_v: |
7481 | 6 | case NEON::BI__builtin_neon_vcvt_n_u16_f16: |
7482 | 8 | case NEON::BI__builtin_neon_vcvt_n_u32_v: |
7483 | 9 | case NEON::BI__builtin_neon_vcvt_n_s64_v: |
7484 | 10 | case NEON::BI__builtin_neon_vcvt_n_u64_v: |
7485 | 12 | case NEON::BI__builtin_neon_vcvtq_n_s16_f16: |
7486 | 14 | case NEON::BI__builtin_neon_vcvtq_n_s32_v: |
7487 | 16 | case NEON::BI__builtin_neon_vcvtq_n_u16_f16: |
7488 | 18 | case NEON::BI__builtin_neon_vcvtq_n_u32_v: |
7489 | 19 | case NEON::BI__builtin_neon_vcvtq_n_s64_v: |
7490 | 20 | case NEON::BI__builtin_neon_vcvtq_n_u64_v: { |
7491 | 20 | llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) }; |
7492 | 20 | Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys); |
7493 | 20 | return EmitNeonCall(F, Ops, "vcvt_n"); |
7494 | 19 | } |
7495 | 1 | case NEON::BI__builtin_neon_vcvt_s32_v: |
7496 | 2 | case NEON::BI__builtin_neon_vcvt_u32_v: |
7497 | 2 | case NEON::BI__builtin_neon_vcvt_s64_v: |
7498 | 2 | case NEON::BI__builtin_neon_vcvt_u64_v: |
7499 | 3 | case NEON::BI__builtin_neon_vcvt_s16_f16: |
7500 | 4 | case NEON::BI__builtin_neon_vcvt_u16_f16: |
7501 | 5 | case NEON::BI__builtin_neon_vcvtq_s32_v: |
7502 | 6 | case NEON::BI__builtin_neon_vcvtq_u32_v: |
7503 | 6 | case NEON::BI__builtin_neon_vcvtq_s64_v: |
7504 | 6 | case NEON::BI__builtin_neon_vcvtq_u64_v: |
7505 | 7 | case NEON::BI__builtin_neon_vcvtq_s16_f16: |
7506 | 8 | case NEON::BI__builtin_neon_vcvtq_u16_f16: { |
7507 | 8 | Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type)); |
7508 | 8 | return Usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt")
7509 | 8 | : Builder.CreateFPToSI(Ops[0], Ty, "vcvt");
7510 | 7 | } |
7511 | 1 | case NEON::BI__builtin_neon_vcvta_s16_f16: |
7512 | 2 | case NEON::BI__builtin_neon_vcvta_s32_v: |
7513 | 2 | case NEON::BI__builtin_neon_vcvta_s64_v: |
7514 | 3 | case NEON::BI__builtin_neon_vcvta_u16_f16: |
7515 | 4 | case NEON::BI__builtin_neon_vcvta_u32_v: |
7516 | 4 | case NEON::BI__builtin_neon_vcvta_u64_v: |
7517 | 5 | case NEON::BI__builtin_neon_vcvtaq_s16_f16: |
7518 | 6 | case NEON::BI__builtin_neon_vcvtaq_s32_v: |
7519 | 6 | case NEON::BI__builtin_neon_vcvtaq_s64_v: |
7520 | 6 | case NEON::BI__builtin_neon_vcvtaq_u16_f16: |
7521 | 7 | case NEON::BI__builtin_neon_vcvtaq_u32_v: |
7522 | 7 | case NEON::BI__builtin_neon_vcvtaq_u64_v: |
7523 | 8 | case NEON::BI__builtin_neon_vcvtn_s16_f16: |
7524 | 9 | case NEON::BI__builtin_neon_vcvtn_s32_v: |
7525 | 9 | case NEON::BI__builtin_neon_vcvtn_s64_v: |
7526 | 10 | case NEON::BI__builtin_neon_vcvtn_u16_f16: |
7527 | 11 | case NEON::BI__builtin_neon_vcvtn_u32_v: |
7528 | 11 | case NEON::BI__builtin_neon_vcvtn_u64_v: |
7529 | 12 | case NEON::BI__builtin_neon_vcvtnq_s16_f16: |
7530 | 13 | case NEON::BI__builtin_neon_vcvtnq_s32_v: |
7531 | 13 | case NEON::BI__builtin_neon_vcvtnq_s64_v: |
7532 | 14 | case NEON::BI__builtin_neon_vcvtnq_u16_f16: |
7533 | 15 | case NEON::BI__builtin_neon_vcvtnq_u32_v: |
7534 | 15 | case NEON::BI__builtin_neon_vcvtnq_u64_v: |
7535 | 16 | case NEON::BI__builtin_neon_vcvtp_s16_f16: |
7536 | 17 | case NEON::BI__builtin_neon_vcvtp_s32_v: |
7537 | 17 | case NEON::BI__builtin_neon_vcvtp_s64_v: |
7538 | 18 | case NEON::BI__builtin_neon_vcvtp_u16_f16: |
7539 | 19 | case NEON::BI__builtin_neon_vcvtp_u32_v: |
7540 | 19 | case NEON::BI__builtin_neon_vcvtp_u64_v: |
7541 | 20 | case NEON::BI__builtin_neon_vcvtpq_s16_f16: |
7542 | 21 | case NEON::BI__builtin_neon_vcvtpq_s32_v: |
7543 | 21 | case NEON::BI__builtin_neon_vcvtpq_s64_v: |
7544 | 22 | case NEON::BI__builtin_neon_vcvtpq_u16_f16: |
7545 | 23 | case NEON::BI__builtin_neon_vcvtpq_u32_v: |
7546 | 23 | case NEON::BI__builtin_neon_vcvtpq_u64_v: |
7547 | 24 | case NEON::BI__builtin_neon_vcvtm_s16_f16: |
7548 | 25 | case NEON::BI__builtin_neon_vcvtm_s32_v: |
7549 | 25 | case NEON::BI__builtin_neon_vcvtm_s64_v: |
7550 | 26 | case NEON::BI__builtin_neon_vcvtm_u16_f16: |
7551 | 27 | case NEON::BI__builtin_neon_vcvtm_u32_v: |
7552 | 27 | case NEON::BI__builtin_neon_vcvtm_u64_v: |
7553 | 28 | case NEON::BI__builtin_neon_vcvtmq_s16_f16: |
7554 | 29 | case NEON::BI__builtin_neon_vcvtmq_s32_v: |
7555 | 29 | case NEON::BI__builtin_neon_vcvtmq_s64_v: |
7556 | 30 | case NEON::BI__builtin_neon_vcvtmq_u16_f16: |
7557 | 31 | case NEON::BI__builtin_neon_vcvtmq_u32_v: |
7558 | 31 | case NEON::BI__builtin_neon_vcvtmq_u64_v: { |
7559 | 31 | llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) }; |
7560 | 31 | return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint); |
7561 | 31 | } |
7562 | 1 | case NEON::BI__builtin_neon_vcvtx_f32_v: { |
7563 | 1 | llvm::Type *Tys[2] = { VTy->getTruncatedElementVectorType(VTy), Ty}; |
7564 | 1 | return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint); |
7565 | | |
7566 | 31 | } |
7567 | 26 | case NEON::BI__builtin_neon_vext_v: |
7568 | 52 | case NEON::BI__builtin_neon_vextq_v: { |
7569 | 52 | int CV = cast<ConstantInt>(Ops[2])->getSExtValue(); |
7570 | 52 | SmallVector<int, 16> Indices; |
7571 | 346 | for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
7572 | 294 | Indices.push_back(i+CV); |
7573 | | |
7574 | 52 | Ops[0] = Builder.CreateBitCast(Ops[0], Ty); |
7575 | 52 | Ops[1] = Builder.CreateBitCast(Ops[1], Ty); |
7576 | 52 | return Builder.CreateShuffleVector(Ops[0], Ops[1], Indices, "vext"); |
7577 | 26 | } |
7578 | 16 | case NEON::BI__builtin_neon_vfma_v: |
7579 | 32 | case NEON::BI__builtin_neon_vfmaq_v: { |
7580 | 32 | Ops[0] = Builder.CreateBitCast(Ops[0], Ty); |
7581 | 32 | Ops[1] = Builder.CreateBitCast(Ops[1], Ty); |
7582 | 32 | Ops[2] = Builder.CreateBitCast(Ops[2], Ty); |
7583 | | |
7584 | | // NEON intrinsic puts accumulator first, unlike the LLVM fma. |
7585 | 32 | return emitCallMaybeConstrainedFPBuiltin( |
7586 | 32 | *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty, |
7587 | 32 | {Ops[1], Ops[2], Ops[0]}); |
7588 | 16 | } |
7589 | 15 | case NEON::BI__builtin_neon_vld1_v: |
7590 | 33 | case NEON::BI__builtin_neon_vld1q_v: { |
7591 | 33 | llvm::Type *Tys[] = {Ty, Int8PtrTy}; |
7592 | 33 | Ops.push_back(getAlignmentValue32(PtrOp0)); |
7593 | 33 | return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vld1"); |
7594 | 15 | } |
7595 | 28 | case NEON::BI__builtin_neon_vld1_x2_v: |
7596 | 56 | case NEON::BI__builtin_neon_vld1q_x2_v: |
7597 | 84 | case NEON::BI__builtin_neon_vld1_x3_v: |
7598 | 112 | case NEON::BI__builtin_neon_vld1q_x3_v: |
7599 | 140 | case NEON::BI__builtin_neon_vld1_x4_v: |
7600 | 168 | case NEON::BI__builtin_neon_vld1q_x4_v: { |
7601 | 168 | llvm::Type *Tys[2] = {VTy, UnqualPtrTy}; |
7602 | 168 | Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys); |
7603 | 168 | Ops[1] = Builder.CreateCall(F, Ops[1], "vld1xN"); |
7604 | 168 | return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); |
7605 | 140 | } |
7606 | 13 | case NEON::BI__builtin_neon_vld2_v: |
7607 | 24 | case NEON::BI__builtin_neon_vld2q_v: |
7608 | 37 | case NEON::BI__builtin_neon_vld3_v: |
7609 | 48 | case NEON::BI__builtin_neon_vld3q_v: |
7610 | 61 | case NEON::BI__builtin_neon_vld4_v: |
7611 | 72 | case NEON::BI__builtin_neon_vld4q_v: |
7612 | 85 | case NEON::BI__builtin_neon_vld2_dup_v: |
7613 | 96 | case NEON::BI__builtin_neon_vld2q_dup_v: |
7614 | 109 | case NEON::BI__builtin_neon_vld3_dup_v: |
7615 | 120 | case NEON::BI__builtin_neon_vld3q_dup_v: |
7616 | 133 | case NEON::BI__builtin_neon_vld4_dup_v: |
7617 | 144 | case NEON::BI__builtin_neon_vld4q_dup_v: { |
7618 | 144 | llvm::Type *Tys[] = {Ty, Int8PtrTy}; |
7619 | 144 | Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys); |
7620 | 144 | Value *Align = getAlignmentValue32(PtrOp1); |
7621 | 144 | Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, NameHint); |
7622 | 144 | return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); |
7623 | 133 | } |
7624 | 14 | case NEON::BI__builtin_neon_vld1_dup_v: |
7625 | 27 | case NEON::BI__builtin_neon_vld1q_dup_v: { |
7626 | 27 | Value *V = PoisonValue::get(Ty); |
7627 | 27 | PtrOp0 = PtrOp0.withElementType(VTy->getElementType()); |
7628 | 27 | LoadInst *Ld = Builder.CreateLoad(PtrOp0); |
7629 | 27 | llvm::Constant *CI = ConstantInt::get(SizeTy, 0); |
7630 | 27 | Ops[0] = Builder.CreateInsertElement(V, Ld, CI); |
7631 | 27 | return EmitNeonSplat(Ops[0], CI); |
7632 | 14 | } |
7633 | 11 | case NEON::BI__builtin_neon_vld2_lane_v: |
7634 | 19 | case NEON::BI__builtin_neon_vld2q_lane_v: |
7635 | 30 | case NEON::BI__builtin_neon_vld3_lane_v: |
7636 | 38 | case NEON::BI__builtin_neon_vld3q_lane_v: |
7637 | 49 | case NEON::BI__builtin_neon_vld4_lane_v: |
7638 | 57 | case NEON::BI__builtin_neon_vld4q_lane_v: { |
7639 | 57 | llvm::Type *Tys[] = {Ty, Int8PtrTy}; |
7640 | 57 | Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys); |
7641 | 228 | for (unsigned I = 2; I < Ops.size() - 1; ++I)
7642 | 171 | Ops[I] = Builder.CreateBitCast(Ops[I], Ty); |
7643 | 57 | Ops.push_back(getAlignmentValue32(PtrOp1)); |
7644 | 57 | Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), NameHint); |
7645 | 57 | return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); |
7646 | 49 | } |
7647 | 12 | case NEON::BI__builtin_neon_vmovl_v: { |
7648 | 12 | llvm::FixedVectorType *DTy = |
7649 | 12 | llvm::FixedVectorType::getTruncatedElementVectorType(VTy); |
7650 | 12 | Ops[0] = Builder.CreateBitCast(Ops[0], DTy); |
7651 | 12 | if (Usgn) |
7652 | 6 | return Builder.CreateZExt(Ops[0], Ty, "vmovl"); |
7653 | 6 | return Builder.CreateSExt(Ops[0], Ty, "vmovl"); |
7654 | 12 | } |
7655 | 12 | case NEON::BI__builtin_neon_vmovn_v: { |
7656 | 12 | llvm::FixedVectorType *QTy = |
7657 | 12 | llvm::FixedVectorType::getExtendedElementVectorType(VTy); |
7658 | 12 | Ops[0] = Builder.CreateBitCast(Ops[0], QTy); |
7659 | 12 | return Builder.CreateTrunc(Ops[0], Ty, "vmovn"); |
7660 | 12 | } |
7661 | 7 | case NEON::BI__builtin_neon_vmull_v: |
7662 | | // FIXME: the integer vmull operations could be emitted in terms of pure |
7663 | | // LLVM IR (2 exts followed by a mul). Unfortunately LLVM has a habit of |
7664 | | // hoisting the exts outside loops. Until global ISel comes along that can |
7665 | | // see through such movement this leads to bad CodeGen. So we need an |
7666 | | // intrinsic for now. |
7667 | 7 | Int = Usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls;
7668 | 7 | Int = Type.isPoly() ? (unsigned)Intrinsic::arm_neon_vmullp : Int;
7669 | 7 | return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull"); |
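 | | // Added sketch (not from the upstream source) of the pure-IR lowering
 | | // the FIXME above refers to, for a signed vmull on <4 x i16> inputs:
 | | //   %lhs = sext <4 x i16> %a to <4 x i32>
 | | //   %rhs = sext <4 x i16> %b to <4 x i32>
 | | //   %res = mul <4 x i32> %lhs, %rhs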
7670 | 6 | case NEON::BI__builtin_neon_vpadal_v: |
7671 | 12 | case NEON::BI__builtin_neon_vpadalq_v: { |
7672 | | // The source operand type has twice as many elements of half the size. |
7673 | 12 | unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits(); |
7674 | 12 | llvm::Type *EltTy = |
7675 | 12 | llvm::IntegerType::get(getLLVMContext(), EltBits / 2); |
7676 | 12 | auto *NarrowTy = |
7677 | 12 | llvm::FixedVectorType::get(EltTy, VTy->getNumElements() * 2); |
7678 | 12 | llvm::Type *Tys[2] = { Ty, NarrowTy }; |
7679 | 12 | return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint); |
7680 | 6 | } |
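 | | // Added worked example (not from the upstream source): if Ty is
 | | // <4 x i32>, then EltBits is 32, EltTy becomes i16, and NarrowTy becomes
 | | // <8 x i16>, so the intrinsic is overloaded on {<4 x i32>, <8 x i16>}.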
7681 | 12 | case NEON::BI__builtin_neon_vpaddl_v: |
7682 | 24 | case NEON::BI__builtin_neon_vpaddlq_v: { |
7683 | | // The source operand type has twice as many elements of half the size. |
7684 | 24 | unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits(); |
7685 | 24 | llvm::Type *EltTy = llvm::IntegerType::get(getLLVMContext(), EltBits / 2); |
7686 | 24 | auto *NarrowTy = |
7687 | 24 | llvm::FixedVectorType::get(EltTy, VTy->getNumElements() * 2); |
7688 | 24 | llvm::Type *Tys[2] = { Ty, NarrowTy }; |
7689 | 24 | return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpaddl"); |
7690 | 12 | } |
7691 | 6 | case NEON::BI__builtin_neon_vqdmlal_v: |
7692 | 12 | case NEON::BI__builtin_neon_vqdmlsl_v: { |
7693 | 12 | SmallVector<Value *, 2> MulOps(Ops.begin() + 1, Ops.end()); |
7694 | 12 | Ops[1] = |
7695 | 12 | EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), MulOps, "vqdmlal"); |
7696 | 12 | Ops.resize(2); |
7697 | 12 | return EmitNeonCall(CGM.getIntrinsic(AltLLVMIntrinsic, Ty), Ops, NameHint); |
7698 | 6 | } |
7699 | 4 | case NEON::BI__builtin_neon_vqdmulhq_lane_v: |
7700 | 8 | case NEON::BI__builtin_neon_vqdmulh_lane_v: |
7701 | 12 | case NEON::BI__builtin_neon_vqrdmulhq_lane_v: |
7702 | 16 | case NEON::BI__builtin_neon_vqrdmulh_lane_v: { |
7703 | 16 | auto *RTy = cast<llvm::FixedVectorType>(Ty); |
7704 | 16 | if (BuiltinID == NEON::BI__builtin_neon_vqdmulhq_lane_v || |
7705 | 16 | BuiltinID == NEON::BI__builtin_neon_vqrdmulhq_lane_v)
7706 | 8 | RTy = llvm::FixedVectorType::get(RTy->getElementType(), |
7707 | 8 | RTy->getNumElements() * 2); |
7708 | 16 | llvm::Type *Tys[2] = { |
7709 | 16 | RTy, GetNeonType(this, NeonTypeFlags(Type.getEltType(), false, |
7710 | 16 | /*isQuad*/ false))}; |
7711 | 16 | return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint); |
7712 | 12 | } |
7713 | 4 | case NEON::BI__builtin_neon_vqdmulhq_laneq_v: |
7714 | 8 | case NEON::BI__builtin_neon_vqdmulh_laneq_v: |
7715 | 12 | case NEON::BI__builtin_neon_vqrdmulhq_laneq_v: |
7716 | 16 | case NEON::BI__builtin_neon_vqrdmulh_laneq_v: { |
7717 | 16 | llvm::Type *Tys[2] = { |
7718 | 16 | Ty, GetNeonType(this, NeonTypeFlags(Type.getEltType(), false, |
7719 | 16 | /*isQuad*/ true))}; |
7720 | 16 | return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint); |
7721 | 12 | } |
7722 | 16 | case NEON::BI__builtin_neon_vqshl_n_v: |
7723 | 32 | case NEON::BI__builtin_neon_vqshlq_n_v: |
7724 | 32 | return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl_n", |
7725 | 32 | 1, false); |
7726 | 8 | case NEON::BI__builtin_neon_vqshlu_n_v: |
7727 | 16 | case NEON::BI__builtin_neon_vqshluq_n_v: |
7728 | 16 | return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshlu_n", |
7729 | 16 | 1, false); |
7730 | 7 | case NEON::BI__builtin_neon_vrecpe_v: |
7731 | 14 | case NEON::BI__builtin_neon_vrecpeq_v: |
7732 | 21 | case NEON::BI__builtin_neon_vrsqrte_v: |
7733 | 28 | case NEON::BI__builtin_neon_vrsqrteq_v: |
7734 | 28 | Int = Ty->isFPOrFPVectorTy() ? LLVMIntrinsic : AltLLVMIntrinsic;
7735 | 28 | return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint); |
7736 | 14 | case NEON::BI__builtin_neon_vrndi_v: |
7737 | 26 | case NEON::BI__builtin_neon_vrndiq_v: |
7738 | 26 | Int = Builder.getIsFPConstrained() |
7739 | 26 | ? Intrinsic::experimental_constrained_nearbyint
7740 | 26 | : Intrinsic::nearbyint;
7741 | 26 | return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint); |
7742 | 16 | case NEON::BI__builtin_neon_vrshr_n_v: |
7743 | 32 | case NEON::BI__builtin_neon_vrshrq_n_v: |
7744 | 32 | return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n", |
7745 | 32 | 1, true); |
7746 | 1 | case NEON::BI__builtin_neon_vsha512hq_u64: |
7747 | 2 | case NEON::BI__builtin_neon_vsha512h2q_u64: |
7748 | 3 | case NEON::BI__builtin_neon_vsha512su0q_u64: |
7749 | 4 | case NEON::BI__builtin_neon_vsha512su1q_u64: { |
7750 | 4 | Function *F = CGM.getIntrinsic(Int); |
7751 | 4 | return EmitNeonCall(F, Ops, ""); |
7752 | 3 | } |
7753 | 16 | case NEON::BI__builtin_neon_vshl_n_v: |
7754 | 32 | case NEON::BI__builtin_neon_vshlq_n_v: |
7755 | 32 | Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false); |
7756 | 32 | return Builder.CreateShl(Builder.CreateBitCast(Ops[0],Ty), Ops[1], |
7757 | 32 | "vshl_n"); |
7758 | 39 | case NEON::BI__builtin_neon_vshll_n_v: { |
7759 | 39 | llvm::FixedVectorType *SrcTy = |
7760 | 39 | llvm::FixedVectorType::getTruncatedElementVectorType(VTy); |
7761 | 39 | Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy); |
7762 | 39 | if (Usgn) |
7763 | 18 | Ops[0] = Builder.CreateZExt(Ops[0], VTy); |
7764 | 21 | else |
7765 | 21 | Ops[0] = Builder.CreateSExt(Ops[0], VTy); |
7766 | 39 | Ops[1] = EmitNeonShiftVector(Ops[1], VTy, false); |
7767 | 39 | return Builder.CreateShl(Ops[0], Ops[1], "vshll_n"); |
7768 | 16 | } |
7769 | 18 | case NEON::BI__builtin_neon_vshrn_n_v: { |
7770 | 18 | llvm::FixedVectorType *SrcTy = |
7771 | 18 | llvm::FixedVectorType::getExtendedElementVectorType(VTy); |
7772 | 18 | Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy); |
7773 | 18 | Ops[1] = EmitNeonShiftVector(Ops[1], SrcTy, false); |
7774 | 18 | if (Usgn) |
7775 | 9 | Ops[0] = Builder.CreateLShr(Ops[0], Ops[1]); |
7776 | 9 | else |
7777 | 9 | Ops[0] = Builder.CreateAShr(Ops[0], Ops[1]); |
7778 | 18 | return Builder.CreateTrunc(Ops[0], Ty, "vshrn_n"); |
7779 | 16 | } |
7780 | 22 | case NEON::BI__builtin_neon_vshr_n_v: |
7781 | 38 | case NEON::BI__builtin_neon_vshrq_n_v: |
7782 | 38 | return EmitNeonRShiftImm(Ops[0], Ops[1], Ty, Usgn, "vshr_n"); |
7783 | 13 | case NEON::BI__builtin_neon_vst1_v: |
7784 | 27 | case NEON::BI__builtin_neon_vst1q_v: |
7785 | 40 | case NEON::BI__builtin_neon_vst2_v: |
7786 | 51 | case NEON::BI__builtin_neon_vst2q_v: |
7787 | 64 | case NEON::BI__builtin_neon_vst3_v: |
7788 | 75 | case NEON::BI__builtin_neon_vst3q_v: |
7789 | 88 | case NEON::BI__builtin_neon_vst4_v: |
7790 | 99 | case NEON::BI__builtin_neon_vst4q_v: |
7791 | 110 | case NEON::BI__builtin_neon_vst2_lane_v: |
7792 | 118 | case NEON::BI__builtin_neon_vst2q_lane_v: |
7793 | 129 | case NEON::BI__builtin_neon_vst3_lane_v: |
7794 | 137 | case NEON::BI__builtin_neon_vst3q_lane_v: |
7795 | 148 | case NEON::BI__builtin_neon_vst4_lane_v: |
7796 | 156 | case NEON::BI__builtin_neon_vst4q_lane_v: { |
7797 | 156 | llvm::Type *Tys[] = {Int8PtrTy, Ty}; |
7798 | 156 | Ops.push_back(getAlignmentValue32(PtrOp0)); |
7799 | 156 | return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, ""); |
7800 | 148 | } |
7801 | 1 | case NEON::BI__builtin_neon_vsm3partw1q_u32: |
7802 | 2 | case NEON::BI__builtin_neon_vsm3partw2q_u32: |
7803 | 3 | case NEON::BI__builtin_neon_vsm3ss1q_u32: |
7804 | 4 | case NEON::BI__builtin_neon_vsm4ekeyq_u32: |
7805 | 5 | case NEON::BI__builtin_neon_vsm4eq_u32: { |
7806 | 5 | Function *F = CGM.getIntrinsic(Int); |
7807 | 5 | return EmitNeonCall(F, Ops, ""); |
7808 | 4 | } |
7809 | 1 | case NEON::BI__builtin_neon_vsm3tt1aq_u32: |
7810 | 2 | case NEON::BI__builtin_neon_vsm3tt1bq_u32: |
7811 | 3 | case NEON::BI__builtin_neon_vsm3tt2aq_u32: |
7812 | 4 | case NEON::BI__builtin_neon_vsm3tt2bq_u32: { |
7813 | 4 | Function *F = CGM.getIntrinsic(Int); |
7814 | 4 | Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty); |
7815 | 4 | return EmitNeonCall(F, Ops, ""); |
7816 | 3 | } |
7817 | 28 | case NEON::BI__builtin_neon_vst1_x2_v: |
7818 | 56 | case NEON::BI__builtin_neon_vst1q_x2_v: |
7819 | 84 | case NEON::BI__builtin_neon_vst1_x3_v: |
7820 | 112 | case NEON::BI__builtin_neon_vst1q_x3_v: |
7821 | 140 | case NEON::BI__builtin_neon_vst1_x4_v: |
7822 | 168 | case NEON::BI__builtin_neon_vst1q_x4_v: { |
7823 | | // TODO: Currently in AArch32 mode the pointer operand comes first, whereas
7824 | | // in AArch64 it comes last. We may want to standardize on one or the other.
7825 | 168 | if (Arch == llvm::Triple::aarch64 || Arch == llvm::Triple::aarch64_be ||
7826 | 168 | Arch == llvm::Triple::aarch64_32) {
7827 | 90 | llvm::Type *Tys[2] = {VTy, UnqualPtrTy}; |
7828 | 90 | std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end()); |
7829 | 90 | return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, ""); |
7830 | 90 | } |
7831 | 78 | llvm::Type *Tys[2] = {UnqualPtrTy, VTy}; |
7832 | 78 | return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, ""); |
7833 | 168 | } |
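 | | // Added illustration (not from the upstream source): for a vst1q_x2
 | | // store, the rotate above moves the pointer from Ops[0] to the end, so
 | | // the intrinsic call becomes (v1, v2, ptr) on AArch64 while AArch32
 | | // keeps the (ptr, v1, v2) order.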
7834 | 12 | case NEON::BI__builtin_neon_vsubhn_v: { |
7835 | 12 | llvm::FixedVectorType *SrcTy = |
7836 | 12 | llvm::FixedVectorType::getExtendedElementVectorType(VTy); |
7837 | | |
7838 | | // %sum = add <4 x i32> %lhs, %rhs |
7839 | 12 | Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy); |
7840 | 12 | Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy); |
7841 | 12 | Ops[0] = Builder.CreateSub(Ops[0], Ops[1], "vsubhn"); |
7842 | | |
7843 | | // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16> |
7844 | 12 | Constant *ShiftAmt = |
7845 | 12 | ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2); |
7846 | 12 | Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vsubhn"); |
7847 | | |
7848 | | // %res = trunc <4 x i32> %high to <4 x i16> |
7849 | 12 | return Builder.CreateTrunc(Ops[0], VTy, "vsubhn"); |
7850 | 168 | } |
7851 | 10 | case NEON::BI__builtin_neon_vtrn_v: |
7852 | 20 | case NEON::BI__builtin_neon_vtrnq_v: { |
7853 | 20 | Ops[1] = Builder.CreateBitCast(Ops[1], Ty); |
7854 | 20 | Ops[2] = Builder.CreateBitCast(Ops[2], Ty); |
7855 | 20 | Value *SV = nullptr; |
7856 | | |
7857 | 60 | for (unsigned vi = 0; vi != 2; ++vi) {
7858 | 40 | SmallVector<int, 16> Indices; |
7859 | 178 | for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
7860 | 138 | Indices.push_back(i+vi); |
7861 | 138 | Indices.push_back(i+e+vi); |
7862 | 138 | } |
7863 | 40 | Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi); |
7864 | 40 | SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn"); |
7865 | 40 | SV = Builder.CreateDefaultAlignedStore(SV, Addr); |
7866 | 40 | } |
7867 | 20 | return SV; |
7868 | 10 | } |
7869 | 19 | case NEON::BI__builtin_neon_vtst_v: |
7870 | 38 | case NEON::BI__builtin_neon_vtstq_v: { |
7871 | 38 | Ops[0] = Builder.CreateBitCast(Ops[0], Ty); |
7872 | 38 | Ops[1] = Builder.CreateBitCast(Ops[1], Ty); |
7873 | 38 | Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]); |
7874 | 38 | Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0], |
7875 | 38 | ConstantAggregateZero::get(Ty)); |
7876 | 38 | return Builder.CreateSExt(Ops[0], Ty, "vtst"); |
7877 | 19 | } |
7878 | 10 | case NEON::BI__builtin_neon_vuzp_v: |
7879 | 20 | case NEON::BI__builtin_neon_vuzpq_v: { |
7880 | 20 | Ops[1] = Builder.CreateBitCast(Ops[1], Ty); |
7881 | 20 | Ops[2] = Builder.CreateBitCast(Ops[2], Ty); |
7882 | 20 | Value *SV = nullptr; |
7883 | | |
7884 | 60 | for (unsigned vi = 0; vi != 2; ++vi) {
7885 | 40 | SmallVector<int, 16> Indices; |
7886 | 316 | for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
7887 | 276 | Indices.push_back(2*i+vi); |
7888 | | |
7889 | 40 | Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi); |
7890 | 40 | SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp"); |
7891 | 40 | SV = Builder.CreateDefaultAlignedStore(SV, Addr); |
7892 | 40 | } |
7893 | 20 | return SV; |
7894 | 10 | } |
7895 | 1 | case NEON::BI__builtin_neon_vxarq_u64: { |
7896 | 1 | Function *F = CGM.getIntrinsic(Int); |
7897 | 1 | Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty); |
7898 | 1 | return EmitNeonCall(F, Ops, ""); |
7899 | 10 | } |
7900 | 10 | case NEON::BI__builtin_neon_vzip_v: |
7901 | 21 | case NEON::BI__builtin_neon_vzipq_v: { |
7902 | 21 | Ops[1] = Builder.CreateBitCast(Ops[1], Ty); |
7903 | 21 | Ops[2] = Builder.CreateBitCast(Ops[2], Ty); |
7904 | 21 | Value *SV = nullptr; |
7905 | | |
7906 | 63 | for (unsigned vi = 0; vi != 2; ++vi) {
7907 | 42 | SmallVector<int, 16> Indices; |
7908 | 196 | for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
7909 | 154 | Indices.push_back((i + vi*e) >> 1); |
7910 | 154 | Indices.push_back(((i + vi*e) >> 1)+e); |
7911 | 154 | } |
7912 | 42 | Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi); |
7913 | 42 | SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip"); |
7914 | 42 | SV = Builder.CreateDefaultAlignedStore(SV, Addr); |
7915 | 42 | } |
7916 | 21 | return SV; |
7917 | 10 | } |
7918 | 2 | case NEON::BI__builtin_neon_vdot_s32: |
7919 | 4 | case NEON::BI__builtin_neon_vdot_u32: |
7920 | 6 | case NEON::BI__builtin_neon_vdotq_s32: |
7921 | 8 | case NEON::BI__builtin_neon_vdotq_u32: { |
7922 | 8 | auto *InputTy = |
7923 | 8 | llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8); |
7924 | 8 | llvm::Type *Tys[2] = { Ty, InputTy }; |
7925 | 8 | return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vdot"); |
7926 | 6 | } |
7927 | 1 | case NEON::BI__builtin_neon_vfmlal_low_f16: |
7928 | 2 | case NEON::BI__builtin_neon_vfmlalq_low_f16: { |
7929 | 2 | auto *InputTy = |
7930 | 2 | llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16); |
7931 | 2 | llvm::Type *Tys[2] = { Ty, InputTy }; |
7932 | 2 | return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlal_low"); |
7933 | 1 | } |
7934 | 1 | case NEON::BI__builtin_neon_vfmlsl_low_f16: |
7935 | 2 | case NEON::BI__builtin_neon_vfmlslq_low_f16: { |
7936 | 2 | auto *InputTy = |
7937 | 2 | llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16); |
7938 | 2 | llvm::Type *Tys[2] = { Ty, InputTy }; |
7939 | 2 | return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlsl_low"); |
7940 | 1 | } |
7941 | 1 | case NEON::BI__builtin_neon_vfmlal_high_f16: |
7942 | 2 | case NEON::BI__builtin_neon_vfmlalq_high_f16: { |
7943 | 2 | auto *InputTy = |
7944 | 2 | llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16); |
7945 | 2 | llvm::Type *Tys[2] = { Ty, InputTy }; |
7946 | 2 | return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlal_high"); |
7947 | 1 | } |
7948 | 1 | case NEON::BI__builtin_neon_vfmlsl_high_f16: |
7949 | 2 | case NEON::BI__builtin_neon_vfmlslq_high_f16: { |
7950 | 2 | auto *InputTy = |
7951 | 2 | llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16); |
7952 | 2 | llvm::Type *Tys[2] = { Ty, InputTy }; |
7953 | 2 | return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlsl_high"); |
7954 | 1 | } |
7955 | 2 | case NEON::BI__builtin_neon_vmmlaq_s32: |
7956 | 4 | case NEON::BI__builtin_neon_vmmlaq_u32: { |
7957 | 4 | auto *InputTy = |
7958 | 4 | llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8); |
7959 | 4 | llvm::Type *Tys[2] = { Ty, InputTy }; |
7960 | 4 | return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vmmla"); |
7961 | 2 | } |
7962 | 2 | case NEON::BI__builtin_neon_vusmmlaq_s32: { |
7963 | 2 | auto *InputTy = |
7964 | 2 | llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8); |
7965 | 2 | llvm::Type *Tys[2] = { Ty, InputTy }; |
7966 | 2 | return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vusmmla"); |
7967 | 2 | } |
7968 | 2 | case NEON::BI__builtin_neon_vusdot_s32: |
7969 | 4 | case NEON::BI__builtin_neon_vusdotq_s32: { |
7970 | 4 | auto *InputTy = |
7971 | 4 | llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8); |
7972 | 4 | llvm::Type *Tys[2] = { Ty, InputTy }; |
7973 | 4 | return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vusdot"); |
7974 | 2 | } |
7975 | 3 | case NEON::BI__builtin_neon_vbfdot_f32: |
7976 | 6 | case NEON::BI__builtin_neon_vbfdotq_f32: { |
7977 | 6 | llvm::Type *InputTy = |
7978 | 6 | llvm::FixedVectorType::get(BFloatTy, Ty->getPrimitiveSizeInBits() / 16); |
7979 | 6 | llvm::Type *Tys[2] = { Ty, InputTy }; |
7980 | 6 | return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vbfdot"); |
7981 | 3 | } |
7982 | 2 | case NEON::BI__builtin_neon___a32_vcvt_bf16_f32: { |
7983 | 2 | llvm::Type *Tys[1] = { Ty }; |
7984 | 2 | Function *F = CGM.getIntrinsic(Int, Tys); |
7985 | 2 | return EmitNeonCall(F, Ops, "vcvtfp2bf"); |
7986 | 3 | } |
7987 | | |
7988 | 2.84k | } |
7989 | | |
7990 | 735 | assert(Int && "Expected valid intrinsic number"); |
7991 | | |
7992 | | // Determine the type(s) of this overloaded AArch64 intrinsic. |
7993 | 735 | Function *F = LookupNeonLLVMIntrinsic(Int, Modifier, Ty, E); |
7994 | | |
7995 | 735 | Value *Result = EmitNeonCall(F, Ops, NameHint); |
7996 | 735 | llvm::Type *ResultType = ConvertType(E->getType()); |
7997 | | // Cast the AArch64 intrinsic's one-element vector result to the
7998 | | // scalar type expected by the builtin.
7999 | 735 | return Builder.CreateBitCast(Result, ResultType, NameHint); |
8000 | 735 | } |
8001 | | |
8002 | | Value *CodeGenFunction::EmitAArch64CompareBuiltinExpr( |
8003 | | Value *Op, llvm::Type *Ty, const CmpInst::Predicate Fp, |
8004 | 142 | const CmpInst::Predicate Ip, const Twine &Name) { |
8005 | 142 | llvm::Type *OTy = Op->getType(); |
8006 | | |
8007 | | // FIXME: this is utterly horrific. We should not be looking at previous |
8008 | | // codegen context to find out what needs doing. Unfortunately TableGen |
8009 | | // currently gives us exactly the same calls for vceqz_f32 and vceqz_s32 |
8010 | | // (etc). |
8011 | 142 | if (BitCastInst *BI = dyn_cast<BitCastInst>(Op)) |
8012 | 78 | OTy = BI->getOperand(0)->getType(); |
8013 | | |
8014 | 142 | Op = Builder.CreateBitCast(Op, OTy); |
8015 | 142 | if (OTy->getScalarType()->isFloatingPointTy()) { |
8016 | 85 | if (Fp == CmpInst::FCMP_OEQ) |
8017 | 17 | Op = Builder.CreateFCmp(Fp, Op, Constant::getNullValue(OTy)); |
8018 | 68 | else |
8019 | 68 | Op = Builder.CreateFCmpS(Fp, Op, Constant::getNullValue(OTy)); |
8020 | 85 | } else { |
8021 | 57 | Op = Builder.CreateICmp(Ip, Op, Constant::getNullValue(OTy)); |
8022 | 57 | } |
8023 | 142 | return Builder.CreateSExt(Op, Ty, Name); |
8024 | 142 | } |
8025 | | |
8026 | | static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef<Value *> Ops, |
8027 | | Value *ExtOp, Value *IndexOp, |
8028 | | llvm::Type *ResTy, unsigned IntID, |
8029 | 24 | const char *Name) { |
8030 | 24 | SmallVector<Value *, 2> TblOps; |
8031 | 24 | if (ExtOp) |
8032 | 6 | TblOps.push_back(ExtOp); |
8033 | | |
8034 | | // Build a vector containing sequential numbers like (0, 1, 2, ..., 15).
8035 | 24 | SmallVector<int, 16> Indices; |
8036 | 24 | auto *TblTy = cast<llvm::FixedVectorType>(Ops[0]->getType()); |
8037 | 216 | for (unsigned i = 0, e = TblTy->getNumElements(); i != e; ++i) {
8038 | 192 | Indices.push_back(2*i); |
8039 | 192 | Indices.push_back(2*i+1); |
8040 | 192 | } |
8041 | | |
8042 | 24 | int PairPos = 0, End = Ops.size() - 1; |
8043 | 48 | while (PairPos < End) { |
8044 | 24 | TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos], |
8045 | 24 | Ops[PairPos+1], Indices, |
8046 | 24 | Name)); |
8047 | 24 | PairPos += 2; |
8048 | 24 | } |
8049 | | |
8050 | | // If there's an odd number of 64-bit lookup tables, fill the high 64 bits
8051 | | // of the final 128-bit lookup table with zero.
8052 | 24 | if (PairPos == End) { |
8053 | 12 | Value *ZeroTbl = ConstantAggregateZero::get(TblTy); |
8054 | 12 | TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos], |
8055 | 12 | ZeroTbl, Indices, Name)); |
8056 | 12 | } |
8057 | | |
8058 | 24 | Function *TblF; |
8059 | 24 | TblOps.push_back(IndexOp); |
8060 | 24 | TblF = CGF.CGM.getIntrinsic(IntID, ResTy); |
8061 | | |
8062 | 24 | return CGF.EmitNeonCall(TblF, TblOps, Name); |
8063 | 24 | } |
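 | | // Added worked example (not from the upstream source): for two <8 x i8>
 | | // table halves, Indices is {0, 1, ..., 15}, so each shuffle simply
 | | // concatenates a pair of 64-bit halves into one 128-bit table; a
 | | // leftover odd half is concatenated with an all-zero vector instead.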
8064 | | |
8065 | 6.67k | Value *CodeGenFunction::GetValueForARMHint(unsigned BuiltinID) { |
8066 | 6.67k | unsigned Value; |
8067 | 6.67k | switch (BuiltinID) { |
8068 | 6.66k | default: |
8069 | 6.66k | return nullptr; |
8070 | 2 | case clang::ARM::BI__builtin_arm_nop: |
8071 | 2 | Value = 0; |
8072 | 2 | break; |
8073 | 2 | case clang::ARM::BI__builtin_arm_yield: |
8074 | 3 | case clang::ARM::BI__yield: |
8075 | 3 | Value = 1; |
8076 | 3 | break; |
8077 | 2 | case clang::ARM::BI__builtin_arm_wfe: |
8078 | 3 | case clang::ARM::BI__wfe: |
8079 | 3 | Value = 2; |
8080 | 3 | break; |
8081 | 2 | case clang::ARM::BI__builtin_arm_wfi: |
8082 | 3 | case clang::ARM::BI__wfi: |
8083 | 3 | Value = 3; |
8084 | 3 | break; |
8085 | 2 | case clang::ARM::BI__builtin_arm_sev: |
8086 | 3 | case clang::ARM::BI__sev: |
8087 | 3 | Value = 4; |
8088 | 3 | break; |
8089 | 2 | case clang::ARM::BI__builtin_arm_sevl: |
8090 | 3 | case clang::ARM::BI__sevl: |
8091 | 3 | Value = 5; |
8092 | 3 | break; |
8093 | 6.67k | } |
8094 | | |
8095 | 17 | return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_hint), |
8096 | 17 | llvm::ConstantInt::get(Int32Ty, Value)); |
8097 | 6.67k | } |
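 | | // Added example (not from the upstream source): __builtin_arm_wfi()
 | | // selects Value == 3 above and therefore lowers to:
 | | //   call void @llvm.arm.hint(i32 3)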
8098 | | |
8099 | | enum SpecialRegisterAccessKind { |
8100 | | NormalRead, |
8101 | | VolatileRead, |
8102 | | Write, |
8103 | | }; |
8104 | | |
8105 | | // Generates the IR for __builtin_read_exec_*. |
8106 | | // Lowers the builtin to the amdgcn_ballot intrinsic.
8107 | | static Value *EmitAMDGCNBallotForExec(CodeGenFunction &CGF, const CallExpr *E, |
8108 | | llvm::Type *RegisterType, |
8109 | 30 | llvm::Type *ValueType, bool isExecHi) { |
8110 | 30 | CodeGen::CGBuilderTy &Builder = CGF.Builder; |
8111 | 30 | CodeGen::CodeGenModule &CGM = CGF.CGM; |
8112 | | |
8113 | 30 | Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, {RegisterType}); |
8114 | 30 | llvm::Value *Call = Builder.CreateCall(F, {Builder.getInt1(true)}); |
8115 | | |
8116 | 30 | if (isExecHi) { |
8117 | 10 | Value *Rt2 = Builder.CreateLShr(Call, 32); |
8118 | 10 | Rt2 = Builder.CreateTrunc(Rt2, CGF.Int32Ty); |
8119 | 10 | return Rt2; |
8120 | 10 | } |
8121 | | |
8122 | 20 | return Call; |
8123 | 30 | } |
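 | | // Added example (not from the upstream source; assumes a 64-bit
 | | // RegisterType): reading exec_hi emits
 | | //   %b = call i64 @llvm.amdgcn.ballot.i64(i1 true)
 | | //   %s = lshr i64 %b, 32
 | | //   %hi = trunc i64 %s to i32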
8124 | | |
8125 | | // Generates the IR for the read/write special register builtins.
8126 | | // ValueType is the type of the value that is to be written or read;
8127 | | // RegisterType is the type of the register being written to or read from.
8128 | | static Value *EmitSpecialRegisterBuiltin(CodeGenFunction &CGF, |
8129 | | const CallExpr *E, |
8130 | | llvm::Type *RegisterType, |
8131 | | llvm::Type *ValueType, |
8132 | | SpecialRegisterAccessKind AccessKind, |
8133 | 96 | StringRef SysReg = "") { |
8134 | | // The write_register and read_register intrinsics only support 32-, 64- and 128-bit operations.
8135 | 96 | assert((RegisterType->isIntegerTy(32) || RegisterType->isIntegerTy(64) || |
8136 | 96 | RegisterType->isIntegerTy(128)) && |
8137 | 96 | "Unsupported size for register."); |
8138 | | |
8139 | 96 | CodeGen::CGBuilderTy &Builder = CGF.Builder; |
8140 | 96 | CodeGen::CodeGenModule &CGM = CGF.CGM; |
8141 | 96 | LLVMContext &Context = CGM.getLLVMContext(); |
8142 | | |
8143 | 96 | if (SysReg.empty()) { |
8144 | 96 | const Expr *SysRegStrExpr = E->getArg(0)->IgnoreParenCasts(); |
8145 | 96 | SysReg = cast<clang::StringLiteral>(SysRegStrExpr)->getString(); |
8146 | 96 | } |
8147 | | |
8148 | 96 | llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysReg) }; |
8149 | 96 | llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops); |
8150 | 96 | llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName); |
8151 | | |
8152 | 96 | llvm::Type *Types[] = { RegisterType }; |
8153 | | |
8154 | 96 | bool MixedTypes = RegisterType->isIntegerTy(64) && ValueType->isIntegerTy(32);
8155 | 96 | assert(!(RegisterType->isIntegerTy(32) && ValueType->isIntegerTy(64)) |
8156 | 96 | && "Can't fit 64-bit value in 32-bit register"); |
8157 | | |
8158 | 96 | if (AccessKind != Write) { |
8159 | 48 | assert(AccessKind == NormalRead || AccessKind == VolatileRead); |
8160 | 48 | llvm::Function *F = CGM.getIntrinsic( |
8161 | 48 | AccessKind == VolatileRead ? llvm::Intrinsic::read_volatile_register |
8162 | 48 | : llvm::Intrinsic::read_register,
8163 | 48 | Types); |
8164 | 48 | llvm::Value *Call = Builder.CreateCall(F, Metadata); |
8165 | | |
8166 | 48 | if (MixedTypes) |
8167 | | // Read into 64 bit register and then truncate result to 32 bit. |
8168 | 13 | return Builder.CreateTrunc(Call, ValueType); |
8169 | | |
8170 | 35 | if (ValueType->isPointerTy()) |
8171 | | // Have i32/i64 result (Call) but want to return a VoidPtrTy (i8*). |
8172 | 11 | return Builder.CreateIntToPtr(Call, ValueType); |
8173 | | |
8174 | 24 | return Call; |
8175 | 35 | } |
8176 | | |
8177 | 48 | llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types); |
8178 | 48 | llvm::Value *ArgValue = CGF.EmitScalarExpr(E->getArg(1)); |
8179 | 48 | if (MixedTypes) { |
8180 | | // Extend 32 bit write value to 64 bit to pass to write. |
8181 | 13 | ArgValue = Builder.CreateZExt(ArgValue, RegisterType); |
8182 | 13 | return Builder.CreateCall(F, { Metadata, ArgValue }); |
8183 | 13 | } |
8184 | | |
8185 | 35 | if (ValueType->isPointerTy()) { |
8186 | | // Have VoidPtrTy ArgValue but want to return an i32/i64. |
8187 | 11 | ArgValue = Builder.CreatePtrToInt(ArgValue, RegisterType); |
8188 | 11 | return Builder.CreateCall(F, { Metadata, ArgValue }); |
8189 | 11 | } |
8190 | | |
8191 | 24 | return Builder.CreateCall(F, { Metadata, ArgValue }); |
8192 | 35 | } |
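 | | // Added worked example (not from the upstream source): reading a 32-bit
 | | // value from a 64-bit register takes the MixedTypes path above: the i64
 | | // result of @llvm.read_volatile_register.i64 is truncated to i32, while
 | | // a write in the opposite direction zero-extends the value first.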
8193 | | |
8194 | | /// Return true if BuiltinID is an overloaded Neon intrinsic with an extra |
8195 | | /// argument that specifies the vector type. |
8196 | 1.77k | static bool HasExtraNeonArgument(unsigned BuiltinID) { |
8197 | 1.77k | switch (BuiltinID) { |
8198 | 1.64k | default: break; |
8199 | 1.64k | case NEON::BI__builtin_neon_vget_lane_i8: |
8200 | 7 | case NEON::BI__builtin_neon_vget_lane_i16: |
8201 | 41 | case NEON::BI__builtin_neon_vget_lane_bf16: |
8202 | 43 | case NEON::BI__builtin_neon_vget_lane_i32: |
8203 | 45 | case NEON::BI__builtin_neon_vget_lane_i64: |
8204 | 46 | case NEON::BI__builtin_neon_vget_lane_f32: |
8205 | 49 | case NEON::BI__builtin_neon_vgetq_lane_i8: |
8206 | 53 | case NEON::BI__builtin_neon_vgetq_lane_i16: |
8207 | 87 | case NEON::BI__builtin_neon_vgetq_lane_bf16: |
8208 | 89 | case NEON::BI__builtin_neon_vgetq_lane_i32: |
8209 | 91 | case NEON::BI__builtin_neon_vgetq_lane_i64: |
8210 | 92 | case NEON::BI__builtin_neon_vgetq_lane_f32: |
8211 | 94 | case NEON::BI__builtin_neon_vduph_lane_bf16: |
8212 | 96 | case NEON::BI__builtin_neon_vduph_laneq_bf16: |
8213 | 99 | case NEON::BI__builtin_neon_vset_lane_i8: |
8214 | 103 | case NEON::BI__builtin_neon_vset_lane_i16: |
8215 | 105 | case NEON::BI__builtin_neon_vset_lane_bf16: |
8216 | 107 | case NEON::BI__builtin_neon_vset_lane_i32: |
8217 | 109 | case NEON::BI__builtin_neon_vset_lane_i64: |
8218 | 110 | case NEON::BI__builtin_neon_vset_lane_f32: |
8219 | 113 | case NEON::BI__builtin_neon_vsetq_lane_i8: |
8220 | 117 | case NEON::BI__builtin_neon_vsetq_lane_i16: |
8221 | 119 | case NEON::BI__builtin_neon_vsetq_lane_bf16: |
8222 | 121 | case NEON::BI__builtin_neon_vsetq_lane_i32: |
8223 | 123 | case NEON::BI__builtin_neon_vsetq_lane_i64: |
8224 | 124 | case NEON::BI__builtin_neon_vsetq_lane_f32: |
8225 | 125 | case NEON::BI__builtin_neon_vsha1h_u32: |
8226 | 126 | case NEON::BI__builtin_neon_vsha1cq_u32: |
8227 | 127 | case NEON::BI__builtin_neon_vsha1pq_u32: |
8228 | 128 | case NEON::BI__builtin_neon_vsha1mq_u32: |
8229 | 130 | case NEON::BI__builtin_neon_vcvth_bf16_f32: |
8230 | 131 | case clang::ARM::BI_MoveToCoprocessor: |
8231 | 132 | case clang::ARM::BI_MoveToCoprocessor2: |
8232 | 132 | return false; |
8233 | 1.77k | } |
8234 | 1.64k | return true; |
8235 | 1.77k | } |
8236 | | |
8237 | | Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, |
8238 | | const CallExpr *E, |
8239 | | ReturnValueSlot ReturnValue, |
8240 | 6.67k | llvm::Triple::ArchType Arch) { |
8241 | 6.67k | if (auto Hint = GetValueForARMHint(BuiltinID)) |
8242 | 17 | return Hint; |
8243 | | |
8244 | 6.66k | if (BuiltinID == clang::ARM::BI__emit) { |
8245 | 2 | bool IsThumb = getTarget().getTriple().getArch() == llvm::Triple::thumb; |
8246 | 2 | llvm::FunctionType *FTy = |
8247 | 2 | llvm::FunctionType::get(VoidTy, /*Variadic=*/false); |
8248 | | |
8249 | 2 | Expr::EvalResult Result; |
8250 | 2 | if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext())) |
8251 | 0 | llvm_unreachable("Sema will ensure that the parameter is constant"); |
8252 | | |
8253 | 2 | llvm::APSInt Value = Result.Val.getInt(); |
8254 | 2 | uint64_t ZExtValue = Value.zextOrTrunc(IsThumb ? 16 : 32).getZExtValue();
8255 | | |
8256 | 2 | llvm::InlineAsm *Emit = |
8257 | 2 | IsThumb ? InlineAsm::get(FTy, ".inst.n 0x" + utohexstr(ZExtValue), "", |
8258 | 2 | /*hasSideEffects=*/true) |
8259 | 2 | : InlineAsm::get(FTy, ".inst 0x" + utohexstr(ZExtValue), "", |
8260 | 0 | /*hasSideEffects=*/true); |
8261 | | |
8262 | 2 | return Builder.CreateCall(Emit); |
8263 | 2 | } |
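 | | // Added example (not from the upstream source; 0xbf00 is assumed here
 | | // to be the Thumb NOP encoding): __emit(0xbf00) on a Thumb target
 | | // becomes the inline asm ".inst.n 0xbf00" marked as having side effects.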
8264 | | |
8265 | 6.66k | if (BuiltinID == clang::ARM::BI__builtin_arm_dbg) { |
8266 | 2 | Value *Option = EmitScalarExpr(E->getArg(0)); |
8267 | 2 | return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_dbg), Option); |
8268 | 2 | } |
8269 | | |
8270 | 6.65k | if (BuiltinID == clang::ARM::BI__builtin_arm_prefetch) { |
8271 | 7 | Value *Address = EmitScalarExpr(E->getArg(0)); |
8272 | 7 | Value *RW = EmitScalarExpr(E->getArg(1)); |
8273 | 7 | Value *IsData = EmitScalarExpr(E->getArg(2)); |
8274 | | |
8275 | | // Locality is not supported on ARM target |
8276 | 7 | Value *Locality = llvm::ConstantInt::get(Int32Ty, 3); |
8277 | | |
8278 | 7 | Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType()); |
8279 | 7 | return Builder.CreateCall(F, {Address, RW, Locality, IsData}); |
8280 | 7 | } |
8281 | | |
8282 | 6.65k | if (BuiltinID == clang::ARM::BI__builtin_arm_rbit) { |
8283 | 4 | llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); |
8284 | 4 | return Builder.CreateCall( |
8285 | 4 | CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit"); |
8286 | 4 | } |
8287 | | |
8288 | 6.64k | if (BuiltinID == clang::ARM::BI__builtin_arm_clz || |
8289 | 6.64k | BuiltinID == clang::ARM::BI__builtin_arm_clz64) {
8290 | 3 | llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); |
8291 | 3 | Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Arg->getType()); |
8292 | 3 | Value *Res = Builder.CreateCall(F, {Arg, Builder.getInt1(false)}); |
8293 | 3 | if (BuiltinID == clang::ARM::BI__builtin_arm_clz64) |
8294 | 1 | Res = Builder.CreateTrunc(Res, Builder.getInt32Ty()); |
8295 | 3 | return Res; |
8296 | 3 | } |
8297 | | |
8298 | | |
8299 | 6.64k | if (BuiltinID == clang::ARM::BI__builtin_arm_cls) { |
8300 | 4 | llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); |
8301 | 4 | return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_cls), Arg, "cls"); |
8302 | 4 | } |
8303 | 6.64k | if (BuiltinID == clang::ARM::BI__builtin_arm_cls64) { |
8304 | 2 | llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); |
8305 | 2 | return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_cls64), Arg, |
8306 | 2 | "cls"); |
8307 | 2 | } |
8308 | | |
8309 | 6.63k | if (BuiltinID == clang::ARM::BI__clear_cache) { |
8310 | 2 | assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments"); |
8311 | 2 | const FunctionDecl *FD = E->getDirectCallee(); |
8312 | 2 | Value *Ops[2]; |
8313 | 6 | for (unsigned i = 0; i < 2; i++)
8314 | 4 | Ops[i] = EmitScalarExpr(E->getArg(i)); |
8315 | 2 | llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType()); |
8316 | 2 | llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty); |
8317 | 2 | StringRef Name = FD->getName(); |
8318 | 2 | return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops); |
8319 | 2 | } |
8320 | | |
8321 | 6.63k | if (BuiltinID == clang::ARM::BI__builtin_arm_mcrr || |
8322 | 6.63k | BuiltinID == clang::ARM::BI__builtin_arm_mcrr2) {
8323 | 2 | Function *F; |
8324 | | |
8325 | 2 | switch (BuiltinID) { |
8326 | 0 | default: llvm_unreachable("unexpected builtin"); |
8327 | 1 | case clang::ARM::BI__builtin_arm_mcrr: |
8328 | 1 | F = CGM.getIntrinsic(Intrinsic::arm_mcrr); |
8329 | 1 | break; |
8330 | 1 | case clang::ARM::BI__builtin_arm_mcrr2: |
8331 | 1 | F = CGM.getIntrinsic(Intrinsic::arm_mcrr2); |
8332 | 1 | break; |
8333 | 2 | } |
8334 | | |
8335 | | // The MCRR{2} instruction has 5 operands, but
8336 | | // the intrinsic has only 4, because Rt and Rt2
8337 | | // are represented as a single unsigned 64-bit
8338 | | // integer in the intrinsic definition, even
8339 | | // though the instruction treats them as two
8340 | | // separate 32-bit registers.
8341 | | |
8342 | 2 | Value *Coproc = EmitScalarExpr(E->getArg(0)); |
8343 | 2 | Value *Opc1 = EmitScalarExpr(E->getArg(1)); |
8344 | 2 | Value *RtAndRt2 = EmitScalarExpr(E->getArg(2)); |
8345 | 2 | Value *CRm = EmitScalarExpr(E->getArg(3)); |
8346 | | |
8347 | 2 | Value *C1 = llvm::ConstantInt::get(Int64Ty, 32); |
8348 | 2 | Value *Rt = Builder.CreateTruncOrBitCast(RtAndRt2, Int32Ty); |
8349 | 2 | Value *Rt2 = Builder.CreateLShr(RtAndRt2, C1); |
8350 | 2 | Rt2 = Builder.CreateTruncOrBitCast(Rt2, Int32Ty); |
8351 | | |
8352 | 2 | return Builder.CreateCall(F, {Coproc, Opc1, Rt, Rt2, CRm}); |
8353 | 2 | } |
8354 | | |
8355 | 6.63k | if (BuiltinID == clang::ARM::BI__builtin_arm_mrrc || |
8356 | 6.63k | BuiltinID == clang::ARM::BI__builtin_arm_mrrc2) {
8357 | 2 | Function *F; |
8358 | | |
8359 | 2 | switch (BuiltinID) { |
8360 | 0 | default: llvm_unreachable("unexpected builtin"); |
8361 | 1 | case clang::ARM::BI__builtin_arm_mrrc: |
8362 | 1 | F = CGM.getIntrinsic(Intrinsic::arm_mrrc); |
8363 | 1 | break; |
8364 | 1 | case clang::ARM::BI__builtin_arm_mrrc2: |
8365 | 1 | F = CGM.getIntrinsic(Intrinsic::arm_mrrc2); |
8366 | 1 | break; |
8367 | 2 | } |
8368 | | |
8369 | 2 | Value *Coproc = EmitScalarExpr(E->getArg(0)); |
8370 | 2 | Value *Opc1 = EmitScalarExpr(E->getArg(1)); |
8371 | 2 | Value *CRm = EmitScalarExpr(E->getArg(2)); |
8372 | 2 | Value *RtAndRt2 = Builder.CreateCall(F, {Coproc, Opc1, CRm}); |
8373 | | |
8374 | | // The intrinsic returns an unsigned 64-bit integer,
8375 | | // represented as two 32-bit integers.
8376 | | |
8377 | 2 | Value *Rt = Builder.CreateExtractValue(RtAndRt2, 1); |
8378 | 2 | Value *Rt1 = Builder.CreateExtractValue(RtAndRt2, 0); |
8379 | 2 | Rt = Builder.CreateZExt(Rt, Int64Ty); |
8380 | 2 | Rt1 = Builder.CreateZExt(Rt1, Int64Ty); |
8381 | | |
8382 | 2 | Value *ShiftCast = llvm::ConstantInt::get(Int64Ty, 32); |
8383 | 2 | RtAndRt2 = Builder.CreateShl(Rt, ShiftCast, "shl", true); |
8384 | 2 | RtAndRt2 = Builder.CreateOr(RtAndRt2, Rt1); |
8385 | | |
8386 | 2 | return Builder.CreateBitCast(RtAndRt2, ConvertType(E->getType())); |
8387 | 2 | } |
8388 | | |
8389 | 6.63k | if (BuiltinID == clang::ARM::BI__builtin_arm_ldrexd || |
8390 | 6.63k | ((BuiltinID == clang::ARM::BI__builtin_arm_ldrex || |
8391 | 6.63k | BuiltinID == clang::ARM::BI__builtin_arm_ldaex) &&
8392 | 6.63k | getContext().getTypeSize(E->getType()) == 64) ||
8393 | 6.63k | BuiltinID == clang::ARM::BI__ldrexd) {
8394 | 7 | Function *F; |
8395 | | |
8396 | 7 | switch (BuiltinID) { |
8397 | 0 | default: llvm_unreachable("unexpected builtin"); |
8398 | 3 | case clang::ARM::BI__builtin_arm_ldaex: |
8399 | 3 | F = CGM.getIntrinsic(Intrinsic::arm_ldaexd); |
8400 | 3 | break; |
8401 | 0 | case clang::ARM::BI__builtin_arm_ldrexd: |
8402 | 3 | case clang::ARM::BI__builtin_arm_ldrex: |
8403 | 4 | case clang::ARM::BI__ldrexd: |
8404 | 4 | F = CGM.getIntrinsic(Intrinsic::arm_ldrexd); |
8405 | 4 | break; |
8406 | 7 | } |
8407 | | |
8408 | 7 | Value *LdPtr = EmitScalarExpr(E->getArg(0)); |
8409 | 7 | Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy), |
8410 | 7 | "ldrexd"); |
8411 | | |
8412 | 7 | Value *Val0 = Builder.CreateExtractValue(Val, 1); |
8413 | 7 | Value *Val1 = Builder.CreateExtractValue(Val, 0); |
8414 | 7 | Val0 = Builder.CreateZExt(Val0, Int64Ty); |
8415 | 7 | Val1 = Builder.CreateZExt(Val1, Int64Ty); |
8416 | | |
8417 | 7 | Value *ShiftCst = llvm::ConstantInt::get(Int64Ty, 32); |
8418 | 7 | Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */); |
8419 | 7 | Val = Builder.CreateOr(Val, Val1); |
8420 | 7 | return Builder.CreateBitCast(Val, ConvertType(E->getType())); |
8421 | 7 | } |
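 | | // Added sketch (not from the upstream source): the two i32 halves
 | | // returned by the intrinsic are reassembled into an i64 roughly as:
 | | //   %hi = zext i32 %val.1 to i64
 | | //   %lo = zext i32 %val.0 to i64
 | | //   %shl = shl nuw i64 %hi, 32
 | | //   %res = or i64 %shl, %lo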
8422 | | |
8423 | 6.62k | if (BuiltinID == clang::ARM::BI__builtin_arm_ldrex || |
8424 | 6.62k | BuiltinID == clang::ARM::BI__builtin_arm_ldaex) {
8425 | 14 | Value *LoadAddr = EmitScalarExpr(E->getArg(0)); |
8426 | | |
8427 | 14 | QualType Ty = E->getType(); |
8428 | 14 | llvm::Type *RealResTy = ConvertType(Ty); |
8429 | 14 | llvm::Type *IntTy = |
8430 | 14 | llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty)); |
8431 | | |
8432 | 14 | Function *F = CGM.getIntrinsic( |
8433 | 14 | BuiltinID == clang::ARM::BI__builtin_arm_ldaex ? Intrinsic::arm_ldaex
8434 | 14 | : Intrinsic::arm_ldrex,
8435 | 14 | UnqualPtrTy); |
8436 | 14 | CallInst *Val = Builder.CreateCall(F, LoadAddr, "ldrex"); |
8437 | 14 | Val->addParamAttr( |
8438 | 14 | 0, Attribute::get(getLLVMContext(), Attribute::ElementType, IntTy)); |
8439 | | |
8440 | 14 | if (RealResTy->isPointerTy()) |
8441 | 4 | return Builder.CreateIntToPtr(Val, RealResTy); |
8442 | 10 | else { |
8443 | 10 | llvm::Type *IntResTy = llvm::IntegerType::get( |
8444 | 10 | getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy)); |
8445 | 10 | return Builder.CreateBitCast(Builder.CreateTruncOrBitCast(Val, IntResTy), |
8446 | 10 | RealResTy); |
8447 | 10 | } |
8448 | 14 | } |
8449 | | |
8450 | 6.61k | if (BuiltinID == clang::ARM::BI__builtin_arm_strexd || |
8451 | 6.61k | ((BuiltinID == clang::ARM::BI__builtin_arm_stlex || |
8452 | 6.61k | BuiltinID == clang::ARM::BI__builtin_arm_strex) &&
8453 | 6.61k | getContext().getTypeSize(E->getArg(0)->getType()) == 64)) {
8454 | 4 | Function *F = CGM.getIntrinsic( |
8455 | 4 | BuiltinID == clang::ARM::BI__builtin_arm_stlex ? Intrinsic::arm_stlexd
8456 | 4 | : Intrinsic::arm_strexd);
8457 | 4 | llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty); |
8458 | | |
8459 | 4 | Address Tmp = CreateMemTemp(E->getArg(0)->getType()); |
8460 | 4 | Value *Val = EmitScalarExpr(E->getArg(0)); |
8461 | 4 | Builder.CreateStore(Val, Tmp); |
8462 | | |
8463 | 4 | Address LdPtr = Tmp.withElementType(STy); |
8464 | 4 | Val = Builder.CreateLoad(LdPtr); |
8465 | | |
8466 | 4 | Value *Arg0 = Builder.CreateExtractValue(Val, 0); |
8467 | 4 | Value *Arg1 = Builder.CreateExtractValue(Val, 1); |
8468 | 4 | Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), Int8PtrTy); |
8469 | 4 | return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "strexd"); |
8470 | 4 | } |
8471 | | |
8472 | 6.60k | if (BuiltinID == clang::ARM::BI__builtin_arm_strex || |
8473 | 6.60k | BuiltinID == clang::ARM::BI__builtin_arm_stlex) {
8474 | 12 | Value *StoreVal = EmitScalarExpr(E->getArg(0)); |
8475 | 12 | Value *StoreAddr = EmitScalarExpr(E->getArg(1)); |
8476 | | |
8477 | 12 | QualType Ty = E->getArg(0)->getType(); |
8478 | 12 | llvm::Type *StoreTy = |
8479 | 12 | llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty)); |
8480 | | |
8481 | 12 | if (StoreVal->getType()->isPointerTy()) |
8482 | 2 | StoreVal = Builder.CreatePtrToInt(StoreVal, Int32Ty); |
8483 | 10 | else { |
8484 | 10 | llvm::Type *IntTy = llvm::IntegerType::get( |
8485 | 10 | getLLVMContext(), |
8486 | 10 | CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType())); |
8487 | 10 | StoreVal = Builder.CreateBitCast(StoreVal, IntTy); |
8488 | 10 | StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int32Ty); |
8489 | 10 | } |
8490 | | |
8491 | 12 | Function *F = CGM.getIntrinsic( |
8492 | 12 | BuiltinID == clang::ARM::BI__builtin_arm_stlex ? Intrinsic::arm_stlex
8493 | 12 | : Intrinsic::arm_strex,
8494 | 12 | StoreAddr->getType()); |
8495 | | |
8496 | 12 | CallInst *CI = Builder.CreateCall(F, {StoreVal, StoreAddr}, "strex"); |
8497 | 12 | CI->addParamAttr( |
8498 | 12 | 1, Attribute::get(getLLVMContext(), Attribute::ElementType, StoreTy)); |
8499 | 12 | return CI; |
8500 | 12 | } |
8501 | | |
8502 | 6.59k | if (BuiltinID == clang::ARM::BI__builtin_arm_clrex) { |
8503 | 1 | Function *F = CGM.getIntrinsic(Intrinsic::arm_clrex); |
8504 | 1 | return Builder.CreateCall(F); |
8505 | 1 | } |
8506 | | |
8507 | | // CRC32 |
8508 | 6.59k | Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic; |
8509 | 6.59k | switch (BuiltinID) { |
8510 | 3 | case clang::ARM::BI__builtin_arm_crc32b: |
8511 | 3 | CRCIntrinsicID = Intrinsic::arm_crc32b; break; |
8512 | 2 | case clang::ARM::BI__builtin_arm_crc32cb: |
8513 | 2 | CRCIntrinsicID = Intrinsic::arm_crc32cb; break; |
8514 | 2 | case clang::ARM::BI__builtin_arm_crc32h: |
8515 | 2 | CRCIntrinsicID = Intrinsic::arm_crc32h; break; |
8516 | 2 | case clang::ARM::BI__builtin_arm_crc32ch: |
8517 | 2 | CRCIntrinsicID = Intrinsic::arm_crc32ch; break; |
8518 | 2 | case clang::ARM::BI__builtin_arm_crc32w: |
8519 | 4 | case clang::ARM::BI__builtin_arm_crc32d: |
8520 | 4 | CRCIntrinsicID = Intrinsic::arm_crc32w; break; |
8521 | 2 | case clang::ARM::BI__builtin_arm_crc32cw: |
8522 | 4 | case clang::ARM::BI__builtin_arm_crc32cd: |
8523 | 4 | CRCIntrinsicID = Intrinsic::arm_crc32cw; break; |
8524 | 6.59k | } |
8525 | | |
8526 | 6.59k | if (CRCIntrinsicID != Intrinsic::not_intrinsic) { |
8527 | 17 | Value *Arg0 = EmitScalarExpr(E->getArg(0)); |
8528 | 17 | Value *Arg1 = EmitScalarExpr(E->getArg(1)); |
8529 | | |
8530 | | // crc32{c,}d intrinsics are implemented as two calls to crc32{c,}w |
8531 | | // intrinsics, hence we need different codegen for these cases. |
8532 | 17 | if (BuiltinID == clang::ARM::BI__builtin_arm_crc32d || |
8533 | 17 | BuiltinID == clang::ARM::BI__builtin_arm_crc32cd) {
8534 | 4 | Value *C1 = llvm::ConstantInt::get(Int64Ty, 32); |
8535 | 4 | Value *Arg1a = Builder.CreateTruncOrBitCast(Arg1, Int32Ty); |
8536 | 4 | Value *Arg1b = Builder.CreateLShr(Arg1, C1); |
8537 | 4 | Arg1b = Builder.CreateTruncOrBitCast(Arg1b, Int32Ty); |
8538 | | |
8539 | 4 | Function *F = CGM.getIntrinsic(CRCIntrinsicID); |
8540 | 4 | Value *Res = Builder.CreateCall(F, {Arg0, Arg1a}); |
8541 | 4 | return Builder.CreateCall(F, {Res, Arg1b}); |
8542 | 13 | } else { |
8543 | 13 | Arg1 = Builder.CreateZExtOrBitCast(Arg1, Int32Ty); |
8544 | | |
8545 | 13 | Function *F = CGM.getIntrinsic(CRCIntrinsicID); |
8546 | 13 | return Builder.CreateCall(F, {Arg0, Arg1}); |
8547 | 13 | } |
8548 | 17 | } |
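 | | // Added worked example (not from the upstream source):
 | | // __builtin_arm_crc32d(crc, data) becomes two 32-bit steps:
 | | //   %lo = crc32w(%crc, trunc(%data))
 | | //   %res = crc32w(%lo, trunc(%data >> 32))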
8549 | | |
8550 | 6.57k | if (BuiltinID == clang::ARM::BI__builtin_arm_rsr || |
8551 | 6.57k | BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
8552 | 6.57k | BuiltinID == clang::ARM::BI__builtin_arm_rsrp ||
8553 | 6.57k | BuiltinID == clang::ARM::BI__builtin_arm_wsr ||
8554 | 6.57k | BuiltinID == clang::ARM::BI__builtin_arm_wsr64 ||
8555 | 6.57k | BuiltinID == clang::ARM::BI__builtin_arm_wsrp) {
8556 | | |
8557 | 16 | SpecialRegisterAccessKind AccessKind = Write; |
8558 | 16 | if (BuiltinID == clang::ARM::BI__builtin_arm_rsr || |
8559 | 16 | BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
8560 | 16 | BuiltinID == clang::ARM::BI__builtin_arm_rsrp)
8561 | 8 | AccessKind = VolatileRead; |
8562 | | |
8563 | 16 | bool IsPointerBuiltin = BuiltinID == clang::ARM::BI__builtin_arm_rsrp || |
8564 | 16 | BuiltinID == clang::ARM::BI__builtin_arm_wsrp;
8565 | | |
8566 | 16 | bool Is64Bit = BuiltinID == clang::ARM::BI__builtin_arm_rsr64 || |
8567 | 16 | BuiltinID == clang::ARM::BI__builtin_arm_wsr64;
8568 | | |
8569 | 16 | llvm::Type *ValueType; |
8570 | 16 | llvm::Type *RegisterType; |
8571 | 16 | if (IsPointerBuiltin) { |
8572 | 4 | ValueType = VoidPtrTy; |
8573 | 4 | RegisterType = Int32Ty; |
8574 | 12 | } else if (Is64Bit) { |
8575 | 6 | ValueType = RegisterType = Int64Ty; |
8576 | 6 | } else { |
8577 | 6 | ValueType = RegisterType = Int32Ty; |
8578 | 6 | } |
8579 | | |
8580 | 16 | return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType, |
8581 | 16 | AccessKind); |
8582 | 16 | } |
8583 | | |
8584 | 6.56k | if (BuiltinID == ARM::BI__builtin_sponentry) { |
8585 | 1 | llvm::Function *F = CGM.getIntrinsic(Intrinsic::sponentry, AllocaInt8PtrTy); |
8586 | 1 | return Builder.CreateCall(F); |
8587 | 1 | } |
8588 | | |
8589 | | // Handle MSVC intrinsics before argument evaluation to prevent double |
8590 | | // evaluation. |
8591 | 6.56k | if (std::optional<MSVCIntrin> MsvcIntId = translateArmToMsvcIntrin(BuiltinID)) |
8592 | 154 | return EmitMSVCBuiltinExpr(*MsvcIntId, E); |
8593 | | |
8594 | | // Deal with MVE builtins |
8595 | 6.40k | if (Value *Result = EmitARMMVEBuiltinExpr(BuiltinID, E, ReturnValue, Arch)) |
8596 | 4.58k | return Result; |
8597 | | // Handle CDE builtins |
8598 | 1.81k | if (Value *Result = EmitARMCDEBuiltinExpr(BuiltinID, E, ReturnValue, Arch)) |
8599 | 39 | return Result; |
8600 | | |
8601 | | // Some intrinsics are equivalent; if so, use the base intrinsic ID.
8602 | 255k | auto It = llvm::find_if(NEONEquivalentIntrinsicMap, [BuiltinID](auto &P) {
8603 | 255k | return P.first == BuiltinID; |
8604 | 255k | }); |
8605 | 1.77k | if (It != end(NEONEquivalentIntrinsicMap)) |
8606 | 129 | BuiltinID = It->second; |
8607 | | |
8608 | | // Find out if any arguments are required to be integer constant |
8609 | | // expressions. |
8610 | 1.77k | unsigned ICEArguments = 0; |
8611 | 1.77k | ASTContext::GetBuiltinTypeError Error; |
8612 | 1.77k | getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments); |
8613 | 1.77k | assert(Error == ASTContext::GE_None && "Should not codegen an error"); |
8614 | | |
8615 | 1.77k | auto getAlignmentValue32 = [&](Address addr) -> Value* { |
8616 | 6 | return Builder.getInt32(addr.getAlignment().getQuantity()); |
8617 | 6 | }; |
8618 | | |
8619 | 1.77k | Address PtrOp0 = Address::invalid(); |
8620 | 1.77k | Address PtrOp1 = Address::invalid(); |
8621 | 1.77k | SmallVector<Value*, 4> Ops; |
8622 | 1.77k | bool HasExtraArg = HasExtraNeonArgument(BuiltinID); |
8623 | 1.77k | unsigned NumArgs = E->getNumArgs() - (HasExtraArg ? 1 : 0);
8624 | 6.14k | for (unsigned i = 0, e = NumArgs; i != e; i++) {
8625 | 4.36k | if (i == 0) { |
8626 | 1.77k | switch (BuiltinID) { |
8627 | 15 | case NEON::BI__builtin_neon_vld1_v: |
8628 | 33 | case NEON::BI__builtin_neon_vld1q_v: |
8629 | 47 | case NEON::BI__builtin_neon_vld1q_lane_v: |
8630 | 60 | case NEON::BI__builtin_neon_vld1_lane_v: |
8631 | 74 | case NEON::BI__builtin_neon_vld1_dup_v: |
8632 | 87 | case NEON::BI__builtin_neon_vld1q_dup_v: |
8633 | 100 | case NEON::BI__builtin_neon_vst1_v: |
8634 | 114 | case NEON::BI__builtin_neon_vst1q_v: |
8635 | 128 | case NEON::BI__builtin_neon_vst1q_lane_v: |
8636 | 142 | case NEON::BI__builtin_neon_vst1_lane_v: |
8637 | 155 | case NEON::BI__builtin_neon_vst2_v: |
8638 | 166 | case NEON::BI__builtin_neon_vst2q_v: |
8639 | 177 | case NEON::BI__builtin_neon_vst2_lane_v: |
8640 | 185 | case NEON::BI__builtin_neon_vst2q_lane_v: |
8641 | 198 | case NEON::BI__builtin_neon_vst3_v: |
8642 | 209 | case NEON::BI__builtin_neon_vst3q_v: |
8643 | 220 | case NEON::BI__builtin_neon_vst3_lane_v: |
8644 | 228 | case NEON::BI__builtin_neon_vst3q_lane_v: |
8645 | 241 | case NEON::BI__builtin_neon_vst4_v: |
8646 | 252 | case NEON::BI__builtin_neon_vst4q_v: |
8647 | 263 | case NEON::BI__builtin_neon_vst4_lane_v: |
8648 | 271 | case NEON::BI__builtin_neon_vst4q_lane_v: |
8649 | | // Get the alignment for the argument in addition to the value; |
8650 | | // we'll use it later. |
8651 | 271 | PtrOp0 = EmitPointerWithAlignment(E->getArg(0)); |
8652 | 271 | Ops.push_back(PtrOp0.getPointer()); |
8653 | 271 | continue; |
8654 | 1.77k | } |
8655 | 1.77k | } |
8656 | 4.09k | if (i == 1) { |
8657 | 1.51k | switch (BuiltinID) { |
8658 | 13 | case NEON::BI__builtin_neon_vld2_v: |
8659 | 24 | case NEON::BI__builtin_neon_vld2q_v: |
8660 | 37 | case NEON::BI__builtin_neon_vld3_v: |
8661 | 48 | case NEON::BI__builtin_neon_vld3q_v: |
8662 | 61 | case NEON::BI__builtin_neon_vld4_v: |
8663 | 72 | case NEON::BI__builtin_neon_vld4q_v: |
8664 | 83 | case NEON::BI__builtin_neon_vld2_lane_v: |
8665 | 91 | case NEON::BI__builtin_neon_vld2q_lane_v: |
8666 | 102 | case NEON::BI__builtin_neon_vld3_lane_v: |
8667 | 110 | case NEON::BI__builtin_neon_vld3q_lane_v: |
8668 | 121 | case NEON::BI__builtin_neon_vld4_lane_v: |
8669 | 129 | case NEON::BI__builtin_neon_vld4q_lane_v: |
8670 | 142 | case NEON::BI__builtin_neon_vld2_dup_v: |
8671 | 153 | case NEON::BI__builtin_neon_vld2q_dup_v: |
8672 | 166 | case NEON::BI__builtin_neon_vld3_dup_v: |
8673 | 177 | case NEON::BI__builtin_neon_vld3q_dup_v: |
8674 | 190 | case NEON::BI__builtin_neon_vld4_dup_v: |
8675 | 201 | case NEON::BI__builtin_neon_vld4q_dup_v: |
8676 | | // Get the alignment for the argument in addition to the value; |
8677 | | // we'll use it later. |
8678 | 201 | PtrOp1 = EmitPointerWithAlignment(E->getArg(1)); |
8679 | 201 | Ops.push_back(PtrOp1.getPointer()); |
8680 | 201 | continue; |
8681 | 1.51k | } |
8682 | 1.51k | } |
8683 | | |
8684 | 3.88k | Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E)); |
8685 | 3.88k | } |
8686 | | |
8687 | 1.77k | switch (BuiltinID) { |
8688 | 1.64k | default: break; |
8689 | | |
8690 | 1.64k | case NEON::BI__builtin_neon_vget_lane_i8: |
8691 | 7 | case NEON::BI__builtin_neon_vget_lane_i16: |
8692 | 9 | case NEON::BI__builtin_neon_vget_lane_i32: |
8693 | 11 | case NEON::BI__builtin_neon_vget_lane_i64: |
8694 | 45 | case NEON::BI__builtin_neon_vget_lane_bf16: |
8695 | 46 | case NEON::BI__builtin_neon_vget_lane_f32: |
8696 | 49 | case NEON::BI__builtin_neon_vgetq_lane_i8: |
8697 | 53 | case NEON::BI__builtin_neon_vgetq_lane_i16: |
8698 | 55 | case NEON::BI__builtin_neon_vgetq_lane_i32: |
8699 | 57 | case NEON::BI__builtin_neon_vgetq_lane_i64: |
8700 | 91 | case NEON::BI__builtin_neon_vgetq_lane_bf16: |
8701 | 92 | case NEON::BI__builtin_neon_vgetq_lane_f32: |
8702 | 94 | case NEON::BI__builtin_neon_vduph_lane_bf16: |
8703 | 96 | case NEON::BI__builtin_neon_vduph_laneq_bf16: |
8704 | 96 | return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane"); |
8705 | | |
8706 | 1 | case NEON::BI__builtin_neon_vrndns_f32: { |
8707 | 1 | Value *Arg = EmitScalarExpr(E->getArg(0)); |
8708 | 1 | llvm::Type *Tys[] = {Arg->getType()}; |
8709 | 1 | Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vrintn, Tys); |
8710 | 1 | return Builder.CreateCall(F, {Arg}, "vrndn"); } |
8711 | | |
8712 | 3 | case NEON::BI__builtin_neon_vset_lane_i8: |
8713 | 7 | case NEON::BI__builtin_neon_vset_lane_i16: |
8714 | 9 | case NEON::BI__builtin_neon_vset_lane_i32: |
8715 | 11 | case NEON::BI__builtin_neon_vset_lane_i64: |
8716 | 13 | case NEON::BI__builtin_neon_vset_lane_bf16: |
8717 | 14 | case NEON::BI__builtin_neon_vset_lane_f32: |
8718 | 17 | case NEON::BI__builtin_neon_vsetq_lane_i8: |
8719 | 21 | case NEON::BI__builtin_neon_vsetq_lane_i16: |
8720 | 23 | case NEON::BI__builtin_neon_vsetq_lane_i32: |
8721 | 25 | case NEON::BI__builtin_neon_vsetq_lane_i64: |
8722 | 27 | case NEON::BI__builtin_neon_vsetq_lane_bf16: |
8723 | 28 | case NEON::BI__builtin_neon_vsetq_lane_f32: |
8724 | 28 | return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane"); |
8725 | | |
8726 | 1 | case NEON::BI__builtin_neon_vsha1h_u32: |
8727 | 1 | return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1h), Ops, |
8728 | 1 | "vsha1h"); |
8729 | 1 | case NEON::BI__builtin_neon_vsha1cq_u32: |
8730 | 1 | return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1c), Ops, |
8731 | 1 | "vsha1h"); |
8732 | 1 | case NEON::BI__builtin_neon_vsha1pq_u32: |
8733 | 1 | return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1p), Ops, |
8734 | 1 | "vsha1h"); |
8735 | 1 | case NEON::BI__builtin_neon_vsha1mq_u32: |
8736 | 1 | return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1m), Ops, |
8737 | 1 | "vsha1h"); |
8738 | | |
8739 | 2 | case NEON::BI__builtin_neon_vcvth_bf16_f32: { |
8740 | 2 | return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vcvtbfp2bf), Ops, |
8741 | 2 | "vcvtbfp2bf"); |
8742 | 27 | } |
8743 | | |
8744 | | // The ARM _MoveToCoprocessor builtins put the input register value as |
8745 | | // the first argument, but the LLVM intrinsic expects it as the third one. |
8746 | 1 | case clang::ARM::BI_MoveToCoprocessor: |
8747 | 2 | case clang::ARM::BI_MoveToCoprocessor2: { |
8748 | 2 | Function *F = CGM.getIntrinsic(BuiltinID == clang::ARM::BI_MoveToCoprocessor |
8749 | 2 | ? Intrinsic::arm_mcr1 |
8750 | 2 | : Intrinsic::arm_mcr21 ); |
8751 | 2 | return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0], |
8752 | 2 | Ops[3], Ops[4], Ops[5]}); |
8753 | 1 | } |
8754 | 1.77k | } |
8755 | | |
8756 | | // Get the last argument, which specifies the vector type. |
8757 | 1.64k | assert(HasExtraArg); |
8758 | 1.64k | const Expr *Arg = E->getArg(E->getNumArgs()-1); |
8759 | 1.64k | std::optional<llvm::APSInt> Result = |
8760 | 1.64k | Arg->getIntegerConstantExpr(getContext()); |
8761 | 1.64k | if (!Result) |
8762 | 0 | return nullptr; |
8763 | | |
8764 | 1.64k | if (BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_f || |
8765 | 1.64k | BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_d) {
8766 | | // Determine the overloaded type of this builtin. |
8767 | 4 | llvm::Type *Ty; |
8768 | 4 | if (BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_f) |
8769 | 2 | Ty = FloatTy; |
8770 | 2 | else |
8771 | 2 | Ty = DoubleTy; |
8772 | | |
8773 | | // Determine whether this is an unsigned conversion or not. |
8774 | 4 | bool usgn = Result->getZExtValue() == 1; |
8775 | 4 | unsigned Int = usgn ? Intrinsic::arm_vcvtru : Intrinsic::arm_vcvtr;
8776 | | |
8777 | | // Call the appropriate intrinsic. |
8778 | 4 | Function *F = CGM.getIntrinsic(Int, Ty); |
8779 | 4 | return Builder.CreateCall(F, Ops, "vcvtr"); |
8780 | 4 | } |
8781 | | |
8782 | | // Determine the type of this overloaded NEON intrinsic. |
8783 | 1.64k | NeonTypeFlags Type = Result->getZExtValue(); |
8784 | 1.64k | bool usgn = Type.isUnsigned(); |
8785 | 1.64k | bool rightShift = false; |
8786 | | |
8787 | 1.64k | llvm::FixedVectorType *VTy = |
8788 | 1.64k | GetNeonType(this, Type, getTarget().hasLegalHalfType(), false, |
8789 | 1.64k | getTarget().hasBFloat16Type()); |
8790 | 1.64k | llvm::Type *Ty = VTy; |
8791 | 1.64k | if (!Ty) |
8792 | 0 | return nullptr; |
8793 | | |
8794 | | // Many NEON builtins have identical semantics and uses in ARM and |
8795 | | // AArch64. Emit these in a single function. |
8796 | 1.64k | auto IntrinsicMap = ArrayRef(ARMSIMDIntrinsicMap); |
8797 | 1.64k | const ARMVectorIntrinsicInfo *Builtin = findARMVectorIntrinsicInMap( |
8798 | 1.64k | IntrinsicMap, BuiltinID, NEONSIMDIntrinsicsProvenSorted); |
8799 | 1.64k | if (Builtin) |
8800 | 1.46k | return EmitCommonNeonBuiltinExpr( |
8801 | 1.46k | Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic, |
8802 | 1.46k | Builtin->NameHint, Builtin->TypeModifier, E, Ops, PtrOp0, PtrOp1, Arch); |
8803 | | |
8804 | 179 | unsigned Int; |
8805 | 179 | switch (BuiltinID) { |
8806 | 0 | default: return nullptr; |
8807 | 14 | case NEON::BI__builtin_neon_vld1q_lane_v: |
8808 | | // Handle 64-bit integer elements as a special case. Use shuffles of |
8809 | | // one-element vectors to avoid poor code for i64 in the backend. |
8810 | 14 | if (VTy->getElementType()->isIntegerTy(64)) { |
8811 | | // Extract the other lane. |
8812 | 3 | Ops[1] = Builder.CreateBitCast(Ops[1], Ty); |
8813 | 3 | int Lane = cast<ConstantInt>(Ops[2])->getZExtValue(); |
8814 | 3 | Value *SV = llvm::ConstantVector::get(ConstantInt::get(Int32Ty, 1-Lane)); |
8815 | 3 | Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV); |
8816 | | // Load the value as a one-element vector. |
8817 | 3 | Ty = llvm::FixedVectorType::get(VTy->getElementType(), 1); |
8818 | 3 | llvm::Type *Tys[] = {Ty, Int8PtrTy}; |
8819 | 3 | Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Tys); |
8820 | 3 | Value *Align = getAlignmentValue32(PtrOp0); |
8821 | 3 | Value *Ld = Builder.CreateCall(F, {Ops[0], Align}); |
8822 | | // Combine them. |
8823 | 3 | int Indices[] = {1 - Lane, Lane}; |
8824 | 3 | return Builder.CreateShuffleVector(Ops[1], Ld, Indices, "vld1q_lane"); |
8825 | 3 | } |
8826 | 14 | [[fallthrough]];
8827 | 24 | case NEON::BI__builtin_neon_vld1_lane_v: { |
8828 | 24 | Ops[1] = Builder.CreateBitCast(Ops[1], Ty); |
8829 | 24 | PtrOp0 = PtrOp0.withElementType(VTy->getElementType()); |
8830 | 24 | Value *Ld = Builder.CreateLoad(PtrOp0); |
8831 | 24 | return Builder.CreateInsertElement(Ops[1], Ld, Ops[2], "vld1_lane"); |
8832 | 11 | } |
8833 | 7 | case NEON::BI__builtin_neon_vqrshrn_n_v: |
8834 | 7 | Int = |
8835 | 7 | usgn ? Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns;
8836 | 7 | return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n", |
8837 | 7 | 1, true); |
8838 | 3 | case NEON::BI__builtin_neon_vqrshrun_n_v: |
8839 | 3 | return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrshiftnsu, Ty), |
8840 | 3 | Ops, "vqrshrun_n", 1, true); |
8841 | 6 | case NEON::BI__builtin_neon_vqshrn_n_v: |
8842 | 6 | Int = usgn ? Intrinsic::arm_neon_vqshiftnu : Intrinsic::arm_neon_vqshiftns;
8843 | 6 | return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n", |
8844 | 6 | 1, true); |
8845 | 3 | case NEON::BI__builtin_neon_vqshrun_n_v: |
8846 | 3 | return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftnsu, Ty), |
8847 | 3 | Ops, "vqshrun_n", 1, true); |
8848 | 0 | case NEON::BI__builtin_neon_vrecpe_v: |
8849 | 0 | case NEON::BI__builtin_neon_vrecpeq_v: |
8850 | 0 | return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecpe, Ty), |
8851 | 0 | Ops, "vrecpe"); |
8852 | 6 | case NEON::BI__builtin_neon_vrshrn_n_v: |
8853 | 6 | return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrshiftn, Ty), |
8854 | 6 | Ops, "vrshrn_n", 1, true); |
8855 | 8 | case NEON::BI__builtin_neon_vrsra_n_v: |
8856 | 16 | case NEON::BI__builtin_neon_vrsraq_n_v: |
8857 | 16 | Ops[0] = Builder.CreateBitCast(Ops[0], Ty); |
8858 | 16 | Ops[1] = Builder.CreateBitCast(Ops[1], Ty); |
8859 | 16 | Ops[2] = EmitNeonShiftVector(Ops[2], Ty, true); |
8860 | 16 | Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
8861 | 16 | Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Ty), {Ops[1], Ops[2]}); |
8862 | 16 | return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n"); |
8863 | 10 | case NEON::BI__builtin_neon_vsri_n_v: |
8864 | 20 | case NEON::BI__builtin_neon_vsriq_n_v: |
8865 | 20 | rightShift = true; |
8866 | 20 | [[fallthrough]]; |
8867 | 30 | case NEON::BI__builtin_neon_vsli_n_v: |
8868 | 40 | case NEON::BI__builtin_neon_vsliq_n_v: |
8869 | 40 | Ops[2] = EmitNeonShiftVector(Ops[2], Ty, rightShift); |
8870 | 40 | return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftins, Ty), |
8871 | 40 | Ops, "vsli_n"); |
8872 | 11 | case NEON::BI__builtin_neon_vsra_n_v: |
8873 | 19 | case NEON::BI__builtin_neon_vsraq_n_v: |
8874 | 19 | Ops[0] = Builder.CreateBitCast(Ops[0], Ty); |
8875 | 19 | Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n"); |
8876 | 19 | return Builder.CreateAdd(Ops[0], Ops[1]); |
8877 | 14 | case NEON::BI__builtin_neon_vst1q_lane_v: |
8878 | | // Handle 64-bit integer elements as a special case. Use a shuffle to get |
8879 | | // a one-element vector and avoid poor code for i64 in the backend. |
8880 | 14 | if (VTy->getElementType()->isIntegerTy(64)) { |
8881 | 3 | Ops[1] = Builder.CreateBitCast(Ops[1], Ty); |
8882 | 3 | Value *SV = llvm::ConstantVector::get(cast<llvm::Constant>(Ops[2])); |
8883 | 3 | Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV); |
8884 | 3 | Ops[2] = getAlignmentValue32(PtrOp0); |
8885 | 3 | llvm::Type *Tys[] = {Int8PtrTy, Ops[1]->getType()}; |
8886 | 3 | return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1, |
8887 | 3 | Tys), Ops); |
8888 | 3 | } |
8889 | 14 | [[fallthrough]];
8890 | 25 | case NEON::BI__builtin_neon_vst1_lane_v: { |
8891 | 25 | Ops[1] = Builder.CreateBitCast(Ops[1], Ty); |
8892 | 25 | Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]); |
8893 | 25 | return Builder.CreateStore(Ops[1], |
8894 | 25 | PtrOp0.withElementType(Ops[1]->getType())); |
8895 | 11 | } |
8896 | 3 | case NEON::BI__builtin_neon_vtbl1_v: |
8897 | 3 | return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl1), |
8898 | 3 | Ops, "vtbl1"); |
8899 | 3 | case NEON::BI__builtin_neon_vtbl2_v: |
8900 | 3 | return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl2), |
8901 | 3 | Ops, "vtbl2"); |
8902 | 3 | case NEON::BI__builtin_neon_vtbl3_v: |
8903 | 3 | return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl3), |
8904 | 3 | Ops, "vtbl3"); |
8905 | 3 | case NEON::BI__builtin_neon_vtbl4_v: |
8906 | 3 | return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl4), |
8907 | 3 | Ops, "vtbl4"); |
8908 | 3 | case NEON::BI__builtin_neon_vtbx1_v: |
8909 | 3 | return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx1), |
8910 | 3 | Ops, "vtbx1"); |
8911 | 3 | case NEON::BI__builtin_neon_vtbx2_v: |
8912 | 3 | return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx2), |
8913 | 3 | Ops, "vtbx2"); |
8914 | 3 | case NEON::BI__builtin_neon_vtbx3_v: |
8915 | 3 | return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx3), |
8916 | 3 | Ops, "vtbx3"); |
8917 | 3 | case NEON::BI__builtin_neon_vtbx4_v: |
8918 | 3 | return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx4), |
8919 | 3 | Ops, "vtbx4"); |
8920 | 179 | } |
8921 | 179 | } |
8922 | | |
8923 | | template<typename Integer> |
8924 | 24 | static Integer GetIntegerConstantValue(const Expr *E, ASTContext &Context) { |
8925 | 24 | return E->getIntegerConstantExpr(Context)->getExtValue(); |
8926 | 24 | } |
8927 | | |
8928 | | static llvm::Value *SignOrZeroExtend(CGBuilderTy &Builder, llvm::Value *V, |
8929 | 40 | llvm::Type *T, bool Unsigned) { |
8930 | | // Helper function called by Tablegen-constructed ARM MVE builtin codegen, |
8931 | | // which finds it convenient to specify signed/unsigned as a boolean flag. |
8932 | 40 | return Unsigned ? Builder.CreateZExt(V, T) : Builder.CreateSExt(V, T);
8933 | 40 | } |
8934 | | |
8935 | | static llvm::Value *MVEImmediateShr(CGBuilderTy &Builder, llvm::Value *V, |
8936 | 24 | uint32_t Shift, bool Unsigned) { |
8937 | | // MVE helper function for integer shift right. This must handle signed vs |
8938 | | // unsigned, and also deal specially with the case where the shift count is |
8939 | | // equal to the lane size. In LLVM IR, an LShr with that parameter would be |
8940 | | // undefined behavior, but in MVE it's legal, so we must convert it to code |
8941 | | // that is not undefined in IR. |
8942 | 24 | unsigned LaneBits = cast<llvm::VectorType>(V->getType()) |
8943 | 24 | ->getElementType() |
8944 | 24 | ->getPrimitiveSizeInBits(); |
8945 | 24 | if (Shift == LaneBits) { |
8946 | | // An unsigned shift of the full lane size always generates zero, so we can |
8947 | | // simply emit a zero vector. A signed shift of the full lane size does the |
8948 | | // same thing as shifting by one bit fewer. |
8949 | 12 | if (Unsigned) |
8950 | 6 | return llvm::Constant::getNullValue(V->getType()); |
8951 | 6 | else |
8952 | 6 | --Shift; |
8953 | 12 | } |
8954 | 18 | return Unsigned ? Builder.CreateLShr(V, Shift) : Builder.CreateAShr(V, Shift);
8955 | 24 | } |
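// Illustrative example (editor's sketch, intrinsics from arm_mve.h): a
// full-lane-width shift is legal at the ACLE level, so
//   int32x4_t f(int32x4_t v) { return vshrq_n_s32(v, 32); }   // ashr by 31
//   uint32x4_t g(uint32x4_t v) { return vshrq_n_u32(v, 32); } // zero vector
// are both lowered to defined IR here, whereas a direct ashr/lshr by 32 on
// 32-bit lanes would be poison.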
8956 | | |
8957 | 530 | static llvm::Value *ARMMVEVectorSplat(CGBuilderTy &Builder, llvm::Value *V) { |
8958 | | // MVE-specific helper function for a vector splat, which infers the element |
8959 | | // count of the output vector by knowing that MVE vectors are all 128 bits |
8960 | | // wide. |
8961 | 530 | unsigned Elements = 128 / V->getType()->getPrimitiveSizeInBits(); |
8962 | 530 | return Builder.CreateVectorSplat(Elements, V); |
8963 | 530 | } |
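// For example (editor's sketch): an i32 scalar gives 128 / 32 = 4 lanes, so
// the splat has type <4 x i32>; an i16 scalar yields <8 x i16>.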
8964 | | |
8965 | | static llvm::Value *ARMMVEVectorReinterpret(CGBuilderTy &Builder, |
8966 | | CodeGenFunction *CGF, |
8967 | | llvm::Value *V, |
8968 | 459 | llvm::Type *DestType) { |
8969 | | // Convert one MVE vector type into another by reinterpreting its in-register |
8970 | | // format. |
8971 | | // |
8972 | | // Little-endian, this is identical to a bitcast (which reinterprets the |
8973 | | // memory format). But big-endian, they're not necessarily the same, because |
8974 | | // the register and memory formats map to each other differently depending on |
8975 | | // the lane size. |
8976 | | // |
8977 | | // We generate a bitcast whenever we can (if we're little-endian, or if the |
8978 | | // lane sizes are the same anyway). Otherwise we fall back to an IR intrinsic |
8979 | | // that performs the different kind of reinterpretation. |
8980 | 459 | if (CGF->getTarget().isBigEndian() && |
8981 | 459 | V->getType()->getScalarSizeInBits() != DestType->getScalarSizeInBits()) {
8982 | 187 | return Builder.CreateCall( |
8983 | 187 | CGF->CGM.getIntrinsic(Intrinsic::arm_mve_vreinterpretq, |
8984 | 187 | {DestType, V->getType()}), |
8985 | 187 | V); |
8986 | 272 | } else { |
8987 | 272 | return Builder.CreateBitCast(V, DestType); |
8988 | 272 | } |
8989 | 459 | } |
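// Illustrative example (editor's sketch, intrinsics from arm_mve.h):
//   int32x4_t f(int8x16_t v) { return vreinterpretq_s32_s8(v); }
// is a plain bitcast when targeting little-endian; on big-endian the lane
// sizes differ (8 vs 32 bits), so it becomes a call to
// @llvm.arm.mve.vreinterpretq overloaded on {<4 x i32>, <16 x i8>}.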
8990 | | |
8991 | 16 | static llvm::Value *VectorUnzip(CGBuilderTy &Builder, llvm::Value *V, bool Odd) { |
8992 | | // Make a shufflevector that extracts every other element of a vector (evens |
8993 | | // or odds, as desired). |
8994 | 16 | SmallVector<int, 16> Indices; |
8995 | 16 | unsigned InputElements = |
8996 | 16 | cast<llvm::FixedVectorType>(V->getType())->getNumElements(); |
8997 | 112 | for (unsigned i = 0; i < InputElements; i += 2)
8998 | 96 | Indices.push_back(i + Odd); |
8999 | 16 | return Builder.CreateShuffleVector(V, Indices); |
9000 | 16 | } |
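// For example (editor's sketch): with V of type <8 x i16> and Odd == false,
// Indices is {0, 2, 4, 6} and the shuffle yields a <4 x i16> of the even
// lanes; Odd == true selects lanes {1, 3, 5, 7}.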
9001 | | |
9002 | | static llvm::Value *VectorZip(CGBuilderTy &Builder, llvm::Value *V0, |
9003 | 32 | llvm::Value *V1) { |
9004 | | // Make a shufflevector that interleaves two vectors element by element. |
9005 | 32 | assert(V0->getType() == V1->getType() && "Can't zip different vector types"); |
9006 | 32 | SmallVector<int, 16> Indices; |
9007 | 32 | unsigned InputElements = |
9008 | 32 | cast<llvm::FixedVectorType>(V0->getType())->getNumElements(); |
9009 | 224 | for (unsigned i = 0; i < InputElements; i++) {
9010 | 192 | Indices.push_back(i); |
9011 | 192 | Indices.push_back(i + InputElements); |
9012 | 192 | } |
9013 | 32 | return Builder.CreateShuffleVector(V0, V1, Indices); |
9014 | 32 | } |
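// For example (editor's sketch): zipping two <4 x i32> inputs builds the
// index list {0, 4, 1, 5, 2, 6, 3, 7}, producing an <8 x i32> of the form
// {V0[0], V1[0], V0[1], V1[1], ...}.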
9015 | | |
9016 | | template<unsigned HighBit, unsigned OtherBits> |
9017 | 36 | static llvm::Value *ARMMVEConstantSplat(CGBuilderTy &Builder, llvm::Type *VT) { |
9018 | | // MVE-specific helper function to make a vector splat of a constant such as |
9019 | | // UINT_MAX or INT_MIN, in which all bits below the highest one are equal. |
9020 | 36 | llvm::Type *T = cast<llvm::VectorType>(VT)->getElementType(); |
9021 | 36 | unsigned LaneBits = T->getPrimitiveSizeInBits(); |
9022 | 36 | uint32_t Value = HighBit << (LaneBits - 1); |
9023 | 36 | if (OtherBits) |
9024 | 24 | Value |= (1UL << (LaneBits - 1)) - 1; |
9025 | 36 | llvm::Value *Lane = llvm::ConstantInt::get(T, Value); |
9026 | 36 | return ARMMVEVectorSplat(Builder, Lane); |
9027 | 36 | }
9028 | | |
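// For example (editor's sketch), with 32-bit lanes the three instantiations
// used in this file splat:
//   ARMMVEConstantSplat<1u, 0u>  ->  0x80000000  (INT_MIN)
//   ARMMVEConstantSplat<0u, 1u>  ->  0x7fffffff  (INT_MAX)
//   ARMMVEConstantSplat<1u, 1u>  ->  0xffffffff  (UINT_MAX)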
9029 | | static llvm::Value *ARMMVEVectorElementReverse(CGBuilderTy &Builder, |
9030 | | llvm::Value *V, |
9031 | 46 | unsigned ReverseWidth) { |
9032 | | // MVE-specific helper function which reverses the elements of a |
9033 | | // vector within every (ReverseWidth)-bit collection of lanes. |
9034 | 46 | SmallVector<int, 16> Indices; |
9035 | 46 | unsigned LaneSize = V->getType()->getScalarSizeInBits(); |
9036 | 46 | unsigned Elements = 128 / LaneSize; |
9037 | 46 | unsigned Mask = ReverseWidth / LaneSize - 1; |
9038 | 550 | for (unsigned i = 0; i < Elements; i++)
9039 | 504 | Indices.push_back(i ^ Mask); |
9040 | 46 | return Builder.CreateShuffleVector(V, Indices); |
9041 | 46 | } |
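// For example (editor's sketch): for a <16 x i8> input and ReverseWidth == 32,
// Mask is 32 / 8 - 1 = 3, giving indices {3,2,1,0, 7,6,5,4, ...}: the lanes
// are reversed within each 32-bit group, which is what vrev32q_s8 requires.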
9042 | | |
9043 | | Value *CodeGenFunction::EmitARMMVEBuiltinExpr(unsigned BuiltinID, |
9044 | | const CallExpr *E, |
9045 | | ReturnValueSlot ReturnValue, |
9046 | 6.40k | llvm::Triple::ArchType Arch) { |
9047 | 6.40k | enum class CustomCodeGen { VLD24, VST24 } CustomCodeGenType; |
9048 | 6.40k | Intrinsic::ID IRIntr; |
9049 | 6.40k | unsigned NumVectors; |
9050 | | |
9051 | | // Code autogenerated by Tablegen will handle all the simple builtins. |
9052 | 6.40k | switch (BuiltinID) { |
9053 | 4 | #include "clang/Basic/arm_mve_builtin_cg.inc" |
9054 | | |
9055 | | // If we didn't match an MVE builtin id at all, go back to the |
9056 | | // main EmitARMBuiltinExpr. |
9057 | 1.81k | default: |
9058 | 1.81k | return nullptr; |
9059 | 6.40k | } |
9060 | | |
9061 | | // Anything that breaks from that switch is an MVE builtin that |
9062 | | // needs handwritten code to generate. |
9063 | | |
9064 | 14 | switch (CustomCodeGenType) { |
9065 | | |
9066 | 8 | case CustomCodeGen::VLD24: { |
9067 | 8 | llvm::SmallVector<Value *, 4> Ops; |
9068 | 8 | llvm::SmallVector<llvm::Type *, 4> Tys; |
9069 | | |
9070 | 8 | auto MvecCType = E->getType(); |
9071 | 8 | auto MvecLType = ConvertType(MvecCType); |
9072 | 8 | assert(MvecLType->isStructTy() && |
9073 | 8 | "Return type for vld[24]q should be a struct"); |
9074 | 8 | assert(MvecLType->getStructNumElements() == 1 && |
9075 | 8 | "Return-type struct for vld[24]q should have one element"); |
9076 | 8 | auto MvecLTypeInner = MvecLType->getStructElementType(0); |
9077 | 8 | assert(MvecLTypeInner->isArrayTy() && |
9078 | 8 | "Return-type struct for vld[24]q should contain an array"); |
9079 | 8 | assert(MvecLTypeInner->getArrayNumElements() == NumVectors && |
9080 | 8 | "Array member of return-type struct vld[24]q has wrong length"); |
9081 | 8 | auto VecLType = MvecLTypeInner->getArrayElementType(); |
9082 | | |
9083 | 8 | Tys.push_back(VecLType); |
9084 | | |
9085 | 8 | auto Addr = E->getArg(0); |
9086 | 8 | Ops.push_back(EmitScalarExpr(Addr)); |
9087 | 8 | Tys.push_back(ConvertType(Addr->getType())); |
9088 | | |
9089 | 8 | Function *F = CGM.getIntrinsic(IRIntr, ArrayRef(Tys)); |
9090 | 8 | Value *LoadResult = Builder.CreateCall(F, Ops); |
9091 | 8 | Value *MvecOut = PoisonValue::get(MvecLType); |
9092 | 28 | for (unsigned i = 0; i < NumVectors; ++i) {
9093 | 20 | Value *Vec = Builder.CreateExtractValue(LoadResult, i); |
9094 | 20 | MvecOut = Builder.CreateInsertValue(MvecOut, Vec, {0, i}); |
9095 | 20 | } |
9096 | | |
9097 | 8 | if (ReturnValue.isNull()) |
9098 | 0 | return MvecOut; |
9099 | 8 | else |
9100 | 8 | return Builder.CreateStore(MvecOut, ReturnValue.getValue()); |
9101 | 8 | } |
9102 | | |
9103 | 6 | case CustomCodeGen::VST24: { |
9104 | 6 | llvm::SmallVector<Value *, 4> Ops; |
9105 | 6 | llvm::SmallVector<llvm::Type *, 4> Tys; |
9106 | | |
9107 | 6 | auto Addr = E->getArg(0); |
9108 | 6 | Ops.push_back(EmitScalarExpr(Addr)); |
9109 | 6 | Tys.push_back(ConvertType(Addr->getType())); |
9110 | | |
9111 | 6 | auto MvecCType = E->getArg(1)->getType(); |
9112 | 6 | auto MvecLType = ConvertType(MvecCType); |
9113 | 6 | assert(MvecLType->isStructTy() && "Data type for vst2q should be a struct"); |
9114 | 6 | assert(MvecLType->getStructNumElements() == 1 && |
9115 | 6 | "Data-type struct for vst2q should have one element"); |
9116 | 6 | auto MvecLTypeInner = MvecLType->getStructElementType(0); |
9117 | 6 | assert(MvecLTypeInner->isArrayTy() && |
9118 | 6 | "Data-type struct for vst2q should contain an array"); |
9119 | 6 | assert(MvecLTypeInner->getArrayNumElements() == NumVectors && |
9120 | 6 | "Array member of data-type struct vst[24]q has wrong length");
9121 | 6 | auto VecLType = MvecLTypeInner->getArrayElementType(); |
9122 | | |
9123 | 6 | Tys.push_back(VecLType); |
9124 | | |
9125 | 6 | AggValueSlot MvecSlot = CreateAggTemp(MvecCType); |
9126 | 6 | EmitAggExpr(E->getArg(1), MvecSlot); |
9127 | 6 | auto Mvec = Builder.CreateLoad(MvecSlot.getAddress()); |
9128 | 22 | for (unsigned i = 0; i < NumVectors; i++)
9129 | 16 | Ops.push_back(Builder.CreateExtractValue(Mvec, {0, i})); |
9130 | | |
9131 | 6 | Function *F = CGM.getIntrinsic(IRIntr, ArrayRef(Tys)); |
9132 | 6 | Value *ToReturn = nullptr; |
9133 | 22 | for (unsigned i = 0; i < NumVectors; i++) {
9134 | 16 | Ops.push_back(llvm::ConstantInt::get(Int32Ty, i)); |
9135 | 16 | ToReturn = Builder.CreateCall(F, Ops); |
9136 | 16 | Ops.pop_back(); |
9137 | 16 | } |
9138 | 6 | return ToReturn; |
9139 | 6 | } |
9140 | 14 | } |
9141 | 0 | llvm_unreachable("unknown custom codegen type."); |
9142 | 0 | } |
9143 | | |
9144 | | Value *CodeGenFunction::EmitARMCDEBuiltinExpr(unsigned BuiltinID, |
9145 | | const CallExpr *E, |
9146 | | ReturnValueSlot ReturnValue, |
9147 | 1.81k | llvm::Triple::ArchType Arch) { |
9148 | 1.81k | switch (BuiltinID) { |
9149 | 1.77k | default: |
9150 | 1.77k | return nullptr; |
9151 | 1.81k | #include "clang/Basic/arm_cde_builtin_cg.inc"
9152 | 1.81k | } |
9153 | 1.81k | } |
9154 | | |
9155 | | static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID, |
9156 | | const CallExpr *E, |
9157 | | SmallVectorImpl<Value *> &Ops, |
9158 | 1.47k | llvm::Triple::ArchType Arch) { |
9159 | 1.47k | unsigned int Int = 0; |
9160 | 1.47k | const char *s = nullptr; |
9161 | | |
9162 | 1.47k | switch (BuiltinID) { |
9163 | 1.40k | default: |
9164 | 1.40k | return nullptr; |
9165 | 3 | case NEON::BI__builtin_neon_vtbl1_v: |
9166 | 6 | case NEON::BI__builtin_neon_vqtbl1_v: |
9167 | 9 | case NEON::BI__builtin_neon_vqtbl1q_v: |
9168 | 12 | case NEON::BI__builtin_neon_vtbl2_v: |
9169 | 15 | case NEON::BI__builtin_neon_vqtbl2_v: |
9170 | 18 | case NEON::BI__builtin_neon_vqtbl2q_v: |
9171 | 21 | case NEON::BI__builtin_neon_vtbl3_v: |
9172 | 24 | case NEON::BI__builtin_neon_vqtbl3_v: |
9173 | 27 | case NEON::BI__builtin_neon_vqtbl3q_v: |
9174 | 30 | case NEON::BI__builtin_neon_vtbl4_v: |
9175 | 33 | case NEON::BI__builtin_neon_vqtbl4_v: |
9176 | 36 | case NEON::BI__builtin_neon_vqtbl4q_v: |
9177 | 36 | break; |
9178 | 3 | case NEON::BI__builtin_neon_vtbx1_v: |
9179 | 6 | case NEON::BI__builtin_neon_vqtbx1_v: |
9180 | 9 | case NEON::BI__builtin_neon_vqtbx1q_v: |
9181 | 12 | case NEON::BI__builtin_neon_vtbx2_v: |
9182 | 15 | case NEON::BI__builtin_neon_vqtbx2_v: |
9183 | 18 | case NEON::BI__builtin_neon_vqtbx2q_v: |
9184 | 21 | case NEON::BI__builtin_neon_vtbx3_v: |
9185 | 24 | case NEON::BI__builtin_neon_vqtbx3_v: |
9186 | 27 | case NEON::BI__builtin_neon_vqtbx3q_v: |
9187 | 30 | case NEON::BI__builtin_neon_vtbx4_v: |
9188 | 33 | case NEON::BI__builtin_neon_vqtbx4_v: |
9189 | 36 | case NEON::BI__builtin_neon_vqtbx4q_v: |
9190 | 36 | break; |
9191 | 1.47k | } |
9192 | | |
9193 | 72 | assert(E->getNumArgs() >= 3); |
9194 | | |
9195 | | // Get the last argument, which specifies the vector type. |
9196 | 72 | const Expr *Arg = E->getArg(E->getNumArgs() - 1); |
9197 | 72 | std::optional<llvm::APSInt> Result = |
9198 | 72 | Arg->getIntegerConstantExpr(CGF.getContext()); |
9199 | 72 | if (!Result) |
9200 | 0 | return nullptr; |
9201 | | |
9202 | | // Determine the type of this overloaded NEON intrinsic. |
9203 | 72 | NeonTypeFlags Type = Result->getZExtValue(); |
9204 | 72 | llvm::FixedVectorType *Ty = GetNeonType(&CGF, Type); |
9205 | 72 | if (!Ty) |
9206 | 0 | return nullptr; |
9207 | | |
9208 | 72 | CodeGen::CGBuilderTy &Builder = CGF.Builder; |
9209 | | |
9210 | | // AArch64 scalar builtins are not overloaded; they do not have an extra
9211 | | // argument that specifies the vector type, so each case is handled below.
9212 | 72 | switch (BuiltinID) { |
9213 | 3 | case NEON::BI__builtin_neon_vtbl1_v: { |
9214 | 3 | return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 1), nullptr, Ops[1], |
9215 | 3 | Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1"); |
9216 | 0 | } |
9217 | 3 | case NEON::BI__builtin_neon_vtbl2_v: { |
9218 | 3 | return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 2), nullptr, Ops[2], |
9219 | 3 | Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1"); |
9220 | 0 | } |
9221 | 3 | case NEON::BI__builtin_neon_vtbl3_v: { |
9222 | 3 | return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 3), nullptr, Ops[3], |
9223 | 3 | Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2"); |
9224 | 0 | } |
9225 | 3 | case NEON::BI__builtin_neon_vtbl4_v: { |
9226 | 3 | return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 4), nullptr, Ops[4], |
9227 | 3 | Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2"); |
9228 | 0 | } |
9229 | 3 | case NEON::BI__builtin_neon_vtbx1_v: { |
9230 | 3 | Value *TblRes = |
9231 | 3 | packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 1), nullptr, Ops[2], Ty, |
9232 | 3 | Intrinsic::aarch64_neon_tbl1, "vtbl1"); |
9233 | | |
9234 | 3 | llvm::Constant *EightV = ConstantInt::get(Ty, 8); |
9235 | 3 | Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[2], EightV); |
9236 | 3 | CmpRes = Builder.CreateSExt(CmpRes, Ty); |
9237 | | |
9238 | 3 | Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]); |
9239 | 3 | Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes); |
9240 | 3 | return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx"); |
9241 | 0 | } |
9242 | 3 | case NEON::BI__builtin_neon_vtbx2_v: { |
9243 | 3 | return packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 2), Ops[0], Ops[3], |
9244 | 3 | Ty, Intrinsic::aarch64_neon_tbx1, "vtbx1"); |
9245 | 0 | } |
9246 | 3 | case NEON::BI__builtin_neon_vtbx3_v: { |
9247 | 3 | Value *TblRes = |
9248 | 3 | packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 3), nullptr, Ops[4], Ty, |
9249 | 3 | Intrinsic::aarch64_neon_tbl2, "vtbl2"); |
9250 | | |
9251 | 3 | llvm::Constant *TwentyFourV = ConstantInt::get(Ty, 24); |
9252 | 3 | Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[4], |
9253 | 3 | TwentyFourV); |
9254 | 3 | CmpRes = Builder.CreateSExt(CmpRes, Ty); |
9255 | | |
9256 | 3 | Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]); |
9257 | 3 | Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes); |
9258 | 3 | return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx"); |
9259 | 0 | } |
9260 | 3 | case NEON::BI__builtin_neon_vtbx4_v: { |
9261 | 3 | return packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 4), Ops[0], Ops[5], |
9262 | 3 | Ty, Intrinsic::aarch64_neon_tbx2, "vtbx2"); |
9263 | 0 | } |
9264 | 3 | case NEON::BI__builtin_neon_vqtbl1_v: |
9265 | 6 | case NEON::BI__builtin_neon_vqtbl1q_v: |
9266 | 6 | Int = Intrinsic::aarch64_neon_tbl1; s = "vtbl1"; break; |
9267 | 3 | case NEON::BI__builtin_neon_vqtbl2_v: |
9268 | 6 | case NEON::BI__builtin_neon_vqtbl2q_v: { |
9269 | 6 | Int = Intrinsic::aarch64_neon_tbl2; s = "vtbl2"; break; |
9270 | 3 | case NEON::BI__builtin_neon_vqtbl3_v: |
9271 | 6 | case NEON::BI__builtin_neon_vqtbl3q_v: |
9272 | 6 | Int = Intrinsic::aarch64_neon_tbl3; s = "vtbl3"; break; |
9273 | 3 | case NEON::BI__builtin_neon_vqtbl4_v: |
9274 | 6 | case NEON::BI__builtin_neon_vqtbl4q_v: |
9275 | 6 | Int = Intrinsic::aarch64_neon_tbl4; s = "vtbl4"; break; |
9276 | 3 | case NEON::BI__builtin_neon_vqtbx1_v: |
9277 | 6 | case NEON::BI__builtin_neon_vqtbx1q_v: |
9278 | 6 | Int = Intrinsic::aarch64_neon_tbx1; s = "vtbx1"; break; |
9279 | 3 | case NEON::BI__builtin_neon_vqtbx2_v: |
9280 | 6 | case NEON::BI__builtin_neon_vqtbx2q_v: |
9281 | 6 | Int = Intrinsic::aarch64_neon_tbx2; s = "vtbx2"; break; |
9282 | 3 | case NEON::BI__builtin_neon_vqtbx3_v: |
9283 | 6 | case NEON::BI__builtin_neon_vqtbx3q_v: |
9284 | 6 | Int = Intrinsic::aarch64_neon_tbx3; s = "vtbx3"; break; |
9285 | 3 | case NEON::BI__builtin_neon_vqtbx4_v: |
9286 | 6 | case NEON::BI__builtin_neon_vqtbx4q_v: |
9287 | 6 | Int = Intrinsic::aarch64_neon_tbx4; s = "vtbx4"; break; |
9288 | 3 | } |
9289 | 72 | } |
9290 | | |
9291 | 48 | if (!Int) |
9292 | 0 | return nullptr; |
9293 | | |
9294 | 48 | Function *F = CGF.CGM.getIntrinsic(Int, Ty); |
9295 | 48 | return CGF.EmitNeonCall(F, Ops, s); |
9296 | 48 | } |
9297 | | |
9298 | 12 | Value *CodeGenFunction::vectorWrapScalar16(Value *Op) { |
9299 | 12 | auto *VTy = llvm::FixedVectorType::get(Int16Ty, 4); |
9300 | 12 | Op = Builder.CreateBitCast(Op, Int16Ty); |
9301 | 12 | Value *V = PoisonValue::get(VTy); |
9302 | 12 | llvm::Constant *CI = ConstantInt::get(SizeTy, 0); |
9303 | 12 | Op = Builder.CreateInsertElement(V, Op, CI); |
9304 | 12 | return Op; |
9305 | 12 | } |
9306 | | |
9307 | | /// SVEBuiltinMemEltTy - Returns the memory element type for this memory |
9308 | | /// access builtin. Only required if it can't be inferred from the base pointer |
9309 | | /// operand. |
9310 | 3.76k | llvm::Type *CodeGenFunction::SVEBuiltinMemEltTy(const SVETypeFlags &TypeFlags) { |
9311 | 3.76k | switch (TypeFlags.getMemEltType()) { |
9312 | 1.27k | case SVETypeFlags::MemEltTyDefault: |
9313 | 1.27k | return getEltType(TypeFlags); |
9314 | 679 | case SVETypeFlags::MemEltTyInt8: |
9315 | 679 | return Builder.getInt8Ty(); |
9316 | 1.08k | case SVETypeFlags::MemEltTyInt16: |
9317 | 1.08k | return Builder.getInt16Ty(); |
9318 | 649 | case SVETypeFlags::MemEltTyInt32: |
9319 | 649 | return Builder.getInt32Ty(); |
9320 | 75 | case SVETypeFlags::MemEltTyInt64: |
9321 | 75 | return Builder.getInt64Ty(); |
9322 | 3.76k | } |
9323 | 0 | llvm_unreachable("Unknown MemEltType"); |
9324 | 0 | } |
9325 | | |
9326 | 1.27k | llvm::Type *CodeGenFunction::getEltType(const SVETypeFlags &TypeFlags) { |
9327 | 1.27k | switch (TypeFlags.getEltType()) { |
9328 | 0 | default: |
9329 | 0 | llvm_unreachable("Invalid SVETypeFlag!"); |
9330 | |
9331 | 49 | case SVETypeFlags::EltTyInt8: |
9332 | 49 | return Builder.getInt8Ty(); |
9333 | 49 | case SVETypeFlags::EltTyInt16: |
9334 | 49 | return Builder.getInt16Ty(); |
9335 | 355 | case SVETypeFlags::EltTyInt32: |
9336 | 355 | return Builder.getInt32Ty(); |
9337 | 427 | case SVETypeFlags::EltTyInt64: |
9338 | 427 | return Builder.getInt64Ty(); |
9339 | 48 | case SVETypeFlags::EltTyInt128: |
9340 | 48 | return Builder.getInt128Ty(); |
9341 | | |
9342 | 0 | case SVETypeFlags::EltTyFloat16: |
9343 | 0 | return Builder.getHalfTy(); |
9344 | 153 | case SVETypeFlags::EltTyFloat32: |
9345 | 153 | return Builder.getFloatTy(); |
9346 | 189 | case SVETypeFlags::EltTyFloat64: |
9347 | 189 | return Builder.getDoubleTy(); |
9348 | | |
9349 | 0 | case SVETypeFlags::EltTyBFloat16: |
9350 | 0 | return Builder.getBFloatTy(); |
9351 | | |
9352 | 0 | case SVETypeFlags::EltTyBool8: |
9353 | 0 | case SVETypeFlags::EltTyBool16: |
9354 | 0 | case SVETypeFlags::EltTyBool32: |
9355 | 0 | case SVETypeFlags::EltTyBool64: |
9356 | 0 | return Builder.getInt1Ty(); |
9357 | 1.27k | } |
9358 | 1.27k | } |
9359 | | |
9360 | | // Return the llvm predicate vector type corresponding to the specified element |
9361 | | // TypeFlags. |
9362 | | llvm::ScalableVectorType * |
9363 | 168 | CodeGenFunction::getSVEPredType(const SVETypeFlags &TypeFlags) { |
9364 | 168 | switch (TypeFlags.getEltType()) { |
9365 | 0 | default: llvm_unreachable("Unhandled SVETypeFlag!"); |
9366 | |
9367 | 24 | case SVETypeFlags::EltTyInt8: |
9368 | 24 | return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16); |
9369 | 24 | case SVETypeFlags::EltTyInt16: |
9370 | 24 | return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8); |
9371 | 24 | case SVETypeFlags::EltTyInt32: |
9372 | 24 | return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4); |
9373 | 24 | case SVETypeFlags::EltTyInt64: |
9374 | 24 | return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2); |
9375 | | |
9376 | 12 | case SVETypeFlags::EltTyBFloat16: |
9377 | 12 | return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8); |
9378 | 12 | case SVETypeFlags::EltTyFloat16: |
9379 | 12 | return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8); |
9380 | 12 | case SVETypeFlags::EltTyFloat32: |
9381 | 12 | return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4); |
9382 | 12 | case SVETypeFlags::EltTyFloat64: |
9383 | 12 | return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2); |
9384 | | |
9385 | 6 | case SVETypeFlags::EltTyBool8: |
9386 | 6 | return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16); |
9387 | 6 | case SVETypeFlags::EltTyBool16: |
9388 | 6 | return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8); |
9389 | 6 | case SVETypeFlags::EltTyBool32: |
9390 | 6 | return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4); |
9391 | 6 | case SVETypeFlags::EltTyBool64: |
9392 | 6 | return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2); |
9393 | 168 | } |
9394 | 168 | } |
9395 | | |
9396 | | // Return the llvm vector type corresponding to the specified element TypeFlags. |
9397 | | llvm::ScalableVectorType * |
9398 | 47.8k | CodeGenFunction::getSVEType(const SVETypeFlags &TypeFlags) { |
9399 | 47.8k | switch (TypeFlags.getEltType()) { |
9400 | 0 | default: |
9401 | 0 | llvm_unreachable("Invalid SVETypeFlag!"); |
9402 | |
9403 | 7.02k | case SVETypeFlags::EltTyInt8: |
9404 | 7.02k | return llvm::ScalableVectorType::get(Builder.getInt8Ty(), 16); |
9405 | 8.46k | case SVETypeFlags::EltTyInt16: |
9406 | 8.46k | return llvm::ScalableVectorType::get(Builder.getInt16Ty(), 8); |
9407 | 10.5k | case SVETypeFlags::EltTyInt32: |
9408 | 10.5k | return llvm::ScalableVectorType::get(Builder.getInt32Ty(), 4); |
9409 | 10.9k | case SVETypeFlags::EltTyInt64: |
9410 | 10.9k | return llvm::ScalableVectorType::get(Builder.getInt64Ty(), 2); |
9411 | | |
9412 | 2.72k | case SVETypeFlags::EltTyFloat16: |
9413 | 2.72k | return llvm::ScalableVectorType::get(Builder.getHalfTy(), 8); |
9414 | 460 | case SVETypeFlags::EltTyBFloat16: |
9415 | 460 | return llvm::ScalableVectorType::get(Builder.getBFloatTy(), 8); |
9416 | 3.03k | case SVETypeFlags::EltTyFloat32: |
9417 | 3.03k | return llvm::ScalableVectorType::get(Builder.getFloatTy(), 4); |
9418 | 3.10k | case SVETypeFlags::EltTyFloat64: |
9419 | 3.10k | return llvm::ScalableVectorType::get(Builder.getDoubleTy(), 2); |
9420 | | |
9421 | 939 | case SVETypeFlags::EltTyBool8: |
9422 | 939 | return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16); |
9423 | 207 | case SVETypeFlags::EltTyBool16: |
9424 | 207 | return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8); |
9425 | 207 | case SVETypeFlags::EltTyBool32: |
9426 | 207 | return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4); |
9427 | 207 | case SVETypeFlags::EltTyBool64: |
9428 | 207 | return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2); |
9429 | 47.8k | } |
9430 | 47.8k | } |
9431 | | |
9432 | | llvm::Value * |
9433 | 24 | CodeGenFunction::EmitSVEAllTruePred(const SVETypeFlags &TypeFlags) { |
9434 | 24 | Function *Ptrue = |
9435 | 24 | CGM.getIntrinsic(Intrinsic::aarch64_sve_ptrue, getSVEPredType(TypeFlags)); |
9436 | 24 | return Builder.CreateCall(Ptrue, {Builder.getInt32(/*SV_ALL*/ 31)}); |
9437 | 24 | } |
9438 | | |
9439 | | constexpr unsigned SVEBitsPerBlock = 128; |
9440 | | |
9441 | 7.95k | static llvm::ScalableVectorType *getSVEVectorForElementType(llvm::Type *EltTy) { |
9442 | 7.95k | unsigned NumElts = SVEBitsPerBlock / EltTy->getScalarSizeInBits(); |
9443 | 7.95k | return llvm::ScalableVectorType::get(EltTy, NumElts); |
9444 | 7.95k | } |
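// For example (editor's sketch): an i16 element type gives 128 / 16 = 8
// lanes, i.e. <vscale x 8 x i16>, and i64 gives <vscale x 2 x i64>, one
// 128-bit block per vscale increment.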
9445 | | |
9446 | | // Reinterpret the input predicate so that it can be used to correctly isolate |
9447 | | // the elements of the specified datatype. |
9448 | | Value *CodeGenFunction::EmitSVEPredicateCast(Value *Pred, |
9449 | 26.5k | llvm::ScalableVectorType *VTy) { |
9450 | | |
9451 | 26.5k | if (isa<TargetExtType>(Pred->getType()) && |
9452 | 26.5k | cast<TargetExtType>(Pred->getType())->getName() == "aarch64.svcount")
9453 | 704 | return Pred; |
9454 | | |
9455 | 25.8k | auto *RTy = llvm::VectorType::get(IntegerType::get(getLLVMContext(), 1), VTy); |
9456 | 25.8k | if (Pred->getType() == RTy) |
9457 | 4.62k | return Pred; |
9458 | | |
9459 | 21.2k | unsigned IntID; |
9460 | 21.2k | llvm::Type *IntrinsicTy; |
9461 | 21.2k | switch (VTy->getMinNumElements()) { |
9462 | 0 | default: |
9463 | 0 | llvm_unreachable("unsupported element count!"); |
9464 | 48 | case 1: |
9465 | 7.75k | case 2: |
9466 | 14.5k | case 4: |
9467 | 19.8k | case 8: |
9468 | 19.8k | IntID = Intrinsic::aarch64_sve_convert_from_svbool; |
9469 | 19.8k | IntrinsicTy = RTy; |
9470 | 19.8k | break; |
9471 | 1.36k | case 16: |
9472 | 1.36k | IntID = Intrinsic::aarch64_sve_convert_to_svbool; |
9473 | 1.36k | IntrinsicTy = Pred->getType(); |
9474 | 1.36k | break; |
9475 | 21.2k | } |
9476 | | |
9477 | 21.2k | Function *F = CGM.getIntrinsic(IntID, IntrinsicTy); |
9478 | 21.2k | Value *C = Builder.CreateCall(F, Pred); |
9479 | 21.2k | assert(C->getType() == RTy && "Unexpected return type!"); |
9480 | 21.2k | return C; |
9481 | 21.2k | } |
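// Illustrative example (editor's sketch): an ACLE svbool_t predicate is
// <vscale x 16 x i1>, so before use with <vscale x 2 x i64> data it is
// narrowed via
//   @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
// and the opposite direction goes through convert.to.svbool.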
9482 | | |
9483 | | Value *CodeGenFunction::EmitSVEGatherLoad(const SVETypeFlags &TypeFlags, |
9484 | | SmallVectorImpl<Value *> &Ops, |
9485 | 2.37k | unsigned IntID) { |
9486 | 2.37k | auto *ResultTy = getSVEType(TypeFlags); |
9487 | 2.37k | auto *OverloadedTy = |
9488 | 2.37k | llvm::ScalableVectorType::get(SVEBuiltinMemEltTy(TypeFlags), ResultTy); |
9489 | | |
9490 | | // At the ACLE level there's only one predicate type, svbool_t, which is |
9491 | | // mapped to <n x 16 x i1>. However, this might be incompatible with the |
9492 | | // actual type being loaded. For example, when loading doubles (i64) the |
9493 | | // predicated should be <n x 2 x i1> instead. At the IR level the type of |
9494 | | // the predicate and the data being loaded must match. Cast accordingly. |
9495 | 2.37k | Ops[0] = EmitSVEPredicateCast(Ops[0], OverloadedTy); |
9496 | | |
9497 | 2.37k | Function *F = nullptr; |
9498 | 2.37k | if (Ops[1]->getType()->isVectorTy()) |
9499 | | // This is the "vector base, scalar offset" case. In order to uniquely |
9500 | | // map this built-in to an LLVM IR intrinsic, we need both the return type |
9501 | | // and the type of the vector base. |
9502 | 1.12k | F = CGM.getIntrinsic(IntID, {OverloadedTy, Ops[1]->getType()}); |
9503 | 1.25k | else |
9504 | | // This is the "scalar base, vector offset case". The type of the offset |
9505 | | // is encoded in the name of the intrinsic. We only need to specify the |
9506 | | // return type in order to uniquely map this built-in to an LLVM IR |
9507 | | // intrinsic. |
9508 | 1.25k | F = CGM.getIntrinsic(IntID, OverloadedTy); |
9509 | | |
9510 | | // Pass 0 when the offset is missing. This can only be applied when using |
9511 | | // the "vector base" addressing mode for which ACLE allows no offset. The |
9512 | | // corresponding LLVM IR always requires an offset. |
9513 | 2.37k | if (Ops.size() == 2) { |
9514 | 416 | assert(Ops[1]->getType()->isVectorTy() && "Scalar base requires an offset"); |
9515 | 416 | Ops.push_back(ConstantInt::get(Int64Ty, 0)); |
9516 | 416 | } |
9517 | | |
9518 | | // For "vector base, scalar index" scale the index so that it becomes a |
9519 | | // scalar offset. |
9520 | 2.37k | if (!TypeFlags.isByteIndexed() && Ops[1]->getType()->isVectorTy()) {
9521 | 704 | unsigned BytesPerElt = |
9522 | 704 | OverloadedTy->getElementType()->getScalarSizeInBits() / 8; |
9523 | 704 | Ops[2] = Builder.CreateShl(Ops[2], Log2_32(BytesPerElt)); |
9524 | 704 | } |
9525 | | |
9526 | 2.37k | Value *Call = Builder.CreateCall(F, Ops); |
9527 | | |
9528 | | // The following sext/zext is only needed when ResultTy != OverloadedTy. In |
9529 | | // other cases it's folded into a nop. |
9530 | 2.37k | return TypeFlags.isZExtReturn() ? Builder.CreateZExt(Call, ResultTy)
9531 | 2.37k | : Builder.CreateSExt(Call, ResultTy);
9532 | 2.37k | } |
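// Illustrative example (editor's sketch, builtin names per the ACLE): the two
// addressing modes above correspond to builtins such as
//   svld1_gather_u64base_s64(pg, bases);           // vector base, no offset
//   svld1_gather_s64offset_s64(pg, base, offsets); // scalar base + offsets
// The first is overloaded on both the result and base-vector types and gets
// an explicit zero offset appended; the second is overloaded only on the
// result type because the offset type is encoded in the intrinsic name.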
9533 | | |
9534 | | Value *CodeGenFunction::EmitSVEScatterStore(const SVETypeFlags &TypeFlags, |
9535 | | SmallVectorImpl<Value *> &Ops, |
9536 | 838 | unsigned IntID) { |
9537 | 838 | auto *SrcDataTy = getSVEType(TypeFlags); |
9538 | 838 | auto *OverloadedTy = |
9539 | 838 | llvm::ScalableVectorType::get(SVEBuiltinMemEltTy(TypeFlags), SrcDataTy); |
9540 | | |
9541 | | // In ACLE the source data is passed in the last argument, whereas in LLVM IR |
9542 | | // it's the first argument. Move it accordingly. |
9543 | 838 | Ops.insert(Ops.begin(), Ops.pop_back_val()); |
9544 | | |
9545 | 838 | Function *F = nullptr; |
9546 | 838 | if (Ops[2]->getType()->isVectorTy()) |
9547 | | // This is the "vector base, scalar offset" case. In order to uniquely |
9548 | | // map this built-in to an LLVM IR intrinsic, we need both the return type |
9549 | | // and the type of the vector base. |
9550 | 412 | F = CGM.getIntrinsic(IntID, {OverloadedTy, Ops[2]->getType()}); |
9551 | 426 | else |
9552 | | // This is the "scalar base, vector offset case". The type of the offset |
9553 | | // is encoded in the name of the intrinsic. We only need to specify the |
9554 | | // return type in order to uniquely map this built-in to an LLVM IR |
9555 | | // intrinsic. |
9556 | 426 | F = CGM.getIntrinsic(IntID, OverloadedTy); |
9557 | | |
9558 | | // Pass 0 when the offset is missing. This can only be applied when using |
9559 | | // the "vector base" addressing mode for which ACLE allows no offset. The |
9560 | | // corresponding LLVM IR always requires an offset. |
9561 | 838 | if (Ops.size() == 3) { |
9562 | 148 | assert(Ops[1]->getType()->isVectorTy() && "Scalar base requires an offset"); |
9563 | 148 | Ops.push_back(ConstantInt::get(Int64Ty, 0)); |
9564 | 148 | } |
9565 | | |
9566 | | // Truncation is needed when SrcDataTy != OverloadedTy. In other cases it's |
9567 | | // folded into a nop. |
9568 | 838 | Ops[0] = Builder.CreateTrunc(Ops[0], OverloadedTy); |
9569 | | |
9570 | | // At the ACLE level there's only one predicate type, svbool_t, which is |
9571 | | // mapped to <n x 16 x i1>. However, this might be incompatible with the |
9572 | | // actual type being stored. For example, when storing doubles (i64) the |
9573 | | // predicated should be <n x 2 x i1> instead. At the IR level the type of |
9574 | | // the predicate and the data being stored must match. Cast accordingly. |
9575 | 838 | Ops[1] = EmitSVEPredicateCast(Ops[1], OverloadedTy); |
9576 | | |
9577 | | // For "vector base, scalar index" scale the index so that it becomes a |
9578 | | // scalar offset. |
9579 | 838 | if (!TypeFlags.isByteIndexed() && Ops[2]->getType()->isVectorTy()436 ) { |
9580 | 264 | unsigned BytesPerElt = |
9581 | 264 | OverloadedTy->getElementType()->getScalarSizeInBits() / 8; |
9582 | 264 | Ops[3] = Builder.CreateShl(Ops[3], Log2_32(BytesPerElt)); |
9583 | 264 | } |
9584 | | |
9585 | 838 | return Builder.CreateCall(F, Ops); |
9586 | 838 | } |
9587 | | |
9588 | | Value *CodeGenFunction::EmitSVEGatherPrefetch(const SVETypeFlags &TypeFlags, |
9589 | | SmallVectorImpl<Value *> &Ops, |
9590 | 160 | unsigned IntID) { |
9591 | | // The gather prefetches are overloaded on the vector input - this can either |
9592 | | // be the vector of base addresses or vector of offsets. |
9593 | 160 | auto *OverloadedTy = dyn_cast<llvm::ScalableVectorType>(Ops[1]->getType()); |
9594 | 160 | if (!OverloadedTy) |
9595 | 80 | OverloadedTy = cast<llvm::ScalableVectorType>(Ops[2]->getType()); |
9596 | | |
9597 | | // Cast the predicate from svbool_t to the right number of elements. |
9598 | 160 | Ops[0] = EmitSVEPredicateCast(Ops[0], OverloadedTy); |
9599 | | |
9600 | | // vector + imm addressing modes |
9601 | 160 | if (Ops[1]->getType()->isVectorTy()) { |
9602 | 80 | if (Ops.size() == 3) { |
9603 | | // Pass 0 for 'vector+imm' when the index is omitted. |
9604 | 40 | Ops.push_back(ConstantInt::get(Int64Ty, 0)); |
9605 | | |
9606 | | // The sv_prfop is the last operand in the builtin and IR intrinsic. |
9607 | 40 | std::swap(Ops[2], Ops[3]); |
9608 | 40 | } else { |
9609 | | // Index needs to be passed as scaled offset. |
9610 | 40 | llvm::Type *MemEltTy = SVEBuiltinMemEltTy(TypeFlags); |
9611 | 40 | unsigned BytesPerElt = MemEltTy->getPrimitiveSizeInBits() / 8; |
9612 | 40 | if (BytesPerElt > 1) |
9613 | 30 | Ops[2] = Builder.CreateShl(Ops[2], Log2_32(BytesPerElt)); |
9614 | 40 | } |
9615 | 80 | } |
9616 | | |
9617 | 160 | Function *F = CGM.getIntrinsic(IntID, OverloadedTy); |
9618 | 160 | return Builder.CreateCall(F, Ops); |
9619 | 160 | } |
9620 | | |
9621 | | Value *CodeGenFunction::EmitSVEStructLoad(const SVETypeFlags &TypeFlags, |
9622 | | SmallVectorImpl<Value*> &Ops, |
9623 | 640 | unsigned IntID) { |
9624 | 640 | llvm::ScalableVectorType *VTy = getSVEType(TypeFlags); |
9625 | | |
9626 | 640 | unsigned N; |
9627 | 640 | switch (IntID) { |
9628 | 96 | case Intrinsic::aarch64_sve_ld2_sret: |
9629 | 184 | case Intrinsic::aarch64_sve_ld1_pn_x2: |
9630 | 272 | case Intrinsic::aarch64_sve_ldnt1_pn_x2: |
9631 | 272 | N = 2; |
9632 | 272 | break; |
9633 | 96 | case Intrinsic::aarch64_sve_ld3_sret: |
9634 | 96 | N = 3; |
9635 | 96 | break; |
9636 | 96 | case Intrinsic::aarch64_sve_ld4_sret: |
9637 | 184 | case Intrinsic::aarch64_sve_ld1_pn_x4: |
9638 | 272 | case Intrinsic::aarch64_sve_ldnt1_pn_x4: |
9639 | 272 | N = 4; |
9640 | 272 | break; |
9641 | 0 | default: |
9642 | 0 | llvm_unreachable("unknown intrinsic!"); |
9643 | 640 | } |
9644 | 640 | auto RetTy = llvm::VectorType::get(VTy->getElementType(), |
9645 | 640 | VTy->getElementCount() * N); |
9646 | | |
9647 | 640 | Value *Predicate = EmitSVEPredicateCast(Ops[0], VTy); |
9648 | 640 | Value *BasePtr = Ops[1]; |
9649 | | |
9650 | | // Does the load have an offset? |
9651 | 640 | if (Ops.size() > 2) |
9652 | 320 | BasePtr = Builder.CreateGEP(VTy, BasePtr, Ops[2]); |
9653 | | |
9654 | 640 | Function *F = CGM.getIntrinsic(IntID, {VTy}); |
9655 | 640 | Value *Call = Builder.CreateCall(F, {Predicate, BasePtr}); |
9656 | 640 | unsigned MinElts = VTy->getMinNumElements(); |
9657 | 640 | Value *Ret = llvm::PoisonValue::get(RetTy); |
9658 | 2.56k | for (unsigned I = 0; I < N; I++1.92k ) { |
9659 | 1.92k | Value *Idx = ConstantInt::get(CGM.Int64Ty, I * MinElts); |
9660 | 1.92k | Value *SRet = Builder.CreateExtractValue(Call, I); |
9661 | 1.92k | Ret = Builder.CreateInsertVector(RetTy, Ret, SRet, Idx); |
9662 | 1.92k | } |
9663 | 640 | return Ret; |
9664 | 640 | } |
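// Illustrative example (editor's sketch, ACLE names): svld2_s32(pg, base)
// emits @llvm.aarch64.sve.ld2.sret returning two <vscale x 4 x i32> parts;
// the loop above concatenates them into one <vscale x 8 x i32> by inserting
// at element offsets 0 and 4.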
9665 | | |
9666 | | Value *CodeGenFunction::EmitSVEStructStore(const SVETypeFlags &TypeFlags, |
9667 | | SmallVectorImpl<Value*> &Ops, |
9668 | 706 | unsigned IntID) { |
9669 | 706 | llvm::ScalableVectorType *VTy = getSVEType(TypeFlags); |
9670 | | |
9671 | 706 | unsigned N; |
9672 | 706 | switch (IntID) { |
9673 | 118 | case Intrinsic::aarch64_sve_st2: |
9674 | 206 | case Intrinsic::aarch64_sve_st1_pn_x2: |
9675 | 294 | case Intrinsic::aarch64_sve_stnt1_pn_x2: |
9676 | 294 | N = 2; |
9677 | 294 | break; |
9678 | 118 | case Intrinsic::aarch64_sve_st3: |
9679 | 118 | N = 3; |
9680 | 118 | break; |
9681 | 118 | case Intrinsic::aarch64_sve_st4: |
9682 | 206 | case Intrinsic::aarch64_sve_st1_pn_x4: |
9683 | 294 | case Intrinsic::aarch64_sve_stnt1_pn_x4: |
9684 | 294 | N = 4; |
9685 | 294 | break; |
9686 | 0 | default: |
9687 | 0 | llvm_unreachable("unknown intrinsic!"); |
9688 | 706 | } |
9689 | | |
9690 | 706 | Value *Predicate = EmitSVEPredicateCast(Ops[0], VTy); |
9691 | 706 | Value *BasePtr = Ops[1]; |
9692 | | |
9693 | | // Does the store have an offset? |
9694 | 706 | if (Ops.size() > (2 + N)) |
9695 | 353 | BasePtr = Builder.CreateGEP(VTy, BasePtr, Ops[2]); |
9696 | | |
9697 | | // The llvm.aarch64.sve.st2/3/4 intrinsics take legal part vectors, so we |
9698 | | // need to break up the tuple vector. |
9699 | 706 | SmallVector<llvm::Value*, 5> Operands; |
9700 | 2.82k | for (unsigned I = Ops.size() - N; I < Ops.size(); ++I)
9701 | 2.11k | Operands.push_back(Ops[I]); |
9702 | 706 | Operands.append({Predicate, BasePtr}); |
9703 | 706 | Function *F = CGM.getIntrinsic(IntID, { VTy }); |
9704 | | |
9705 | 706 | return Builder.CreateCall(F, Operands); |
9706 | 706 | } |
9707 | | |
9708 | | // SVE2's svpmullb and svpmullt builtins are similar to the svpmullb_pair and |
9709 | | // svpmullt_pair intrinsics, with the exception that their results are bitcast |
9710 | | // to a wider type. |
9711 | | Value *CodeGenFunction::EmitSVEPMull(const SVETypeFlags &TypeFlags, |
9712 | | SmallVectorImpl<Value *> &Ops, |
9713 | 48 | unsigned BuiltinID) { |
9714 | | // Splat scalar operand to vector (intrinsics with _n infix) |
9715 | 48 | if (TypeFlags.hasSplatOperand()) { |
9716 | 24 | unsigned OpNo = TypeFlags.getSplatOperand(); |
9717 | 24 | Ops[OpNo] = EmitSVEDupX(Ops[OpNo]); |
9718 | 24 | } |
9719 | | |
9720 | | // The pair-wise function has a narrower overloaded type. |
9721 | 48 | Function *F = CGM.getIntrinsic(BuiltinID, Ops[0]->getType()); |
9722 | 48 | Value *Call = Builder.CreateCall(F, {Ops[0], Ops[1]}); |
9723 | | |
9724 | | // Now bitcast to the wider result type. |
9725 | 48 | llvm::ScalableVectorType *Ty = getSVEType(TypeFlags); |
9726 | 48 | return EmitSVEReinterpret(Call, Ty); |
9727 | 48 | } |
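// Illustrative example (editor's sketch, ACLE names): for
//   svuint16_t r = svpmullb_u16(a, b);   // a, b: svuint8_t
// the pairwise intrinsic is emitted on <vscale x 16 x i8> and its result is
// then reinterpreted as <vscale x 8 x i16>.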
9728 | | |
9729 | | Value *CodeGenFunction::EmitSVEMovl(const SVETypeFlags &TypeFlags, |
9730 | 72 | ArrayRef<Value *> Ops, unsigned BuiltinID) { |
9731 | 72 | llvm::Type *OverloadedTy = getSVEType(TypeFlags); |
9732 | 72 | Function *F = CGM.getIntrinsic(BuiltinID, OverloadedTy); |
9733 | 72 | return Builder.CreateCall(F, {Ops[0], Builder.getInt32(0)}); |
9734 | 72 | } |
9735 | | |
9736 | | Value *CodeGenFunction::EmitSVEPrefetchLoad(const SVETypeFlags &TypeFlags, |
9737 | | SmallVectorImpl<Value *> &Ops, |
9738 | 260 | unsigned BuiltinID) { |
9739 | 260 | auto *MemEltTy = SVEBuiltinMemEltTy(TypeFlags); |
9740 | 260 | auto *VectorTy = getSVEVectorForElementType(MemEltTy); |
9741 | 260 | auto *MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy); |
9742 | | |
9743 | 260 | Value *Predicate = EmitSVEPredicateCast(Ops[0], MemoryTy); |
9744 | 260 | Value *BasePtr = Ops[1]; |
9745 | | |
9746 | | // Fold the index operand into the base pointer if it was not omitted.
9747 | 260 | if (Ops.size() > 3) |
9748 | 20 | BasePtr = Builder.CreateGEP(MemoryTy, BasePtr, Ops[2]); |
9749 | | |
9750 | 260 | Value *PrfOp = Ops.back(); |
9751 | | |
9752 | 260 | Function *F = CGM.getIntrinsic(BuiltinID, Predicate->getType()); |
9753 | 260 | return Builder.CreateCall(F, {Predicate, BasePtr, PrfOp}); |
9754 | 260 | } |
9755 | | |
9756 | | Value *CodeGenFunction::EmitSVEMaskedLoad(const CallExpr *E, |
9757 | | llvm::Type *ReturnTy, |
9758 | | SmallVectorImpl<Value *> &Ops, |
9759 | | unsigned BuiltinID, |
9760 | 1.10k | bool IsZExtReturn) { |
9761 | 1.10k | QualType LangPTy = E->getArg(1)->getType(); |
9762 | 1.10k | llvm::Type *MemEltTy = CGM.getTypes().ConvertType( |
9763 | 1.10k | LangPTy->castAs<PointerType>()->getPointeeType()); |
9764 | | |
9765 | | // The vector type that is returned may be different from the |
9766 | | // eventual type loaded from memory. |
9767 | 1.10k | auto VectorTy = cast<llvm::ScalableVectorType>(ReturnTy); |
9768 | 1.10k | auto MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy); |
9769 | | |
9770 | 1.10k | Value *Predicate = EmitSVEPredicateCast(Ops[0], MemoryTy); |
9771 | 1.10k | Value *BasePtr = Ops[1]; |
9772 | | |
9773 | | // Does the load have an offset? |
9774 | 1.10k | if (Ops.size() > 2) |
9775 | 552 | BasePtr = Builder.CreateGEP(MemoryTy, BasePtr, Ops[2]); |
9776 | | |
9777 | 1.10k | Function *F = CGM.getIntrinsic(BuiltinID, MemoryTy); |
9778 | 1.10k | auto *Load = |
9779 | 1.10k | cast<llvm::Instruction>(Builder.CreateCall(F, {Predicate, BasePtr})); |
9780 | 1.10k | auto TBAAInfo = CGM.getTBAAAccessInfo(LangPTy->getPointeeType()); |
9781 | 1.10k | CGM.DecorateInstructionWithTBAA(Load, TBAAInfo); |
9782 | | |
9783 | 1.10k | return IsZExtReturn ? Builder.CreateZExt(Load, VectorTy)
9784 | 1.10k | : Builder.CreateSExt(Load, VectorTy);
9785 | 1.10k | } |
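// Illustrative example (editor's sketch, ACLE names): svld1sb_s32(pg, base)
// loads <vscale x 4 x i8> through the masked-load intrinsic and sign-extends
// to <vscale x 4 x i32>; the unsigned forms (e.g. svld1ub_u32) take the
// zero-extending path.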
9786 | | |
9787 | | Value *CodeGenFunction::EmitSVEMaskedStore(const CallExpr *E, |
9788 | | SmallVectorImpl<Value *> &Ops, |
9789 | 292 | unsigned BuiltinID) { |
9790 | 292 | QualType LangPTy = E->getArg(1)->getType(); |
9791 | 292 | llvm::Type *MemEltTy = CGM.getTypes().ConvertType( |
9792 | 292 | LangPTy->castAs<PointerType>()->getPointeeType()); |
9793 | | |
9794 | | // The vector type that is stored may be different from the |
9795 | | // eventual type stored to memory. |
9796 | 292 | auto VectorTy = cast<llvm::ScalableVectorType>(Ops.back()->getType()); |
9797 | 292 | auto MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy); |
9798 | | |
9799 | 292 | Value *Predicate = EmitSVEPredicateCast(Ops[0], MemoryTy); |
9800 | 292 | Value *BasePtr = Ops[1]; |
9801 | | |
9802 | | // Does the store have an offset? |
9803 | 292 | if (Ops.size() == 4) |
9804 | 146 | BasePtr = Builder.CreateGEP(MemoryTy, BasePtr, Ops[2]); |
9805 | | |
9806 | | // Last value is always the data |
9807 | 292 | llvm::Value *Val = Builder.CreateTrunc(Ops.back(), MemoryTy); |
9808 | | |
9809 | 292 | Function *F = CGM.getIntrinsic(BuiltinID, MemoryTy); |
9810 | 292 | auto *Store = |
9811 | 292 | cast<llvm::Instruction>(Builder.CreateCall(F, {Val, Predicate, BasePtr})); |
9812 | 292 | auto TBAAInfo = CGM.getTBAAAccessInfo(LangPTy->getPointeeType()); |
9813 | 292 | CGM.DecorateInstructionWithTBAA(Store, TBAAInfo); |
9814 | 292 | return Store; |
9815 | 292 | } |
9816 | | |
9817 | | Value *CodeGenFunction::EmitSMELd1St1(const SVETypeFlags &TypeFlags, |
9818 | | SmallVectorImpl<Value *> &Ops, |
9819 | 244 | unsigned IntID) { |
9820 | 244 | Ops[2] = EmitSVEPredicateCast( |
9821 | 244 | Ops[2], getSVEVectorForElementType(SVEBuiltinMemEltTy(TypeFlags))); |
9822 | | |
9823 | 244 | SmallVector<Value *> NewOps; |
9824 | 244 | NewOps.push_back(Ops[2]); |
9825 | | |
9826 | 244 | llvm::Value *BasePtr = Ops[3]; |
9827 | | |
9828 | | // If the intrinsic takes a vnum parameter, multiply it by the vector
9829 | | // size in bytes to form the byte offset from the base pointer.
9830 | 244 | if (Ops.size() == 5) { |
9831 | 122 | Function *StreamingVectorLength = |
9832 | 122 | CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsb); |
9833 | 122 | llvm::Value *StreamingVectorLengthCall = |
9834 | 122 | Builder.CreateCall(StreamingVectorLength); |
9835 | 122 | llvm::Value *Mulvl = |
9836 | 122 | Builder.CreateMul(StreamingVectorLengthCall, Ops[4], "mulvl"); |
9837 | | // The type of the ptr parameter is void *, so use Int8Ty here. |
9838 | 122 | BasePtr = Builder.CreateGEP(Int8Ty, Ops[3], Mulvl); |
9839 | 122 | } |
9840 | 244 | NewOps.push_back(BasePtr); |
9841 | 244 | NewOps.push_back(Ops[0]); |
9842 | 244 | NewOps.push_back(Ops[1]); |
9843 | 244 | Function *F = CGM.getIntrinsic(IntID); |
9844 | 244 | return Builder.CreateCall(F, NewOps); |
9845 | 244 | } |
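// Illustrative example (editor's sketch, ACLE SME names): for
//   svld1_hor_vnum_za8(/*tile=*/0, slice, pg, ptr, vnum);
// the base pointer becomes ptr + svcntsb() * vnum, i.e. vnum whole vectors
// beyond ptr, before the SME load/store intrinsic is emitted.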
9846 | | |
9847 | | Value *CodeGenFunction::EmitSMEReadWrite(const SVETypeFlags &TypeFlags, |
9848 | | SmallVectorImpl<Value *> &Ops, |
9849 | 960 | unsigned IntID) { |
9850 | 960 | auto *VecTy = getSVEType(TypeFlags); |
9851 | 960 | Function *F = CGM.getIntrinsic(IntID, VecTy); |
9852 | 960 | if (TypeFlags.isReadZA()) |
9853 | 480 | Ops[1] = EmitSVEPredicateCast(Ops[1], VecTy); |
9854 | 480 | else if (TypeFlags.isWriteZA()) |
9855 | 480 | Ops[2] = EmitSVEPredicateCast(Ops[2], VecTy); |
9856 | 960 | return Builder.CreateCall(F, Ops); |
9857 | 960 | } |
9858 | | |
9859 | | Value *CodeGenFunction::EmitSMEZero(const SVETypeFlags &TypeFlags, |
9860 | | SmallVectorImpl<Value *> &Ops, |
9861 | 12 | unsigned IntID) { |
9862 | | // svzero_za() intrinsic zeros the entire ZA tile and has no parameters.
9863 | 12 | if (Ops.size() == 0) |
9864 | 3 | Ops.push_back(llvm::ConstantInt::get(Int32Ty, 255)); |
9865 | 12 | Function *F = CGM.getIntrinsic(IntID, {}); |
9866 | 12 | return Builder.CreateCall(F, Ops); |
9867 | 12 | } |
9868 | | |
9869 | | Value *CodeGenFunction::EmitSMELdrStr(const SVETypeFlags &TypeFlags, |
9870 | | SmallVectorImpl<Value *> &Ops, |
9871 | 24 | unsigned IntID) { |
9872 | 24 | if (Ops.size() == 3) { |
9873 | 18 | Function *Cntsb = CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsb); |
9874 | 18 | llvm::Value *CntsbCall = Builder.CreateCall(Cntsb, {}, "svlb"); |
9875 | | |
9876 | 18 | llvm::Value *VecNum = Ops[2]; |
9877 | 18 | llvm::Value *MulVL = Builder.CreateMul(CntsbCall, VecNum, "mulvl"); |
9878 | | |
9879 | 18 | Ops[1] = Builder.CreateGEP(Int8Ty, Ops[1], MulVL); |
9880 | 18 | Ops[0] = Builder.CreateAdd( |
9881 | 18 | Ops[0], Builder.CreateIntCast(VecNum, Int32Ty, true), "tileslice"); |
9882 | 18 | Ops.erase(&Ops[2]); |
9883 | 18 | } |
9884 | 24 | Function *F = CGM.getIntrinsic(IntID, {}); |
9885 | 24 | return Builder.CreateCall(F, Ops); |
9886 | 24 | } |
9887 | | |
9888 | | // Splat a scalar across the lanes of an SVE vector type. This is emitted
9889 | | // with IRBuilder::CreateVectorSplat, which supports scalable vector types.
9890 | 7.38k | Value *CodeGenFunction::EmitSVEDupX(Value *Scalar, llvm::Type *Ty) { |
9891 | 7.38k | return Builder.CreateVectorSplat( |
9892 | 7.38k | cast<llvm::VectorType>(Ty)->getElementCount(), Scalar); |
9893 | 7.38k | } |
9894 | | |
9895 | 7.36k | Value *CodeGenFunction::EmitSVEDupX(Value* Scalar) { |
9896 | 7.36k | return EmitSVEDupX(Scalar, getSVEVectorForElementType(Scalar->getType())); |
9897 | 7.36k | } |
9898 | | |
9899 | 2.50k | Value *CodeGenFunction::EmitSVEReinterpret(Value *Val, llvm::Type *Ty) { |
9900 | | // FIXME: For big endian this needs an additional REV, or needs a separate |
9901 | | // intrinsic that is code-generated as a no-op, because the LLVM bitcast |
9902 | | // instruction is defined as 'bitwise' equivalent from memory point of |
9903 | | // view (when storing/reloading), whereas the svreinterpret builtin |
9904 | | // implements bitwise equivalent cast from register point of view. |
9905 | | // LLVM CodeGen for a bitcast must add an explicit REV for big-endian. |
9906 | 2.50k | return Builder.CreateBitCast(Val, Ty); |
9907 | 2.50k | } |
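 | | // Editor's example: on little endian, svreinterpret_f32_u32(op) thus
 | | // emits a single no-op IR instruction,
 | | //   %0 = bitcast <vscale x 4 x i32> %op to <vscale x 4 x float>
 | | // with no data movement; the big-endian REV discussed in the FIXME is
 | | // still missing.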
9908 | | |
9909 | | static void InsertExplicitZeroOperand(CGBuilderTy &Builder, llvm::Type *Ty, |
9910 | 907 | SmallVectorImpl<Value *> &Ops) { |
9911 | 907 | auto *SplatZero = Constant::getNullValue(Ty); |
9912 | 907 | Ops.insert(Ops.begin(), SplatZero); |
9913 | 907 | } |
9914 | | |
9915 | | static void InsertExplicitUndefOperand(CGBuilderTy &Builder, llvm::Type *Ty, |
9916 | 917 | SmallVectorImpl<Value *> &Ops) { |
9917 | 917 | auto *SplatUndef = UndefValue::get(Ty); |
9918 | 917 | Ops.insert(Ops.begin(), SplatUndef); |
9919 | 917 | } |
9920 | | |
9921 | | SmallVector<llvm::Type *, 2> |
9922 | | CodeGenFunction::getSVEOverloadTypes(const SVETypeFlags &TypeFlags, |
9923 | | llvm::Type *ResultType, |
9924 | 25.9k | ArrayRef<Value *> Ops) { |
9925 | 25.9k | if (TypeFlags.isOverloadNone()) |
9926 | 1.40k | return {}; |
9927 | | |
9928 | 24.5k | llvm::Type *DefaultType = getSVEType(TypeFlags); |
9929 | | |
9930 | 24.5k | if (TypeFlags.isOverloadWhile()) |
9931 | 352 | return {DefaultType, Ops[1]->getType()}; |
9932 | | |
9933 | 24.2k | if (TypeFlags.isOverloadWhileRW()) |
9934 | 144 | return {getSVEPredType(TypeFlags), Ops[0]->getType()}; |
9935 | | |
9936 | 24.0k | if (TypeFlags.isOverloadCvt()) |
9937 | 150 | return {Ops[0]->getType(), Ops.back()->getType()}; |
9938 | | |
9939 | 23.9k | assert(TypeFlags.isOverloadDefault() && "Unexpected value for overloads"); |
9940 | 23.9k | return {DefaultType}; |
9941 | 23.9k | } |
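 | | // Editor's worked example: svwhilelt_b32 with two int64_t arguments
 | | // reports isOverloadWhile(), so the list is {nxv4i1, i64} and the
 | | // builtin binds to llvm.aarch64.sve.whilelt.nxv4i1.i64 (the result and
 | | // the scalar operand are the two overloaded positions).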
9942 | | |
9943 | | Value *CodeGenFunction::EmitSVETupleSetOrGet(const SVETypeFlags &TypeFlags, |
9944 | | llvm::Type *Ty, |
9945 | 375 | ArrayRef<Value *> Ops) { |
9946 | 375 | assert((TypeFlags.isTupleSet() || TypeFlags.isTupleGet()) && |
9947 | 375 | "Expects TypeFlags.isTupleSet() or TypeFlags.isTupleGet()");
9948 | | |
9949 | 375 | unsigned I = cast<ConstantInt>(Ops[1])->getSExtValue(); |
9950 | 375 | auto *SingleVecTy = dyn_cast<llvm::ScalableVectorType>( |
9951 | 375 | TypeFlags.isTupleSet() ? Ops[2]->getType()171 : Ty204 ); |
9952 | 375 | Value *Idx = ConstantInt::get(CGM.Int64Ty, |
9953 | 375 | I * SingleVecTy->getMinNumElements()); |
9954 | | |
9955 | 375 | if (TypeFlags.isTupleSet()) |
9956 | 171 | return Builder.CreateInsertVector(Ty, Ops[0], Ops[2], Idx); |
9957 | 204 | return Builder.CreateExtractVector(Ty, Ops[0], Idx); |
9958 | 375 | } |
9959 | | |
9960 | | Value *CodeGenFunction::EmitSVETupleCreate(const SVETypeFlags &TypeFlags, |
9961 | | llvm::Type *Ty, |
9962 | 147 | ArrayRef<Value *> Ops) { |
9963 | 147 | assert(TypeFlags.isTupleCreate() && "Expects TypeFlags.isTupleCreate()");
9964 | | |
9965 | 147 | auto *SrcTy = dyn_cast<llvm::ScalableVectorType>(Ops[0]->getType()); |
9966 | 147 | unsigned MinElts = SrcTy->getMinNumElements(); |
9967 | 147 | Value *Call = llvm::PoisonValue::get(Ty); |
9968 | 588 | for (unsigned I = 0; I < Ops.size(); I++441 ) { |
9969 | 441 | Value *Idx = ConstantInt::get(CGM.Int64Ty, I * MinElts); |
9970 | 441 | Call = Builder.CreateInsertVector(Ty, Call, Ops[I], Idx); |
9971 | 441 | } |
9972 | | |
9973 | 147 | return Call; |
9974 | 147 | } |
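 | | // Editor's sketch: for svcreate2_s32(a, b), MinElts is 4 and Ty is
 | | // <vscale x 8 x i32>, so the loop above emits
 | | //   %t = call ... @llvm.vector.insert.nxv8i32.nxv4i32(poison, %a, i64 0)
 | | //   %r = call ... @llvm.vector.insert.nxv8i32.nxv4i32(%t,    %b, i64 4)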
9975 | | |
9976 | 26.4k | Value *CodeGenFunction::FormSVEBuiltinResult(Value *Call) { |
9977 | | // Multi-vector results should be concatenated into a single (wide) result
9978 | | // vector.
9979 | 26.4k | auto *StructTy = dyn_cast<StructType>(Call->getType()); |
9980 | 26.4k | if (!StructTy) |
9981 | 25.9k | return Call; |
9982 | | |
9983 | 522 | auto *VTy = dyn_cast<ScalableVectorType>(StructTy->getTypeAtIndex(0U)); |
9984 | 522 | if (!VTy) |
9985 | 0 | return Call; |
9986 | 522 | unsigned N = StructTy->getNumElements(); |
9987 | | |
9988 | | // We may need to emit a cast to an svbool_t.
9989 | 522 | bool IsPredTy = VTy->getElementType()->isIntegerTy(1); |
9990 | 522 | unsigned MinElts = IsPredTy ? 162 : VTy->getMinNumElements()520 ; |
9991 | | |
9992 | 522 | ScalableVectorType *WideVTy = |
9993 | 522 | ScalableVectorType::get(VTy->getElementType(), MinElts * N); |
9994 | 522 | Value *Ret = llvm::PoisonValue::get(WideVTy); |
9995 | 2.08k | for (unsigned I = 0; I < N; ++I1.56k ) { |
9996 | 1.56k | Value *SRet = Builder.CreateExtractValue(Call, I); |
9997 | 1.56k | assert(SRet->getType() == VTy && "Unexpected type for result value"); |
9998 | 1.56k | Value *Idx = ConstantInt::get(CGM.Int64Ty, I * MinElts); |
9999 | | |
10000 | 1.56k | if (IsPredTy) |
10001 | 4 | SRet = EmitSVEPredicateCast( |
10002 | 4 | SRet, ScalableVectorType::get(Builder.getInt1Ty(), 16)); |
10003 | | |
10004 | 1.56k | Ret = Builder.CreateInsertVector(WideVTy, Ret, SRet, Idx); |
10005 | 1.56k | } |
10006 | 522 | Call = Ret; |
10007 | | |
10008 | 522 | return Call; |
10009 | 522 | } |
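 | | // Editor's worked example: an svld2_s32 call returns the IR struct
 | | // {nxv4i32, nxv4i32}; the loop above extracts both fields and inserts
 | | // them at element offsets 0 and 4 of a single <vscale x 8 x i32>, the
 | | // wide type that the svint32x2_t result maps to.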
10010 | | |
10011 | | void CodeGenFunction::GetAArch64SVEProcessedOperands( |
10012 | | unsigned BuiltinID, const CallExpr *E, SmallVectorImpl<Value *> &Ops, |
10013 | 41.0k | SVETypeFlags TypeFlags) { |
10014 | | // Find out if any arguments are required to be integer constant expressions. |
10015 | 41.0k | unsigned ICEArguments = 0; |
10016 | 41.0k | ASTContext::GetBuiltinTypeError Error; |
10017 | 41.0k | getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments); |
10018 | 41.0k | assert(Error == ASTContext::GE_None && "Should not codegen an error"); |
10019 | | |
10020 | | // Tuple set/get only requires one insert/extract vector, which is |
10021 | | // created by EmitSVETupleSetOrGet. |
10022 | 41.0k | bool IsTupleGetOrSet = TypeFlags.isTupleSet() || TypeFlags.isTupleGet()40.8k ; |
10023 | | |
10024 | 141k | for (unsigned i = 0, e = E->getNumArgs(); i != e; i++100k ) { |
10025 | 100k | bool IsICE = ICEArguments & (1 << i); |
10026 | 100k | Value *Arg = EmitScalarExpr(E->getArg(i)); |
10027 | | |
10028 | 100k | if (IsICE) { |
10029 | | // If this is required to be a constant, constant fold it so that we know |
10030 | | // that the generated intrinsic gets a ConstantInt. |
10031 | 6.21k | std::optional<llvm::APSInt> Result = |
10032 | 6.21k | E->getArg(i)->getIntegerConstantExpr(getContext()); |
10033 | 6.21k | assert(Result && "Expected argument to be a constant"); |
10034 | | |
10035 | | // Immediates for SVE llvm intrinsics are always 32-bit. We can safely
10036 | | // truncate because the immediate has been range checked and no valid |
10037 | | // immediate requires more than a handful of bits. |
10038 | 6.21k | *Result = Result->extOrTrunc(32); |
10039 | 6.21k | Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), *Result)); |
10040 | 6.21k | continue; |
10041 | 6.21k | } |
10042 | | |
10043 | 94.3k | if (IsTupleGetOrSet || !isa<ScalableVectorType>(Arg->getType())93.8k ) { |
10044 | 21.0k | Ops.push_back(Arg); |
10045 | 21.0k | continue; |
10046 | 21.0k | } |
10047 | | |
10048 | 73.3k | auto *VTy = cast<ScalableVectorType>(Arg->getType()); |
10049 | 73.3k | unsigned MinElts = VTy->getMinNumElements(); |
10050 | 73.3k | bool IsPred = VTy->getElementType()->isIntegerTy(1); |
10051 | 73.3k | unsigned N = (MinElts * VTy->getScalarSizeInBits()) / (IsPred ? 1623.8k : 12849.5k ); |
10052 | | |
10053 | 73.3k | if (N == 1) { |
10054 | 71.4k | Ops.push_back(Arg); |
10055 | 71.4k | continue; |
10056 | 71.4k | } |
10057 | | |
10058 | 7.44k | for (unsigned I = 0; 1.87k I < N; ++I5.56k ) { |
10059 | 5.56k | Value *Idx = ConstantInt::get(CGM.Int64Ty, (I * MinElts) / N); |
10060 | 5.56k | auto *NewVTy = |
10061 | 5.56k | ScalableVectorType::get(VTy->getElementType(), MinElts / N); |
10062 | 5.56k | Ops.push_back(Builder.CreateExtractVector(NewVTy, Arg, Idx)); |
10063 | 5.56k | } |
10064 | 1.87k | } |
10065 | 41.0k | } |
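 | | // Editor's worked example: a passed-in svint32x2_t arrives as one wide
 | | // <vscale x 8 x i32> value, so N above is 2 and the argument is split
 | | // into two <vscale x 4 x i32> pieces via llvm.vector.extract at element
 | | // offsets 0 and 4 before being handed to the intrinsic.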
10066 | | |
10067 | | Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID, |
10068 | 41.7k | const CallExpr *E) { |
10069 | 41.7k | llvm::Type *Ty = ConvertType(E->getType()); |
10070 | 41.7k | if (BuiltinID >= SVE::BI__builtin_sve_reinterpret_s8_s8 && |
10071 | 41.7k | BuiltinID <= SVE::BI__builtin_sve_reinterpret_f64_f64_x42.64k ) { |
10072 | 2.45k | Value *Val = EmitScalarExpr(E->getArg(0)); |
10073 | 2.45k | return EmitSVEReinterpret(Val, Ty); |
10074 | 2.45k | } |
10075 | | |
10076 | 39.2k | auto *Builtin = findARMVectorIntrinsicInMap(AArch64SVEIntrinsicMap, BuiltinID, |
10077 | 39.2k | AArch64SVEIntrinsicsProvenSorted); |
10078 | | |
10079 | 39.2k | llvm::SmallVector<Value *, 4> Ops; |
10080 | 39.2k | SVETypeFlags TypeFlags(Builtin->TypeModifier); |
10081 | 39.2k | GetAArch64SVEProcessedOperands(BuiltinID, E, Ops, TypeFlags); |
10082 | | |
10083 | 39.2k | if (TypeFlags.isLoad()) |
10084 | 1.10k | return EmitSVEMaskedLoad(E, Ty, Ops, Builtin->LLVMIntrinsic, |
10085 | 1.10k | TypeFlags.isZExtReturn()); |
10086 | 38.1k | else if (TypeFlags.isStore()) |
10087 | 292 | return EmitSVEMaskedStore(E, Ops, Builtin->LLVMIntrinsic); |
10088 | 37.8k | else if (TypeFlags.isGatherLoad()) |
10089 | 2.37k | return EmitSVEGatherLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic); |
10090 | 35.5k | else if (TypeFlags.isScatterStore()) |
10091 | 838 | return EmitSVEScatterStore(TypeFlags, Ops, Builtin->LLVMIntrinsic); |
10092 | 34.6k | else if (TypeFlags.isPrefetch()) |
10093 | 260 | return EmitSVEPrefetchLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic); |
10094 | 34.4k | else if (TypeFlags.isGatherPrefetch()) |
10095 | 160 | return EmitSVEGatherPrefetch(TypeFlags, Ops, Builtin->LLVMIntrinsic); |
10096 | 34.2k | else if (TypeFlags.isStructLoad()) |
10097 | 640 | return EmitSVEStructLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic); |
10098 | 33.6k | else if (TypeFlags.isStructStore()) |
10099 | 706 | return EmitSVEStructStore(TypeFlags, Ops, Builtin->LLVMIntrinsic); |
10100 | 32.9k | else if (TypeFlags.isTupleSet() || TypeFlags.isTupleGet()32.7k ) |
10101 | 375 | return EmitSVETupleSetOrGet(TypeFlags, Ty, Ops); |
10102 | 32.5k | else if (TypeFlags.isTupleCreate()) |
10103 | 147 | return EmitSVETupleCreate(TypeFlags, Ty, Ops); |
10104 | 32.3k | else if (TypeFlags.isUndef()) |
10105 | 5.81k | return UndefValue::get(Ty); |
10106 | 26.5k | else if (Builtin->LLVMIntrinsic != 0) { |
10107 | 25.9k | if (TypeFlags.getMergeType() == SVETypeFlags::MergeZeroExp) |
10108 | 907 | InsertExplicitZeroOperand(Builder, Ty, Ops); |
10109 | | |
10110 | 25.9k | if (TypeFlags.getMergeType() == SVETypeFlags::MergeAnyExp) |
10111 | 917 | InsertExplicitUndefOperand(Builder, Ty, Ops); |
10112 | | |
10113 | | // Some ACLE builtins leave out the argument to specify the predicate |
10114 | | // pattern, which is expected to be expanded to an SV_ALL pattern. |
10115 | 25.9k | if (TypeFlags.isAppendSVALL()) |
10116 | 29 | Ops.push_back(Builder.getInt32(/*SV_ALL*/ 31)); |
10117 | 25.9k | if (TypeFlags.isInsertOp1SVALL()) |
10118 | 260 | Ops.insert(&Ops[1], Builder.getInt32(/*SV_ALL*/ 31)); |
10119 | | |
10120 | | // Predicates must match the main datatype. |
10121 | 101k | for (unsigned i = 0, e = Ops.size(); i != e; ++i75.6k ) |
10122 | 75.6k | if (auto PredTy = dyn_cast<llvm::VectorType>(Ops[i]->getType())) |
10123 | 61.4k | if (PredTy->getElementType()->isIntegerTy(1)) |
10124 | 16.5k | Ops[i] = EmitSVEPredicateCast(Ops[i], getSVEType(TypeFlags)); |
10125 | | |
10126 | | // Splat scalar operand to vector (intrinsics with _n infix) |
10127 | 25.9k | if (TypeFlags.hasSplatOperand()) { |
10128 | 7.32k | unsigned OpNo = TypeFlags.getSplatOperand(); |
10129 | 7.32k | Ops[OpNo] = EmitSVEDupX(Ops[OpNo]); |
10130 | 7.32k | } |
10131 | | |
10132 | 25.9k | if (TypeFlags.isReverseCompare()) |
10133 | 270 | std::swap(Ops[1], Ops[2]); |
10134 | 25.7k | else if (TypeFlags.isReverseUSDOT()) |
10135 | 8 | std::swap(Ops[1], Ops[2]); |
10136 | 25.7k | else if (TypeFlags.isReverseMergeAnyBinOp() && |
10137 | 25.7k | TypeFlags.getMergeType() == SVETypeFlags::MergeAny828 ) |
10138 | 276 | std::swap(Ops[1], Ops[2]); |
10139 | 25.4k | else if (TypeFlags.isReverseMergeAnyAccOp() && |
10140 | 25.4k | TypeFlags.getMergeType() == SVETypeFlags::MergeAny840 ) |
10141 | 280 | std::swap(Ops[1], Ops[3]); |
10142 | | |
10143 | | // Predicated intrinsics with _z suffix need a select w/ zeroinitializer. |
10144 | 25.9k | if (TypeFlags.getMergeType() == SVETypeFlags::MergeZero) { |
10145 | 3.48k | llvm::Type *OpndTy = Ops[1]->getType(); |
10146 | 3.48k | auto *SplatZero = Constant::getNullValue(OpndTy); |
10147 | 3.48k | Ops[1] = Builder.CreateSelect(Ops[0], Ops[1], SplatZero); |
10148 | 3.48k | } |
10149 | | |
10150 | 25.9k | Function *F = CGM.getIntrinsic(Builtin->LLVMIntrinsic, |
10151 | 25.9k | getSVEOverloadTypes(TypeFlags, Ty, Ops)); |
10152 | 25.9k | Value *Call = Builder.CreateCall(F, Ops); |
10153 | | |
10154 | | // Predicate results must be converted to svbool_t. |
10155 | 25.9k | if (auto PredTy = dyn_cast<llvm::VectorType>(Call->getType())) |
10156 | 23.9k | if (PredTy->getScalarType()->isIntegerTy(1)) |
10157 | 1.97k | Call = EmitSVEPredicateCast(Call, cast<llvm::ScalableVectorType>(Ty)); |
10158 | | |
10159 | 25.9k | return FormSVEBuiltinResult(Call); |
10160 | 25.9k | } |
10161 | | |
10162 | 592 | switch (BuiltinID) { |
10163 | 0 | default: |
10164 | 0 | return nullptr; |
10165 | 3 | case SVE::BI__builtin_sve_svpsel_lane_b8: |
10166 | 6 | case SVE::BI__builtin_sve_svpsel_lane_b16: |
10167 | 9 | case SVE::BI__builtin_sve_svpsel_lane_b32: |
10168 | 12 | case SVE::BI__builtin_sve_svpsel_lane_b64: |
10169 | 15 | case SVE::BI__builtin_sve_svpsel_lane_c8: |
10170 | 18 | case SVE::BI__builtin_sve_svpsel_lane_c16: |
10171 | 21 | case SVE::BI__builtin_sve_svpsel_lane_c32: |
10172 | 24 | case SVE::BI__builtin_sve_svpsel_lane_c64: { |
10173 | 24 | bool IsSVCount = isa<TargetExtType>(Ops[0]->getType()); |
10174 | 24 | assert(((!IsSVCount || cast<TargetExtType>(Ops[0]->getType())->getName() == |
10175 | 24 | "aarch64.svcount")) && |
10176 | 24 | "Unexpected TargetExtType"); |
10177 | 24 | auto SVCountTy = |
10178 | 24 | llvm::TargetExtType::get(getLLVMContext(), "aarch64.svcount"); |
10179 | 24 | Function *CastFromSVCountF = |
10180 | 24 | CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool, SVCountTy); |
10181 | 24 | Function *CastToSVCountF = |
10182 | 24 | CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, SVCountTy); |
10183 | | |
10184 | 24 | auto OverloadedTy = getSVEType(SVETypeFlags(Builtin->TypeModifier)); |
10185 | 24 | Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_psel, OverloadedTy); |
10186 | 24 | llvm::Value *Ops0 = |
10187 | 24 | IsSVCount ? Builder.CreateCall(CastFromSVCountF, Ops[0])12 : Ops[0]12 ; |
10188 | 24 | llvm::Value *Ops1 = EmitSVEPredicateCast(Ops[1], OverloadedTy); |
10189 | 24 | llvm::Value *PSel = Builder.CreateCall(F, {Ops0, Ops1, Ops[2]}); |
10190 | 24 | return IsSVCount ? Builder.CreateCall(CastToSVCountF, PSel)12 : PSel12 ; |
10191 | 24 | } |
10192 | 5 | case SVE::BI__builtin_sve_svmov_b_z: { |
10193 | | // svmov_b_z(pg, op) <=> svand_b_z(pg, op, op) |
10194 | 5 | SVETypeFlags TypeFlags(Builtin->TypeModifier); |
10195 | 5 | llvm::Type* OverloadedTy = getSVEType(TypeFlags); |
10196 | 5 | Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_and_z, OverloadedTy); |
10197 | 5 | return Builder.CreateCall(F, {Ops[0], Ops[1], Ops[1]}); |
10198 | 24 | } |
10199 | | |
10200 | 5 | case SVE::BI__builtin_sve_svnot_b_z: { |
10201 | | // svnot_b_z(pg, op) <=> sveor_b_z(pg, op, pg) |
10202 | 5 | SVETypeFlags TypeFlags(Builtin->TypeModifier); |
10203 | 5 | llvm::Type* OverloadedTy = getSVEType(TypeFlags); |
10204 | 5 | Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_eor_z, OverloadedTy); |
10205 | 5 | return Builder.CreateCall(F, {Ops[0], Ops[1], Ops[0]}); |
10206 | 24 | } |
10207 | | |
10208 | 6 | case SVE::BI__builtin_sve_svmovlb_u16: |
10209 | 12 | case SVE::BI__builtin_sve_svmovlb_u32: |
10210 | 18 | case SVE::BI__builtin_sve_svmovlb_u64: |
10211 | 18 | return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_ushllb); |
10212 | | |
10213 | 6 | case SVE::BI__builtin_sve_svmovlb_s16: |
10214 | 12 | case SVE::BI__builtin_sve_svmovlb_s32: |
10215 | 18 | case SVE::BI__builtin_sve_svmovlb_s64: |
10216 | 18 | return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_sshllb); |
10217 | | |
10218 | 6 | case SVE::BI__builtin_sve_svmovlt_u16: |
10219 | 12 | case SVE::BI__builtin_sve_svmovlt_u32: |
10220 | 18 | case SVE::BI__builtin_sve_svmovlt_u64: |
10221 | 18 | return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_ushllt); |
10222 | | |
10223 | 6 | case SVE::BI__builtin_sve_svmovlt_s16: |
10224 | 12 | case SVE::BI__builtin_sve_svmovlt_s32: |
10225 | 18 | case SVE::BI__builtin_sve_svmovlt_s64: |
10226 | 18 | return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_sshllt); |
10227 | | |
10228 | 6 | case SVE::BI__builtin_sve_svpmullt_u16: |
10229 | 12 | case SVE::BI__builtin_sve_svpmullt_u64: |
10230 | 18 | case SVE::BI__builtin_sve_svpmullt_n_u16: |
10231 | 24 | case SVE::BI__builtin_sve_svpmullt_n_u64: |
10232 | 24 | return EmitSVEPMull(TypeFlags, Ops, Intrinsic::aarch64_sve_pmullt_pair); |
10233 | | |
10234 | 6 | case SVE::BI__builtin_sve_svpmullb_u16: |
10235 | 12 | case SVE::BI__builtin_sve_svpmullb_u64: |
10236 | 18 | case SVE::BI__builtin_sve_svpmullb_n_u16: |
10237 | 24 | case SVE::BI__builtin_sve_svpmullb_n_u64: |
10238 | 24 | return EmitSVEPMull(TypeFlags, Ops, Intrinsic::aarch64_sve_pmullb_pair); |
10239 | | |
10240 | 5 | case SVE::BI__builtin_sve_svdup_n_b8: |
10241 | 10 | case SVE::BI__builtin_sve_svdup_n_b16: |
10242 | 15 | case SVE::BI__builtin_sve_svdup_n_b32: |
10243 | 20 | case SVE::BI__builtin_sve_svdup_n_b64: { |
10244 | 20 | Value *CmpNE = |
10245 | 20 | Builder.CreateICmpNE(Ops[0], Constant::getNullValue(Ops[0]->getType())); |
10246 | 20 | llvm::ScalableVectorType *OverloadedTy = getSVEType(TypeFlags); |
10247 | 20 | Value *Dup = EmitSVEDupX(CmpNE, OverloadedTy); |
10248 | 20 | return EmitSVEPredicateCast(Dup, cast<llvm::ScalableVectorType>(Ty)); |
10249 | 15 | } |
10250 | | |
10251 | 6 | case SVE::BI__builtin_sve_svdupq_n_b8: |
10252 | 12 | case SVE::BI__builtin_sve_svdupq_n_b16: |
10253 | 18 | case SVE::BI__builtin_sve_svdupq_n_b32: |
10254 | 24 | case SVE::BI__builtin_sve_svdupq_n_b64: |
10255 | 29 | case SVE::BI__builtin_sve_svdupq_n_u8: |
10256 | 34 | case SVE::BI__builtin_sve_svdupq_n_s8: |
10257 | 39 | case SVE::BI__builtin_sve_svdupq_n_u64: |
10258 | 44 | case SVE::BI__builtin_sve_svdupq_n_f64: |
10259 | 49 | case SVE::BI__builtin_sve_svdupq_n_s64: |
10260 | 54 | case SVE::BI__builtin_sve_svdupq_n_u16: |
10261 | 59 | case SVE::BI__builtin_sve_svdupq_n_f16: |
10262 | 64 | case SVE::BI__builtin_sve_svdupq_n_bf16: |
10263 | 69 | case SVE::BI__builtin_sve_svdupq_n_s16: |
10264 | 74 | case SVE::BI__builtin_sve_svdupq_n_u32: |
10265 | 79 | case SVE::BI__builtin_sve_svdupq_n_f32: |
10266 | 84 | case SVE::BI__builtin_sve_svdupq_n_s32: { |
10267 | | // These builtins are implemented by packing the operands into a fixed-length
10268 | | // vector and replicating it across the register via the dupq_lane intrinsic.
10269 | 84 | unsigned NumOpnds = Ops.size(); |
10270 | | |
10271 | 84 | bool IsBoolTy = |
10272 | 84 | cast<llvm::VectorType>(Ty)->getElementType()->isIntegerTy(1); |
10273 | | |
10274 | | // For svdupq_n_b* the element type is an integer of width 128/numelts,
10275 | | // so that the compare can use the width that is natural for the expected |
10276 | | // number of predicate lanes. |
10277 | 84 | llvm::Type *EltTy = Ops[0]->getType(); |
10278 | 84 | if (IsBoolTy) |
10279 | 24 | EltTy = IntegerType::get(getLLVMContext(), SVEBitsPerBlock / NumOpnds); |
10280 | | |
10281 | 84 | SmallVector<llvm::Value *, 16> VecOps; |
10282 | 674 | for (unsigned I = 0; I < NumOpnds; ++I590 ) |
10283 | 590 | VecOps.push_back(Builder.CreateZExt(Ops[I], EltTy)); |
10284 | 84 | Value *Vec = BuildVector(VecOps); |
10285 | | |
10286 | 84 | llvm::Type *OverloadedTy = getSVEVectorForElementType(EltTy); |
10287 | 84 | Value *InsertSubVec = Builder.CreateInsertVector( |
10288 | 84 | OverloadedTy, PoisonValue::get(OverloadedTy), Vec, Builder.getInt64(0)); |
10289 | | |
10290 | 84 | Function *F = |
10291 | 84 | CGM.getIntrinsic(Intrinsic::aarch64_sve_dupq_lane, OverloadedTy); |
10292 | 84 | Value *DupQLane = |
10293 | 84 | Builder.CreateCall(F, {InsertSubVec, Builder.getInt64(0)}); |
10294 | | |
10295 | 84 | if (!IsBoolTy) |
10296 | 60 | return DupQLane; |
10297 | | |
10298 | 24 | SVETypeFlags TypeFlags(Builtin->TypeModifier); |
10299 | 24 | Value *Pred = EmitSVEAllTruePred(TypeFlags); |
10300 | | |
10301 | | // For svdupq_n_b* we need to add an additional 'cmpne' with '0'. |
10302 | 24 | F = CGM.getIntrinsic(NumOpnds == 2 ? Intrinsic::aarch64_sve_cmpne6 |
10303 | 24 | : Intrinsic::aarch64_sve_cmpne_wide18 , |
10304 | 24 | OverloadedTy); |
10305 | 24 | Value *Call = Builder.CreateCall( |
10306 | 24 | F, {Pred, DupQLane, EmitSVEDupX(Builder.getInt64(0))}); |
10307 | 24 | return EmitSVEPredicateCast(Call, cast<llvm::ScalableVectorType>(Ty)); |
10308 | 84 | } |
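 | | // Editor's worked example for the predicate forms: svdupq_n_b8 has 16
 | | // operands, so EltTy above becomes i8 (128/16 bits); the operands are
 | | // zero-extended, packed into a <16 x i8>, broadcast with dupq_lane, and
 | | // the final cmpne-against-zero turns the integer vector back into a
 | | // predicate.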
10309 | | |
10310 | 5 | case SVE::BI__builtin_sve_svpfalse_b: |
10311 | 5 | return ConstantInt::getFalse(Ty); |
10312 | | |
10313 | 5 | case SVE::BI__builtin_sve_svpfalse_c: { |
10314 | 5 | auto SVBoolTy = ScalableVectorType::get(Builder.getInt1Ty(), 16); |
10315 | 5 | Function *CastToSVCountF = |
10316 | 5 | CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, Ty); |
10317 | 5 | return Builder.CreateCall(CastToSVCountF, ConstantInt::getFalse(SVBoolTy)); |
10318 | 84 | } |
10319 | | |
10320 | 5 | case SVE::BI__builtin_sve_svlen_bf16: |
10321 | 10 | case SVE::BI__builtin_sve_svlen_f16: |
10322 | 15 | case SVE::BI__builtin_sve_svlen_f32: |
10323 | 20 | case SVE::BI__builtin_sve_svlen_f64: |
10324 | 25 | case SVE::BI__builtin_sve_svlen_s8: |
10325 | 30 | case SVE::BI__builtin_sve_svlen_s16: |
10326 | 35 | case SVE::BI__builtin_sve_svlen_s32: |
10327 | 40 | case SVE::BI__builtin_sve_svlen_s64: |
10328 | 45 | case SVE::BI__builtin_sve_svlen_u8: |
10329 | 50 | case SVE::BI__builtin_sve_svlen_u16: |
10330 | 55 | case SVE::BI__builtin_sve_svlen_u32: |
10331 | 60 | case SVE::BI__builtin_sve_svlen_u64: { |
10332 | 60 | SVETypeFlags TF(Builtin->TypeModifier); |
10333 | 60 | auto VTy = cast<llvm::VectorType>(getSVEType(TF)); |
10334 | 60 | auto *NumEls = |
10335 | 60 | llvm::ConstantInt::get(Ty, VTy->getElementCount().getKnownMinValue()); |
10336 | | |
10337 | 60 | Function *F = CGM.getIntrinsic(Intrinsic::vscale, Ty); |
10338 | 60 | return Builder.CreateMul(NumEls, Builder.CreateCall(F)); |
10339 | 55 | } |
10340 | | |
10341 | 6 | case SVE::BI__builtin_sve_svtbl2_u8: |
10342 | 12 | case SVE::BI__builtin_sve_svtbl2_s8: |
10343 | 18 | case SVE::BI__builtin_sve_svtbl2_u16: |
10344 | 24 | case SVE::BI__builtin_sve_svtbl2_s16: |
10345 | 30 | case SVE::BI__builtin_sve_svtbl2_u32: |
10346 | 36 | case SVE::BI__builtin_sve_svtbl2_s32: |
10347 | 42 | case SVE::BI__builtin_sve_svtbl2_u64: |
10348 | 48 | case SVE::BI__builtin_sve_svtbl2_s64: |
10349 | 54 | case SVE::BI__builtin_sve_svtbl2_f16: |
10350 | 60 | case SVE::BI__builtin_sve_svtbl2_bf16: |
10351 | 66 | case SVE::BI__builtin_sve_svtbl2_f32: |
10352 | 72 | case SVE::BI__builtin_sve_svtbl2_f64: { |
10353 | 72 | SVETypeFlags TF(Builtin->TypeModifier); |
10354 | 72 | auto VTy = cast<llvm::ScalableVectorType>(getSVEType(TF)); |
10355 | 72 | Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_tbl2, VTy); |
10356 | 72 | return Builder.CreateCall(F, Ops); |
10357 | 66 | } |
10358 | | |
10359 | 7 | case SVE::BI__builtin_sve_svset_neonq_s8: |
10360 | 12 | case SVE::BI__builtin_sve_svset_neonq_s16: |
10361 | 17 | case SVE::BI__builtin_sve_svset_neonq_s32: |
10362 | 22 | case SVE::BI__builtin_sve_svset_neonq_s64: |
10363 | 27 | case SVE::BI__builtin_sve_svset_neonq_u8: |
10364 | 32 | case SVE::BI__builtin_sve_svset_neonq_u16: |
10365 | 37 | case SVE::BI__builtin_sve_svset_neonq_u32: |
10366 | 42 | case SVE::BI__builtin_sve_svset_neonq_u64: |
10367 | 47 | case SVE::BI__builtin_sve_svset_neonq_f16: |
10368 | 52 | case SVE::BI__builtin_sve_svset_neonq_f32: |
10369 | 57 | case SVE::BI__builtin_sve_svset_neonq_f64: |
10370 | 64 | case SVE::BI__builtin_sve_svset_neonq_bf16: { |
10371 | 64 | return Builder.CreateInsertVector(Ty, Ops[0], Ops[1], Builder.getInt64(0)); |
10372 | 57 | } |
10373 | | |
10374 | 7 | case SVE::BI__builtin_sve_svget_neonq_s8: |
10375 | 12 | case SVE::BI__builtin_sve_svget_neonq_s16: |
10376 | 17 | case SVE::BI__builtin_sve_svget_neonq_s32: |
10377 | 22 | case SVE::BI__builtin_sve_svget_neonq_s64: |
10378 | 27 | case SVE::BI__builtin_sve_svget_neonq_u8: |
10379 | 32 | case SVE::BI__builtin_sve_svget_neonq_u16: |
10380 | 37 | case SVE::BI__builtin_sve_svget_neonq_u32: |
10381 | 42 | case SVE::BI__builtin_sve_svget_neonq_u64: |
10382 | 47 | case SVE::BI__builtin_sve_svget_neonq_f16: |
10383 | 52 | case SVE::BI__builtin_sve_svget_neonq_f32: |
10384 | 57 | case SVE::BI__builtin_sve_svget_neonq_f64: |
10385 | 64 | case SVE::BI__builtin_sve_svget_neonq_bf16: { |
10386 | 64 | return Builder.CreateExtractVector(Ty, Ops[0], Builder.getInt64(0)); |
10387 | 57 | } |
10388 | | |
10389 | 7 | case SVE::BI__builtin_sve_svdup_neonq_s8: |
10390 | 12 | case SVE::BI__builtin_sve_svdup_neonq_s16: |
10391 | 17 | case SVE::BI__builtin_sve_svdup_neonq_s32: |
10392 | 22 | case SVE::BI__builtin_sve_svdup_neonq_s64: |
10393 | 27 | case SVE::BI__builtin_sve_svdup_neonq_u8: |
10394 | 32 | case SVE::BI__builtin_sve_svdup_neonq_u16: |
10395 | 37 | case SVE::BI__builtin_sve_svdup_neonq_u32: |
10396 | 42 | case SVE::BI__builtin_sve_svdup_neonq_u64: |
10397 | 47 | case SVE::BI__builtin_sve_svdup_neonq_f16: |
10398 | 52 | case SVE::BI__builtin_sve_svdup_neonq_f32: |
10399 | 57 | case SVE::BI__builtin_sve_svdup_neonq_f64: |
10400 | 64 | case SVE::BI__builtin_sve_svdup_neonq_bf16: { |
10401 | 64 | Value *Insert = Builder.CreateInsertVector(Ty, PoisonValue::get(Ty), Ops[0], |
10402 | 64 | Builder.getInt64(0)); |
10403 | 64 | return Builder.CreateIntrinsic(Intrinsic::aarch64_sve_dupq_lane, {Ty}, |
10404 | 64 | {Insert, Builder.getInt64(0)}); |
10405 | 57 | } |
10406 | 592 | } |
10407 | | |
10408 | | // Should not happen: every case above returns.
10409 | 0 | return nullptr; |
10410 | 592 | } |
10411 | | |
10412 | | Value *CodeGenFunction::EmitAArch64SMEBuiltinExpr(unsigned BuiltinID, |
10413 | 1.73k | const CallExpr *E) { |
10414 | 1.73k | auto *Builtin = findARMVectorIntrinsicInMap(AArch64SMEIntrinsicMap, BuiltinID, |
10415 | 1.73k | AArch64SMEIntrinsicsProvenSorted); |
10416 | | |
10417 | 1.73k | llvm::SmallVector<Value *, 4> Ops; |
10418 | 1.73k | SVETypeFlags TypeFlags(Builtin->TypeModifier); |
10419 | 1.73k | GetAArch64SVEProcessedOperands(BuiltinID, E, Ops, TypeFlags); |
10420 | | |
10421 | 1.73k | if (TypeFlags.isLoad() || TypeFlags.isStore()1.61k ) |
10422 | 244 | return EmitSMELd1St1(TypeFlags, Ops, Builtin->LLVMIntrinsic); |
10423 | 1.48k | else if (TypeFlags.isReadZA() || TypeFlags.isWriteZA()1.00k ) |
10424 | 960 | return EmitSMEReadWrite(TypeFlags, Ops, Builtin->LLVMIntrinsic); |
10425 | 528 | else if (BuiltinID == SME::BI__builtin_sme_svzero_mask_za || |
10426 | 528 | BuiltinID == SME::BI__builtin_sme_svzero_za519 ) |
10427 | 12 | return EmitSMEZero(TypeFlags, Ops, Builtin->LLVMIntrinsic); |
10428 | 516 | else if (BuiltinID == SME::BI__builtin_sme_svldr_vnum_za || |
10429 | 516 | BuiltinID == SME::BI__builtin_sme_svstr_vnum_za507 || |
10430 | 516 | BuiltinID == SME::BI__builtin_sme_svldr_za498 || |
10431 | 516 | BuiltinID == SME::BI__builtin_sme_svstr_za495 ) |
10432 | 24 | return EmitSMELdrStr(TypeFlags, Ops, Builtin->LLVMIntrinsic); |
10433 | | |
10434 | | // Should not happen! |
10435 | 492 | if (Builtin->LLVMIntrinsic == 0) |
10436 | 0 | return nullptr; |
10437 | | |
10438 | | // Predicates must match the main datatype. |
10439 | 2.85k | for (unsigned i = 0, e = Ops.size(); 492 i != e; ++i2.36k ) |
10440 | 2.36k | if (auto PredTy = dyn_cast<llvm::VectorType>(Ops[i]->getType())) |
10441 | 1.88k | if (PredTy->getElementType()->isIntegerTy(1)) |
10442 | 400 | Ops[i] = EmitSVEPredicateCast(Ops[i], getSVEType(TypeFlags)); |
10443 | | |
10444 | 492 | Function *F = |
10445 | 492 | TypeFlags.isOverloadNone() |
10446 | 492 | ? CGM.getIntrinsic(Builtin->LLVMIntrinsic)12 |
10447 | 492 | : CGM.getIntrinsic(Builtin->LLVMIntrinsic, {getSVEType(TypeFlags)})480 ; |
10448 | 492 | Value *Call = Builder.CreateCall(F, Ops); |
10449 | | |
10450 | 492 | return FormSVEBuiltinResult(Call); |
10451 | 492 | } |
10452 | | |
10453 | | Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, |
10454 | | const CallExpr *E, |
10455 | 47.9k | llvm::Triple::ArchType Arch) { |
10456 | 47.9k | if (BuiltinID >= clang::AArch64::FirstSVEBuiltin && |
10457 | 47.9k | BuiltinID <= clang::AArch64::LastSVEBuiltin44.1k ) |
10458 | 41.7k | return EmitAArch64SVEBuiltinExpr(BuiltinID, E); |
10459 | | |
10460 | 6.20k | if (BuiltinID >= clang::AArch64::FirstSMEBuiltin && |
10461 | 6.20k | BuiltinID <= clang::AArch64::LastSMEBuiltin2.44k ) |
10462 | 1.73k | return EmitAArch64SMEBuiltinExpr(BuiltinID, E); |
10463 | | |
10464 | 4.47k | unsigned HintID = static_cast<unsigned>(-1); |
10465 | 4.47k | switch (BuiltinID) { |
10466 | 4.41k | default: break; |
10467 | 4.41k | case clang::AArch64::BI__builtin_arm_nop: |
10468 | 7 | HintID = 0; |
10469 | 7 | break; |
10470 | 7 | case clang::AArch64::BI__builtin_arm_yield: |
10471 | 9 | case clang::AArch64::BI__yield: |
10472 | 9 | HintID = 1; |
10473 | 9 | break; |
10474 | 7 | case clang::AArch64::BI__builtin_arm_wfe: |
10475 | 9 | case clang::AArch64::BI__wfe: |
10476 | 9 | HintID = 2; |
10477 | 9 | break; |
10478 | 7 | case clang::AArch64::BI__builtin_arm_wfi: |
10479 | 9 | case clang::AArch64::BI__wfi: |
10480 | 9 | HintID = 3; |
10481 | 9 | break; |
10482 | 7 | case clang::AArch64::BI__builtin_arm_sev: |
10483 | 9 | case clang::AArch64::BI__sev: |
10484 | 9 | HintID = 4; |
10485 | 9 | break; |
10486 | 7 | case clang::AArch64::BI__builtin_arm_sevl: |
10487 | 9 | case clang::AArch64::BI__sevl: |
10488 | 9 | HintID = 5; |
10489 | 9 | break; |
10490 | 4.47k | } |
10491 | | |
10492 | 4.47k | if (HintID != static_cast<unsigned>(-1)) { |
10493 | 52 | Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hint); |
10494 | 52 | return Builder.CreateCall(F, llvm::ConstantInt::get(Int32Ty, HintID)); |
10495 | 52 | } |
10496 | | |
10497 | 4.41k | if (BuiltinID == clang::AArch64::BI__builtin_arm_rbit) { |
10498 | 7 | assert((getContext().getTypeSize(E->getType()) == 32) && |
10499 | 7 | "rbit of unusual size!"); |
10500 | 7 | llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); |
10501 | 7 | return Builder.CreateCall( |
10502 | 7 | CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit"); |
10503 | 7 | } |
10504 | 4.41k | if (BuiltinID == clang::AArch64::BI__builtin_arm_rbit64) { |
10505 | 10 | assert((getContext().getTypeSize(E->getType()) == 64) && |
10506 | 10 | "rbit of unusual size!"); |
10507 | 10 | llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); |
10508 | 10 | return Builder.CreateCall( |
10509 | 10 | CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit"); |
10510 | 10 | } |
10511 | | |
10512 | 4.40k | if (BuiltinID == clang::AArch64::BI__builtin_arm_clz || |
10513 | 4.40k | BuiltinID == clang::AArch64::BI__builtin_arm_clz644.39k ) { |
10514 | 12 | llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); |
10515 | 12 | Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Arg->getType()); |
10516 | 12 | Value *Res = Builder.CreateCall(F, {Arg, Builder.getInt1(false)}); |
10517 | 12 | if (BuiltinID == clang::AArch64::BI__builtin_arm_clz64) |
10518 | 8 | Res = Builder.CreateTrunc(Res, Builder.getInt32Ty()); |
10519 | 12 | return Res; |
10520 | 12 | } |
10521 | | |
10522 | 4.38k | if (BuiltinID == clang::AArch64::BI__builtin_arm_cls) { |
10523 | 7 | llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); |
10524 | 7 | return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_cls), Arg, |
10525 | 7 | "cls"); |
10526 | 7 | } |
10527 | 4.38k | if (BuiltinID == clang::AArch64::BI__builtin_arm_cls64) { |
10528 | 14 | llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); |
10529 | 14 | return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_cls64), Arg, |
10530 | 14 | "cls"); |
10531 | 14 | } |
10532 | | |
10533 | 4.36k | if (BuiltinID == clang::AArch64::BI__builtin_arm_rint32zf || |
10534 | 4.36k | BuiltinID == clang::AArch64::BI__builtin_arm_rint32z4.36k ) { |
10535 | 2 | llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); |
10536 | 2 | llvm::Type *Ty = Arg->getType(); |
10537 | 2 | return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint32z, Ty), |
10538 | 2 | Arg, "frint32z"); |
10539 | 2 | } |
10540 | | |
10541 | 4.36k | if (BuiltinID == clang::AArch64::BI__builtin_arm_rint64zf || |
10542 | 4.36k | BuiltinID == clang::AArch64::BI__builtin_arm_rint64z4.36k ) { |
10543 | 2 | llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); |
10544 | 2 | llvm::Type *Ty = Arg->getType(); |
10545 | 2 | return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint64z, Ty), |
10546 | 2 | Arg, "frint64z"); |
10547 | 2 | } |
10548 | | |
10549 | 4.36k | if (BuiltinID == clang::AArch64::BI__builtin_arm_rint32xf || |
10550 | 4.36k | BuiltinID == clang::AArch64::BI__builtin_arm_rint32x4.36k ) { |
10551 | 2 | llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); |
10552 | 2 | llvm::Type *Ty = Arg->getType(); |
10553 | 2 | return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint32x, Ty), |
10554 | 2 | Arg, "frint32x"); |
10555 | 2 | } |
10556 | | |
10557 | 4.36k | if (BuiltinID == clang::AArch64::BI__builtin_arm_rint64xf || |
10558 | 4.36k | BuiltinID == clang::AArch64::BI__builtin_arm_rint64x4.36k ) { |
10559 | 2 | llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); |
10560 | 2 | llvm::Type *Ty = Arg->getType(); |
10561 | 2 | return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint64x, Ty), |
10562 | 2 | Arg, "frint64x"); |
10563 | 2 | } |
10564 | | |
10565 | 4.36k | if (BuiltinID == clang::AArch64::BI__builtin_arm_jcvt) { |
10566 | 6 | assert((getContext().getTypeSize(E->getType()) == 32) && |
10567 | 6 | "__jcvt of unusual size!"); |
10568 | 6 | llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); |
10569 | 6 | return Builder.CreateCall( |
10570 | 6 | CGM.getIntrinsic(Intrinsic::aarch64_fjcvtzs), Arg); |
10571 | 6 | } |
10572 | | |
10573 | 4.35k | if (BuiltinID == clang::AArch64::BI__builtin_arm_ld64b || |
10574 | 4.35k | BuiltinID == clang::AArch64::BI__builtin_arm_st64b4.35k || |
10575 | 4.35k | BuiltinID == clang::AArch64::BI__builtin_arm_st64bv4.34k || |
10576 | 4.35k | BuiltinID == clang::AArch64::BI__builtin_arm_st64bv04.34k ) { |
10577 | 16 | llvm::Value *MemAddr = EmitScalarExpr(E->getArg(0)); |
10578 | 16 | llvm::Value *ValPtr = EmitScalarExpr(E->getArg(1)); |
10579 | | |
10580 | 16 | if (BuiltinID == clang::AArch64::BI__builtin_arm_ld64b) { |
10581 | | // Load from the address via an LLVM intrinsic, receiving a |
10582 | | // tuple of 8 i64 words, and store each one to ValPtr. |
10583 | 4 | Function *F = CGM.getIntrinsic(Intrinsic::aarch64_ld64b); |
10584 | 4 | llvm::Value *Val = Builder.CreateCall(F, MemAddr); |
10585 | 4 | llvm::Value *ToRet; |
10586 | 36 | for (size_t i = 0; i < 8; i++32 ) { |
10587 | 32 | llvm::Value *ValOffsetPtr = |
10588 | 32 | Builder.CreateGEP(Int64Ty, ValPtr, Builder.getInt32(i)); |
10589 | 32 | Address Addr = |
10590 | 32 | Address(ValOffsetPtr, Int64Ty, CharUnits::fromQuantity(8)); |
10591 | 32 | ToRet = Builder.CreateStore(Builder.CreateExtractValue(Val, i), Addr); |
10592 | 32 | } |
10593 | 4 | return ToRet; |
10594 | 12 | } else { |
10595 | | // Load 8 i64 words from ValPtr, and store them to the address |
10596 | | // via an LLVM intrinsic. |
10597 | 12 | SmallVector<llvm::Value *, 9> Args; |
10598 | 12 | Args.push_back(MemAddr); |
10599 | 108 | for (size_t i = 0; i < 8; i++96 ) { |
10600 | 96 | llvm::Value *ValOffsetPtr = |
10601 | 96 | Builder.CreateGEP(Int64Ty, ValPtr, Builder.getInt32(i)); |
10602 | 96 | Address Addr = |
10603 | 96 | Address(ValOffsetPtr, Int64Ty, CharUnits::fromQuantity(8)); |
10604 | 96 | Args.push_back(Builder.CreateLoad(Addr)); |
10605 | 96 | } |
10606 | | |
10607 | 12 | auto Intr = (BuiltinID == clang::AArch64::BI__builtin_arm_st64b |
10608 | 12 | ? Intrinsic::aarch64_st64b4 |
10609 | 12 | : BuiltinID == clang::AArch64::BI__builtin_arm_st64bv8 |
10610 | 8 | ? Intrinsic::aarch64_st64bv4 |
10611 | 8 | : Intrinsic::aarch64_st64bv04 ); |
10612 | 12 | Function *F = CGM.getIntrinsic(Intr); |
10613 | 12 | return Builder.CreateCall(F, Args); |
10614 | 12 | } |
10615 | 16 | } |
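 | | // Editor's sketch (hypothetical usage): the ld64b path above copies a
 | | // 64-byte block through a tuple of eight i64s, e.g.
 | | //
 | | //   void read_block(const void *Dev, uint64_t Out[8]) {
 | | //     __builtin_arm_ld64b(Dev, Out);  // one LD64B plus eight stores
 | | //   }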
10616 | | |
10617 | 4.33k | if (BuiltinID == clang::AArch64::BI__builtin_arm_rndr || |
10618 | 4.33k | BuiltinID == clang::AArch64::BI__builtin_arm_rndrrs4.33k ) { |
10619 | | |
10620 | 10 | auto Intr = (BuiltinID == clang::AArch64::BI__builtin_arm_rndr |
10621 | 10 | ? Intrinsic::aarch64_rndr5 |
10622 | 10 | : Intrinsic::aarch64_rndrrs5 ); |
10623 | 10 | Function *F = CGM.getIntrinsic(Intr); |
10624 | 10 | llvm::Value *Val = Builder.CreateCall(F); |
10625 | 10 | Value *RandomValue = Builder.CreateExtractValue(Val, 0); |
10626 | 10 | Value *Status = Builder.CreateExtractValue(Val, 1); |
10627 | | |
10628 | 10 | Address MemAddress = EmitPointerWithAlignment(E->getArg(0)); |
10629 | 10 | Builder.CreateStore(RandomValue, MemAddress); |
10630 | 10 | Status = Builder.CreateZExt(Status, Int32Ty); |
10631 | 10 | return Status; |
10632 | 10 | } |
10633 | | |
10634 | 4.32k | if (BuiltinID == clang::AArch64::BI__clear_cache) { |
10635 | 3 | assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments"); |
10636 | 3 | const FunctionDecl *FD = E->getDirectCallee(); |
10637 | 3 | Value *Ops[2]; |
10638 | 9 | for (unsigned i = 0; i < 2; i++6 ) |
10639 | 6 | Ops[i] = EmitScalarExpr(E->getArg(i)); |
10640 | 3 | llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType()); |
10641 | 3 | llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty); |
10642 | 3 | StringRef Name = FD->getName(); |
10643 | 3 | return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops); |
10644 | 3 | } |
10645 | | |
10646 | 4.32k | if ((BuiltinID == clang::AArch64::BI__builtin_arm_ldrex || |
10647 | 4.32k | BuiltinID == clang::AArch64::BI__builtin_arm_ldaex4.31k ) && |
10648 | 4.32k | getContext().getTypeSize(E->getType()) == 12825 ) { |
10649 | 2 | Function *F = |
10650 | 2 | CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_ldaex |
10651 | 2 | ? Intrinsic::aarch64_ldaxp1 |
10652 | 2 | : Intrinsic::aarch64_ldxp1 ); |
10653 | | |
10654 | 2 | Value *LdPtr = EmitScalarExpr(E->getArg(0)); |
10655 | 2 | Value *Val = Builder.CreateCall(F, LdPtr, "ldxp"); |
10656 | | |
10657 | 2 | Value *Val0 = Builder.CreateExtractValue(Val, 1); |
10658 | 2 | Value *Val1 = Builder.CreateExtractValue(Val, 0); |
10659 | 2 | llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128); |
10660 | 2 | Val0 = Builder.CreateZExt(Val0, Int128Ty); |
10661 | 2 | Val1 = Builder.CreateZExt(Val1, Int128Ty); |
10662 | | |
10663 | 2 | Value *ShiftCst = llvm::ConstantInt::get(Int128Ty, 64); |
10664 | 2 | Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */); |
10665 | 2 | Val = Builder.CreateOr(Val, Val1); |
10666 | 2 | return Builder.CreateBitCast(Val, ConvertType(E->getType())); |
10667 | 4.32k | } else if (BuiltinID == clang::AArch64::BI__builtin_arm_ldrex || |
10668 | 4.32k | BuiltinID == clang::AArch64::BI__builtin_arm_ldaex4.30k ) { |
10669 | 23 | Value *LoadAddr = EmitScalarExpr(E->getArg(0)); |
10670 | | |
10671 | 23 | QualType Ty = E->getType(); |
10672 | 23 | llvm::Type *RealResTy = ConvertType(Ty); |
10673 | 23 | llvm::Type *IntTy = |
10674 | 23 | llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty)); |
10675 | | |
10676 | 23 | Function *F = |
10677 | 23 | CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_ldaex |
10678 | 23 | ? Intrinsic::aarch64_ldaxr9 |
10679 | 23 | : Intrinsic::aarch64_ldxr14 , |
10680 | 23 | UnqualPtrTy); |
10681 | 23 | CallInst *Val = Builder.CreateCall(F, LoadAddr, "ldxr"); |
10682 | 23 | Val->addParamAttr( |
10683 | 23 | 0, Attribute::get(getLLVMContext(), Attribute::ElementType, IntTy)); |
10684 | | |
10685 | 23 | if (RealResTy->isPointerTy()) |
10686 | 4 | return Builder.CreateIntToPtr(Val, RealResTy); |
10687 | | |
10688 | 19 | llvm::Type *IntResTy = llvm::IntegerType::get( |
10689 | 19 | getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy)); |
10690 | 19 | return Builder.CreateBitCast(Builder.CreateTruncOrBitCast(Val, IntResTy), |
10691 | 19 | RealResTy); |
10692 | 23 | } |
10693 | | |
10694 | 4.30k | if ((BuiltinID == clang::AArch64::BI__builtin_arm_strex || |
10695 | 4.30k | BuiltinID == clang::AArch64::BI__builtin_arm_stlex4.28k ) && |
10696 | 4.30k | getContext().getTypeSize(E->getArg(0)->getType()) == 12821 ) { |
10697 | 2 | Function *F = |
10698 | 2 | CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_stlex |
10699 | 2 | ? Intrinsic::aarch64_stlxp1 |
10700 | 2 | : Intrinsic::aarch64_stxp1 ); |
10701 | 2 | llvm::Type *STy = llvm::StructType::get(Int64Ty, Int64Ty); |
10702 | | |
10703 | 2 | Address Tmp = CreateMemTemp(E->getArg(0)->getType()); |
10704 | 2 | EmitAnyExprToMem(E->getArg(0), Tmp, Qualifiers(), /*init*/ true); |
10705 | | |
10706 | 2 | Tmp = Tmp.withElementType(STy); |
10707 | 2 | llvm::Value *Val = Builder.CreateLoad(Tmp); |
10708 | | |
10709 | 2 | Value *Arg0 = Builder.CreateExtractValue(Val, 0); |
10710 | 2 | Value *Arg1 = Builder.CreateExtractValue(Val, 1); |
10711 | 2 | Value *StPtr = EmitScalarExpr(E->getArg(1)); |
10712 | 2 | return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "stxp"); |
10713 | 2 | } |
10714 | | |
10715 | 4.29k | if (BuiltinID == clang::AArch64::BI__builtin_arm_strex || |
10716 | 4.29k | BuiltinID == clang::AArch64::BI__builtin_arm_stlex4.28k ) { |
10717 | 19 | Value *StoreVal = EmitScalarExpr(E->getArg(0)); |
10718 | 19 | Value *StoreAddr = EmitScalarExpr(E->getArg(1)); |
10719 | | |
10720 | 19 | QualType Ty = E->getArg(0)->getType(); |
10721 | 19 | llvm::Type *StoreTy = |
10722 | 19 | llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty)); |
10723 | | |
10724 | 19 | if (StoreVal->getType()->isPointerTy()) |
10725 | 2 | StoreVal = Builder.CreatePtrToInt(StoreVal, Int64Ty); |
10726 | 17 | else { |
10727 | 17 | llvm::Type *IntTy = llvm::IntegerType::get( |
10728 | 17 | getLLVMContext(), |
10729 | 17 | CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType())); |
10730 | 17 | StoreVal = Builder.CreateBitCast(StoreVal, IntTy); |
10731 | 17 | StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int64Ty); |
10732 | 17 | } |
10733 | | |
10734 | 19 | Function *F = |
10735 | 19 | CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_stlex |
10736 | 19 | ? Intrinsic::aarch64_stlxr7 |
10737 | 19 | : Intrinsic::aarch64_stxr12 , |
10738 | 19 | StoreAddr->getType()); |
10739 | 19 | CallInst *CI = Builder.CreateCall(F, {StoreVal, StoreAddr}, "stxr"); |
10740 | 19 | CI->addParamAttr( |
10741 | 19 | 1, Attribute::get(getLLVMContext(), Attribute::ElementType, StoreTy)); |
10742 | 19 | return CI; |
10743 | 19 | } |
10744 | | |
10745 | 4.27k | if (BuiltinID == clang::AArch64::BI__getReg) { |
10746 | 4 | Expr::EvalResult Result; |
10747 | 4 | if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext())) |
10748 | 0 | llvm_unreachable("Sema will ensure that the parameter is constant"); |
10749 | | |
10750 | 4 | llvm::APSInt Value = Result.Val.getInt(); |
10751 | 4 | LLVMContext &Context = CGM.getLLVMContext(); |
10752 | 4 | std::string Reg = Value == 31 ? "sp"2 : "x" + toString(Value, 10)2 ; |
10753 | | |
10754 | 4 | llvm::Metadata *Ops[] = {llvm::MDString::get(Context, Reg)}; |
10755 | 4 | llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops); |
10756 | 4 | llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName); |
10757 | | |
10758 | 4 | llvm::Function *F = |
10759 | 4 | CGM.getIntrinsic(llvm::Intrinsic::read_register, {Int64Ty}); |
10760 | 4 | return Builder.CreateCall(F, Metadata); |
10761 | 4 | } |
10762 | | |
10763 | 4.27k | if (BuiltinID == clang::AArch64::BI__break) { |
10764 | 2 | Expr::EvalResult Result; |
10765 | 2 | if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext())) |
10766 | 0 | llvm_unreachable("Sema will ensure that the parameter is constant"); |
10767 | | |
10768 | 2 | llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::aarch64_break); |
10769 | 2 | return Builder.CreateCall(F, {EmitScalarExpr(E->getArg(0))}); |
10770 | 2 | } |
10771 | | |
10772 | 4.27k | if (BuiltinID == clang::AArch64::BI__builtin_arm_clrex) { |
10773 | 1 | Function *F = CGM.getIntrinsic(Intrinsic::aarch64_clrex); |
10774 | 1 | return Builder.CreateCall(F); |
10775 | 1 | } |
10776 | | |
10777 | 4.27k | if (BuiltinID == clang::AArch64::BI_ReadWriteBarrier) |
10778 | 2 | return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, |
10779 | 2 | llvm::SyncScope::SingleThread); |
10780 | | |
10781 | | // CRC32 |
10782 | 4.27k | Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic; |
10783 | 4.27k | switch (BuiltinID) { |
10784 | 7 | case clang::AArch64::BI__builtin_arm_crc32b: |
10785 | 7 | CRCIntrinsicID = Intrinsic::aarch64_crc32b; break; |
10786 | 6 | case clang::AArch64::BI__builtin_arm_crc32cb: |
10787 | 6 | CRCIntrinsicID = Intrinsic::aarch64_crc32cb; break; |
10788 | 6 | case clang::AArch64::BI__builtin_arm_crc32h: |
10789 | 6 | CRCIntrinsicID = Intrinsic::aarch64_crc32h; break; |
10790 | 6 | case clang::AArch64::BI__builtin_arm_crc32ch: |
10791 | 6 | CRCIntrinsicID = Intrinsic::aarch64_crc32ch; break; |
10792 | 6 | case clang::AArch64::BI__builtin_arm_crc32w: |
10793 | 6 | CRCIntrinsicID = Intrinsic::aarch64_crc32w; break; |
10794 | 6 | case clang::AArch64::BI__builtin_arm_crc32cw: |
10795 | 6 | CRCIntrinsicID = Intrinsic::aarch64_crc32cw; break; |
10796 | 6 | case clang::AArch64::BI__builtin_arm_crc32d: |
10797 | 6 | CRCIntrinsicID = Intrinsic::aarch64_crc32x; break; |
10798 | 7 | case clang::AArch64::BI__builtin_arm_crc32cd: |
10799 | 7 | CRCIntrinsicID = Intrinsic::aarch64_crc32cx; break; |
10800 | 4.27k | } |
10801 | | |
10802 | 4.27k | if (CRCIntrinsicID != Intrinsic::not_intrinsic) { |
10803 | 50 | Value *Arg0 = EmitScalarExpr(E->getArg(0)); |
10804 | 50 | Value *Arg1 = EmitScalarExpr(E->getArg(1)); |
10805 | 50 | Function *F = CGM.getIntrinsic(CRCIntrinsicID); |
10806 | | |
10807 | 50 | llvm::Type *DataTy = F->getFunctionType()->getParamType(1); |
10808 | 50 | Arg1 = Builder.CreateZExtOrBitCast(Arg1, DataTy); |
10809 | | |
10810 | 50 | return Builder.CreateCall(F, {Arg0, Arg1}); |
10811 | 50 | } |
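 | | // Editor's sketch (hypothetical helper): the zext-or-bitcast above means
 | | // a byte-wide CRC step
 | | //
 | | //   uint32_t crc_step(uint32_t Acc, uint8_t Byte) {
 | | //     return __builtin_arm_crc32b(Acc, Byte);
 | | //   }
 | | //
 | | // widens Byte to i32 before calling llvm.aarch64.crc32b.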
10812 | | |
10813 | | // Memory Operations (MOPS) |
10814 | 4.22k | if (BuiltinID == AArch64::BI__builtin_arm_mops_memset_tag) { |
10815 | 13 | Value *Dst = EmitScalarExpr(E->getArg(0)); |
10816 | 13 | Value *Val = EmitScalarExpr(E->getArg(1)); |
10817 | 13 | Value *Size = EmitScalarExpr(E->getArg(2)); |
10818 | 13 | Dst = Builder.CreatePointerCast(Dst, Int8PtrTy); |
10819 | 13 | Val = Builder.CreateTrunc(Val, Int8Ty); |
10820 | 13 | Size = Builder.CreateIntCast(Size, Int64Ty, false); |
10821 | 13 | return Builder.CreateCall( |
10822 | 13 | CGM.getIntrinsic(Intrinsic::aarch64_mops_memset_tag), {Dst, Val, Size}); |
10823 | 13 | } |
10824 | | |
10825 | | // Memory Tagging Extensions (MTE) Intrinsics |
10826 | 4.20k | Intrinsic::ID MTEIntrinsicID = Intrinsic::not_intrinsic; |
10827 | 4.20k | switch (BuiltinID) { |
10828 | 7 | case clang::AArch64::BI__builtin_arm_irg: |
10829 | 7 | MTEIntrinsicID = Intrinsic::aarch64_irg; break; |
10830 | 4 | case clang::AArch64::BI__builtin_arm_addg: |
10831 | 4 | MTEIntrinsicID = Intrinsic::aarch64_addg; break; |
10832 | 2 | case clang::AArch64::BI__builtin_arm_gmi: |
10833 | 2 | MTEIntrinsicID = Intrinsic::aarch64_gmi; break; |
10834 | 4 | case clang::AArch64::BI__builtin_arm_ldg: |
10835 | 4 | MTEIntrinsicID = Intrinsic::aarch64_ldg; break; |
10836 | 2 | case clang::AArch64::BI__builtin_arm_stg: |
10837 | 2 | MTEIntrinsicID = Intrinsic::aarch64_stg; break; |
10838 | 8 | case clang::AArch64::BI__builtin_arm_subp: |
10839 | 8 | MTEIntrinsicID = Intrinsic::aarch64_subp; break; |
10840 | 4.20k | } |
10841 | | |
10842 | 4.20k | if (MTEIntrinsicID != Intrinsic::not_intrinsic) { |
10843 | 27 | llvm::Type *T = ConvertType(E->getType()); |
10844 | | |
10845 | 27 | if (MTEIntrinsicID == Intrinsic::aarch64_irg) { |
10846 | 7 | Value *Pointer = EmitScalarExpr(E->getArg(0)); |
10847 | 7 | Value *Mask = EmitScalarExpr(E->getArg(1)); |
10848 | | |
10849 | 7 | Pointer = Builder.CreatePointerCast(Pointer, Int8PtrTy); |
10850 | 7 | Mask = Builder.CreateZExt(Mask, Int64Ty); |
10851 | 7 | Value *RV = Builder.CreateCall( |
10852 | 7 | CGM.getIntrinsic(MTEIntrinsicID), {Pointer, Mask}); |
10853 | 7 | return Builder.CreatePointerCast(RV, T); |
10854 | 7 | } |
10855 | 20 | if (MTEIntrinsicID == Intrinsic::aarch64_addg) { |
10856 | 4 | Value *Pointer = EmitScalarExpr(E->getArg(0)); |
10857 | 4 | Value *TagOffset = EmitScalarExpr(E->getArg(1)); |
10858 | | |
10859 | 4 | Pointer = Builder.CreatePointerCast(Pointer, Int8PtrTy); |
10860 | 4 | TagOffset = Builder.CreateZExt(TagOffset, Int64Ty); |
10861 | 4 | Value *RV = Builder.CreateCall( |
10862 | 4 | CGM.getIntrinsic(MTEIntrinsicID), {Pointer, TagOffset}); |
10863 | 4 | return Builder.CreatePointerCast(RV, T); |
10864 | 4 | } |
10865 | 16 | if (MTEIntrinsicID == Intrinsic::aarch64_gmi) { |
10866 | 2 | Value *Pointer = EmitScalarExpr(E->getArg(0)); |
10867 | 2 | Value *ExcludedMask = EmitScalarExpr(E->getArg(1)); |
10868 | | |
10869 | 2 | ExcludedMask = Builder.CreateZExt(ExcludedMask, Int64Ty); |
10870 | 2 | Pointer = Builder.CreatePointerCast(Pointer, Int8PtrTy); |
10871 | 2 | return Builder.CreateCall( |
10872 | 2 | CGM.getIntrinsic(MTEIntrinsicID), {Pointer, ExcludedMask}); |
10873 | 2 | } |
10874 | | // Although it is possible to supply a different return
10875 | | // address (first arg) to this intrinsic, for now we set the
10876 | | // return address to be the same as the input address.
10877 | 14 | if (MTEIntrinsicID == Intrinsic::aarch64_ldg) { |
10878 | 4 | Value *TagAddress = EmitScalarExpr(E->getArg(0)); |
10879 | 4 | TagAddress = Builder.CreatePointerCast(TagAddress, Int8PtrTy); |
10880 | 4 | Value *RV = Builder.CreateCall( |
10881 | 4 | CGM.getIntrinsic(MTEIntrinsicID), {TagAddress, TagAddress}); |
10882 | 4 | return Builder.CreatePointerCast(RV, T); |
10883 | 4 | } |
10884 | | // Although it is possible to supply a different tag (to set)
10885 | | // to this intrinsic (as first arg), for now we supply the tag
10886 | | // that is in the input address arg (the common use case).
10887 | 10 | if (MTEIntrinsicID == Intrinsic::aarch64_stg) { |
10888 | 2 | Value *TagAddress = EmitScalarExpr(E->getArg(0)); |
10889 | 2 | TagAddress = Builder.CreatePointerCast(TagAddress, Int8PtrTy); |
10890 | 2 | return Builder.CreateCall( |
10891 | 2 | CGM.getIntrinsic(MTEIntrinsicID), {TagAddress, TagAddress}); |
10892 | 2 | } |
10893 | 8 | if (MTEIntrinsicID == Intrinsic::aarch64_subp) { |
10894 | 8 | Value *PointerA = EmitScalarExpr(E->getArg(0)); |
10895 | 8 | Value *PointerB = EmitScalarExpr(E->getArg(1)); |
10896 | 8 | PointerA = Builder.CreatePointerCast(PointerA, Int8PtrTy); |
10897 | 8 | PointerB = Builder.CreatePointerCast(PointerB, Int8PtrTy); |
10898 | 8 | return Builder.CreateCall( |
10899 | 8 | CGM.getIntrinsic(MTEIntrinsicID), {PointerA, PointerB}); |
10900 | 8 | } |
10901 | 8 | } |
10902 | | |
10903 | 4.18k | if (BuiltinID == clang::AArch64::BI__builtin_arm_rsr || |
10904 | 4.18k | BuiltinID == clang::AArch64::BI__builtin_arm_rsr644.16k || |
10905 | 4.18k | BuiltinID == clang::AArch64::BI__builtin_arm_rsr1284.15k || |
10906 | 4.18k | BuiltinID == clang::AArch64::BI__builtin_arm_rsrp4.14k || |
10907 | 4.18k | BuiltinID == clang::AArch64::BI__builtin_arm_wsr4.14k || |
10908 | 4.18k | BuiltinID == clang::AArch64::BI__builtin_arm_wsr644.12k || |
10909 | 4.18k | BuiltinID == clang::AArch64::BI__builtin_arm_wsr1284.11k || |
10910 | 4.18k | BuiltinID == clang::AArch64::BI__builtin_arm_wsrp4.10k ) { |
10911 | | |
10912 | 80 | SpecialRegisterAccessKind AccessKind = Write; |
10913 | 80 | if (BuiltinID == clang::AArch64::BI__builtin_arm_rsr || |
10914 | 80 | BuiltinID == clang::AArch64::BI__builtin_arm_rsr6467 || |
10915 | 80 | BuiltinID == clang::AArch64::BI__builtin_arm_rsr12854 || |
10916 | 80 | BuiltinID == clang::AArch64::BI__builtin_arm_rsrp49 ) |
10917 | 40 | AccessKind = VolatileRead; |
10918 | | |
10919 | 80 | bool IsPointerBuiltin = BuiltinID == clang::AArch64::BI__builtin_arm_rsrp || |
10920 | 80 | BuiltinID == clang::AArch64::BI__builtin_arm_wsrp71 ; |
10921 | | |
10922 | 80 | bool Is32Bit = BuiltinID == clang::AArch64::BI__builtin_arm_rsr || |
10923 | 80 | BuiltinID == clang::AArch64::BI__builtin_arm_wsr67 ; |
10924 | | |
10925 | 80 | bool Is128Bit = BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 || |
10926 | 80 | BuiltinID == clang::AArch64::BI__builtin_arm_wsr12875 ; |
10927 | | |
10928 | 80 | llvm::Type *ValueType; |
10929 | 80 | llvm::Type *RegisterType = Int64Ty; |
10930 | 80 | if (Is32Bit) { |
10931 | 26 | ValueType = Int32Ty; |
10932 | 54 | } else if (Is128Bit) { |
10933 | 10 | llvm::Type *Int128Ty = |
10934 | 10 | llvm::IntegerType::getInt128Ty(CGM.getLLVMContext()); |
10935 | 10 | ValueType = Int128Ty; |
10936 | 10 | RegisterType = Int128Ty; |
10937 | 44 | } else if (IsPointerBuiltin) { |
10938 | 18 | ValueType = VoidPtrTy; |
10939 | 26 | } else { |
10940 | 26 | ValueType = Int64Ty; |
10941 | 26 | }
10942 | | |
10943 | 80 | return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType, |
10944 | 80 | AccessKind); |
10945 | 80 | } |
10946 | | |
10947 | 4.10k | if (BuiltinID == clang::AArch64::BI_ReadStatusReg || |
10948 | 4.10k | BuiltinID == clang::AArch64::BI_WriteStatusReg4.06k ) { |
10949 | 80 | LLVMContext &Context = CGM.getLLVMContext(); |
10950 | | |
10951 | 80 | unsigned SysReg = |
10952 | 80 | E->getArg(0)->EvaluateKnownConstInt(getContext()).getZExtValue(); |
10953 | | |
10954 | 80 | std::string SysRegStr; |
10955 | 80 | llvm::raw_string_ostream(SysRegStr) << |
10956 | 80 | ((1 << 1) | ((SysReg >> 14) & 1)) << ":" << |
10957 | 80 | ((SysReg >> 11) & 7) << ":" << |
10958 | 80 | ((SysReg >> 7) & 15) << ":" << |
10959 | 80 | ((SysReg >> 3) & 15) << ":" << |
10960 | 80 | ( SysReg & 7); |
10961 | | |
10962 | 80 | llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysRegStr) }; |
10963 | 80 | llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops); |
10964 | 80 | llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName); |
10965 | | |
10966 | 80 | llvm::Type *RegisterType = Int64Ty; |
10967 | 80 | llvm::Type *Types[] = { RegisterType }; |
10968 | | |
10969 | 80 | if (BuiltinID == clang::AArch64::BI_ReadStatusReg) { |
10970 | 40 | llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, Types); |
10971 | | |
10972 | 40 | return Builder.CreateCall(F, Metadata); |
10973 | 40 | } |
10974 | | |
10975 | 40 | llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types); |
10976 | 40 | llvm::Value *ArgValue = EmitScalarExpr(E->getArg(1)); |
10977 | | |
10978 | 40 | return Builder.CreateCall(F, { Metadata, ArgValue }); |
10979 | 80 | } |
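 | | // Editor's worked example: the packed MSVC encoding is
 | | //   (op0 & 1) << 14 | op1 << 11 | CRn << 7 | CRm << 3 | op2
 | | // so _ReadStatusReg(ARM64_SYSREG(3, 3, 13, 0, 2)) /* TPIDR_EL0 */ is
 | | // rendered as the register string "3:3:13:0:2" above.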
10980 | | |
10981 | 4.02k | if (BuiltinID == clang::AArch64::BI_AddressOfReturnAddress) { |
10982 | 1 | llvm::Function *F = |
10983 | 1 | CGM.getIntrinsic(Intrinsic::addressofreturnaddress, AllocaInt8PtrTy); |
10984 | 1 | return Builder.CreateCall(F); |
10985 | 1 | } |
10986 | | |
10987 | 4.01k | if (BuiltinID == clang::AArch64::BI__builtin_sponentry) { |
10988 | 1 | llvm::Function *F = CGM.getIntrinsic(Intrinsic::sponentry, AllocaInt8PtrTy); |
10989 | 1 | return Builder.CreateCall(F); |
10990 | 1 | } |
10991 | | |
10992 | 4.01k | if (BuiltinID == clang::AArch64::BI__mulh || |
10993 | 4.01k | BuiltinID == clang::AArch64::BI__umulh4.01k ) { |
10994 | 4 | llvm::Type *ResType = ConvertType(E->getType()); |
10995 | 4 | llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128); |
10996 | | |
10997 | 4 | bool IsSigned = BuiltinID == clang::AArch64::BI__mulh; |
10998 | 4 | Value *LHS = |
10999 | 4 | Builder.CreateIntCast(EmitScalarExpr(E->getArg(0)), Int128Ty, IsSigned); |
11000 | 4 | Value *RHS = |
11001 | 4 | Builder.CreateIntCast(EmitScalarExpr(E->getArg(1)), Int128Ty, IsSigned); |
11002 | | |
11003 | 4 | Value *MulResult, *HigherBits; |
11004 | 4 | if (IsSigned) { |
11005 | 2 | MulResult = Builder.CreateNSWMul(LHS, RHS); |
11006 | 2 | HigherBits = Builder.CreateAShr(MulResult, 64); |
11007 | 2 | } else { |
11008 | 2 | MulResult = Builder.CreateNUWMul(LHS, RHS); |
11009 | 2 | HigherBits = Builder.CreateLShr(MulResult, 64); |
11010 | 2 | } |
11011 | 4 | HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned); |
11012 | | |
11013 | 4 | return HigherBits; |
11014 | 4 | } |
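 | | // Editor's sketch (hypothetical helper): __umulh is equivalent to this
 | | // formulation using the compiler's 128-bit integer extension,
 | | //
 | | //   uint64_t umulh(uint64_t A, uint64_t B) {
 | | //     return (uint64_t)(((unsigned __int128)A * B) >> 64);
 | | //   }
 | | //
 | | // with __mulh using the signed __int128 analogue.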
11015 | | |
11016 | 4.01k | if (BuiltinID == AArch64::BI__writex18byte || |
11017 | 4.01k | BuiltinID == AArch64::BI__writex18word4.01k || |
11018 | 4.01k | BuiltinID == AArch64::BI__writex18dword4.01k || |
11019 | 4.01k | BuiltinID == AArch64::BI__writex18qword4.00k ) { |
11020 | | // Read x18 as i8* |
11021 | 8 | LLVMContext &Context = CGM.getLLVMContext(); |
11022 | 8 | llvm::Metadata *Ops[] = {llvm::MDString::get(Context, "x18")}; |
11023 | 8 | llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops); |
11024 | 8 | llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName); |
11025 | 8 | llvm::Function *F = |
11026 | 8 | CGM.getIntrinsic(llvm::Intrinsic::read_register, {Int64Ty}); |
11027 | 8 | llvm::Value *X18 = Builder.CreateCall(F, Metadata); |
11028 | 8 | X18 = Builder.CreateIntToPtr(X18, Int8PtrTy); |
11029 | | |
11030 | | // Store val at x18 + offset |
11031 | 8 | Value *Offset = Builder.CreateZExt(EmitScalarExpr(E->getArg(0)), Int64Ty); |
11032 | 8 | Value *Ptr = Builder.CreateGEP(Int8Ty, X18, Offset); |
11033 | 8 | Value *Val = EmitScalarExpr(E->getArg(1)); |
11034 | 8 | StoreInst *Store = Builder.CreateAlignedStore(Val, Ptr, CharUnits::One()); |
11035 | 8 | return Store; |
11036 | 8 | } |
11037 | | |
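// The __writex18* builtins (and the __readx18* builtins below) treat the
// x18 register as an opaque base pointer and access memory at a byte offset
// from it with alignment 1. A hedged model with an ordinary pointer standing
// in for x18 (names illustrative):
#include <cstdint>
#include <cstring>
void writex18dword_model(unsigned char *x18, uint64_t offset, uint32_t val) {
  std::memcpy(x18 + offset, &val, sizeof val); // unaligned-safe 4-byte store
}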
11038 | 4.00k | if (BuiltinID == AArch64::BI__readx18byte || |
11039 | 4.00k | BuiltinID == AArch64::BI__readx18word ||
11040 | 4.00k | BuiltinID == AArch64::BI__readx18dword ||
11041 | 4.00k | BuiltinID == AArch64::BI__readx18qword) {
11042 | 8 | llvm::Type *IntTy = ConvertType(E->getType()); |
11043 | | |
11044 | | // Read x18 as i8* |
11045 | 8 | LLVMContext &Context = CGM.getLLVMContext(); |
11046 | 8 | llvm::Metadata *Ops[] = {llvm::MDString::get(Context, "x18")}; |
11047 | 8 | llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops); |
11048 | 8 | llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName); |
11049 | 8 | llvm::Function *F = |
11050 | 8 | CGM.getIntrinsic(llvm::Intrinsic::read_register, {Int64Ty}); |
11051 | 8 | llvm::Value *X18 = Builder.CreateCall(F, Metadata); |
11052 | 8 | X18 = Builder.CreateIntToPtr(X18, Int8PtrTy); |
11053 | | |
11054 | | // Load x18 + offset |
11055 | 8 | Value *Offset = Builder.CreateZExt(EmitScalarExpr(E->getArg(0)), Int64Ty); |
11056 | 8 | Value *Ptr = Builder.CreateGEP(Int8Ty, X18, Offset); |
11057 | 8 | LoadInst *Load = Builder.CreateAlignedLoad(IntTy, Ptr, CharUnits::One()); |
11058 | 8 | return Load; |
11059 | 8 | } |
11060 | | |
11061 | 3.99k | if (BuiltinID == AArch64::BI_CopyDoubleFromInt64 || |
11062 | 3.99k | BuiltinID == AArch64::BI_CopyFloatFromInt32 ||
11063 | 3.99k | BuiltinID == AArch64::BI_CopyInt32FromFloat ||
11064 | 3.99k | BuiltinID == AArch64::BI_CopyInt64FromDouble) {
11065 | 8 | Value *Arg = EmitScalarExpr(E->getArg(0)); |
11066 | 8 | llvm::Type *RetTy = ConvertType(E->getType()); |
11067 | 8 | return Builder.CreateBitCast(Arg, RetTy); |
11068 | 8 | } |
11069 | | |
11070 | 3.99k | if (BuiltinID == AArch64::BI_CountLeadingOnes || |
11071 | 3.99k | BuiltinID == AArch64::BI_CountLeadingOnes64 ||
11072 | 3.99k | BuiltinID == AArch64::BI_CountLeadingZeros ||
11073 | 3.99k | BuiltinID == AArch64::BI_CountLeadingZeros64) {
11074 | 8 | Value *Arg = EmitScalarExpr(E->getArg(0)); |
11075 | 8 | llvm::Type *ArgType = Arg->getType(); |
11076 | | |
11077 | 8 | if (BuiltinID == AArch64::BI_CountLeadingOnes || |
11078 | 8 | BuiltinID == AArch64::BI_CountLeadingOnes64)
11079 | 4 | Arg = Builder.CreateXor(Arg, Constant::getAllOnesValue(ArgType)); |
11080 | | |
11081 | 8 | Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType); |
11082 | 8 | Value *Result = Builder.CreateCall(F, {Arg, Builder.getInt1(false)}); |
11083 | | |
11084 | 8 | if (BuiltinID == AArch64::BI_CountLeadingOnes64 || |
11085 | 8 | BuiltinID == AArch64::BI_CountLeadingZeros64)
11086 | 4 | Result = Builder.CreateTrunc(Result, Builder.getInt32Ty()); |
11087 | 8 | return Result; |
11088 | 8 | } |
11089 | | |
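// _CountLeadingOnes is lowered as ctlz(~x): inverting the argument turns
// leading ones into leading zeros. C++20 <bit> exposes the same identity:
#include <bit>
#include <cstdint>
int count_leading_ones_model(uint32_t x) {
  return std::countl_zero(~x); // equals std::countl_one(x)
}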
11090 | 3.98k | if (BuiltinID == AArch64::BI_CountLeadingSigns || |
11091 | 3.98k | BuiltinID == AArch64::BI_CountLeadingSigns64) {
11092 | 4 | Value *Arg = EmitScalarExpr(E->getArg(0)); |
11093 | | |
11094 | 4 | Function *F = (BuiltinID == AArch64::BI_CountLeadingSigns) |
11095 | 4 | ? CGM.getIntrinsic(Intrinsic::aarch64_cls)
11096 | 4 | : CGM.getIntrinsic(Intrinsic::aarch64_cls64);
11097 | | |
11098 | 4 | Value *Result = Builder.CreateCall(F, Arg, "cls"); |
11099 | 4 | if (BuiltinID == AArch64::BI_CountLeadingSigns64) |
11100 | 2 | Result = Builder.CreateTrunc(Result, Builder.getInt32Ty()); |
11101 | 4 | return Result; |
11102 | 4 | } |
11103 | | |
11104 | 3.97k | if (BuiltinID == AArch64::BI_CountOneBits || |
11105 | 3.97k | BuiltinID == AArch64::BI_CountOneBits64) {
11106 | 4 | Value *ArgValue = EmitScalarExpr(E->getArg(0)); |
11107 | 4 | llvm::Type *ArgType = ArgValue->getType(); |
11108 | 4 | Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType); |
11109 | | |
11110 | 4 | Value *Result = Builder.CreateCall(F, ArgValue); |
11111 | 4 | if (BuiltinID == AArch64::BI_CountOneBits64) |
11112 | 2 | Result = Builder.CreateTrunc(Result, Builder.getInt32Ty()); |
11113 | 4 | return Result; |
11114 | 4 | } |
11115 | | |
11116 | 3.97k | if (BuiltinID == AArch64::BI__prefetch) { |
11117 | 2 | Value *Address = EmitScalarExpr(E->getArg(0)); |
11118 | 2 | Value *RW = llvm::ConstantInt::get(Int32Ty, 0); |
11119 | 2 | Value *Locality = ConstantInt::get(Int32Ty, 3); |
11120 | 2 | Value *Data = llvm::ConstantInt::get(Int32Ty, 1); |
11121 | 2 | Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType()); |
11122 | 2 | return Builder.CreateCall(F, {Address, RW, Locality, Data}); |
11123 | 2 | } |
11124 | | |
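// __prefetch pins all three llvm.prefetch operands: RW=0 (a read),
// Locality=3 (keep in all cache levels), Data=1 (data rather than
// instruction cache). Those match the defaults of the GCC-style builtin,
// so a hedged one-line model is:
void prefetch_model(const void *p) { __builtin_prefetch(p, /*rw=*/0, /*locality=*/3); }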
11125 | | // Handle MSVC intrinsics before argument evaluation to prevent double |
11126 | | // evaluation. |
11127 | 3.97k | if (std::optional<MSVCIntrin> MsvcIntId = |
11128 | 3.97k | translateAarch64ToMsvcIntrin(BuiltinID)) |
11129 | 210 | return EmitMSVCBuiltinExpr(*MsvcIntId, E); |
11130 | | |
11131 | | // Some intrinsics are equivalent - if they are, use the base intrinsic ID.
11132 | 547k | auto It = llvm::find_if(NEONEquivalentIntrinsicMap, [BuiltinID](auto &P) {
11133 | 547k | return P.first == BuiltinID; |
11134 | 547k | }); |
11135 | 3.76k | if (It != end(NEONEquivalentIntrinsicMap)) |
11136 | 206 | BuiltinID = It->second; |
11137 | | |
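// NEONEquivalentIntrinsicMap is an array of (alias ID, canonical ID) pairs
// searched linearly, exactly as llvm::find_if does above. A minimal sketch
// of the same remapping with the standard library (table contents
// hypothetical):
#include <algorithm>
#include <iterator>
#include <utility>
static constexpr std::pair<unsigned, unsigned> EquivMap[] = {{101, 1}, {102, 2}};
unsigned canonicalize(unsigned ID) {
  const auto *It = std::find_if(std::begin(EquivMap), std::end(EquivMap),
                                [ID](const auto &P) { return P.first == ID; });
  return It != std::end(EquivMap) ? It->second : ID;
}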
11138 | | // Find out if any arguments are required to be integer constant |
11139 | | // expressions. |
11140 | 3.76k | unsigned ICEArguments = 0; |
11141 | 3.76k | ASTContext::GetBuiltinTypeError Error; |
11142 | 3.76k | getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments); |
11143 | 3.76k | assert(Error == ASTContext::GE_None && "Should not codegen an error"); |
11144 | | |
11145 | 3.76k | llvm::SmallVector<Value*, 4> Ops; |
11146 | 3.76k | Address PtrOp0 = Address::invalid(); |
11147 | 11.3k | for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) {
11148 | 7.63k | if (i == 0) { |
11149 | 3.47k | switch (BuiltinID) { |
11150 | 32 | case NEON::BI__builtin_neon_vld1_v: |
11151 | 47 | case NEON::BI__builtin_neon_vld1q_v: |
11152 | 62 | case NEON::BI__builtin_neon_vld1_dup_v: |
11153 | 77 | case NEON::BI__builtin_neon_vld1q_dup_v: |
11154 | 92 | case NEON::BI__builtin_neon_vld1_lane_v: |
11155 | 107 | case NEON::BI__builtin_neon_vld1q_lane_v: |
11156 | 122 | case NEON::BI__builtin_neon_vst1_v: |
11157 | 137 | case NEON::BI__builtin_neon_vst1q_v: |
11158 | 152 | case NEON::BI__builtin_neon_vst1_lane_v: |
11159 | 167 | case NEON::BI__builtin_neon_vst1q_lane_v: |
11160 | 171 | case NEON::BI__builtin_neon_vldap1_lane_s64: |
11161 | 175 | case NEON::BI__builtin_neon_vldap1q_lane_s64: |
11162 | 179 | case NEON::BI__builtin_neon_vstl1_lane_s64: |
11163 | 183 | case NEON::BI__builtin_neon_vstl1q_lane_s64: |
11164 | | // Get the alignment for the argument in addition to the value; |
11165 | | // we'll use it later. |
11166 | 183 | PtrOp0 = EmitPointerWithAlignment(E->getArg(0)); |
11167 | 183 | Ops.push_back(PtrOp0.getPointer()); |
11168 | 183 | continue; |
11169 | 3.47k | } |
11170 | 3.47k | } |
11171 | 7.45k | Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E)); |
11172 | 7.45k | } |
11173 | | |
11174 | 3.76k | auto SISDMap = ArrayRef(AArch64SISDIntrinsicMap); |
11175 | 3.76k | const ARMVectorIntrinsicInfo *Builtin = findARMVectorIntrinsicInMap( |
11176 | 3.76k | SISDMap, BuiltinID, AArch64SISDIntrinsicsProvenSorted); |
11177 | | |
11178 | 3.76k | if (Builtin) { |
11179 | 271 | Ops.push_back(EmitScalarExpr(E->getArg(E->getNumArgs() - 1))); |
11180 | 271 | Value *Result = EmitCommonNeonSISDBuiltinExpr(*this, *Builtin, Ops, E); |
11181 | 271 | assert(Result && "SISD intrinsic should have been handled"); |
11182 | 271 | return Result; |
11183 | 271 | } |
11184 | | |
11185 | 3.49k | const Expr *Arg = E->getArg(E->getNumArgs()-1); |
11186 | 3.49k | NeonTypeFlags Type(0); |
11187 | 3.49k | if (std::optional<llvm::APSInt> Result = |
11188 | 3.49k | Arg->getIntegerConstantExpr(getContext())) |
11189 | | // Determine the type of this overloaded NEON intrinsic. |
11190 | 3.21k | Type = NeonTypeFlags(Result->getZExtValue()); |
11191 | | |
11192 | 3.49k | bool usgn = Type.isUnsigned(); |
11193 | 3.49k | bool quad = Type.isQuad(); |
11194 | | |
11195 | | // Handle non-overloaded intrinsics first. |
11196 | 3.49k | switch (BuiltinID) { |
11197 | 2.85k | default: break; |
11198 | 2.85k | case NEON::BI__builtin_neon_vabsh_f16: |
11199 | 1 | Ops.push_back(EmitScalarExpr(E->getArg(0))); |
11200 | 1 | return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, HalfTy), Ops, "vabs"); |
11201 | 1 | case NEON::BI__builtin_neon_vaddq_p128: { |
11202 | 1 | llvm::Type *Ty = GetNeonType(this, NeonTypeFlags::Poly128); |
11203 | 1 | Ops.push_back(EmitScalarExpr(E->getArg(1))); |
11204 | 1 | Ops[0] = Builder.CreateBitCast(Ops[0], Ty); |
11205 | 1 | Ops[1] = Builder.CreateBitCast(Ops[1], Ty); |
11206 | 1 | Ops[0] = Builder.CreateXor(Ops[0], Ops[1]); |
11207 | 1 | llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128); |
11208 | 1 | return Builder.CreateBitCast(Ops[0], Int128Ty); |
11209 | 0 | } |
11210 | 2 | case NEON::BI__builtin_neon_vldrq_p128: { |
11211 | 2 | llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128); |
11212 | 2 | Value *Ptr = EmitScalarExpr(E->getArg(0)); |
11213 | 2 | return Builder.CreateAlignedLoad(Int128Ty, Ptr, |
11214 | 2 | CharUnits::fromQuantity(16)); |
11215 | 0 | } |
11216 | 2 | case NEON::BI__builtin_neon_vstrq_p128: { |
11217 | 2 | Value *Ptr = Ops[0]; |
11218 | 2 | return Builder.CreateDefaultAlignedStore(EmitScalarExpr(E->getArg(1)), Ptr); |
11219 | 0 | } |
11220 | 3 | case NEON::BI__builtin_neon_vcvts_f32_u32: |
11221 | 6 | case NEON::BI__builtin_neon_vcvtd_f64_u64: |
11222 | 6 | usgn = true; |
11223 | 6 | [[fallthrough]]; |
11224 | 9 | case NEON::BI__builtin_neon_vcvts_f32_s32: |
11225 | 12 | case NEON::BI__builtin_neon_vcvtd_f64_s64: { |
11226 | 12 | Ops.push_back(EmitScalarExpr(E->getArg(0))); |
11227 | 12 | bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64; |
11228 | 12 | llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty;
11229 | 12 | llvm::Type *FTy = Is64 ? DoubleTy : FloatTy;
11230 | 12 | Ops[0] = Builder.CreateBitCast(Ops[0], InTy); |
11231 | 12 | if (usgn) |
11232 | 6 | return Builder.CreateUIToFP(Ops[0], FTy); |
11233 | 6 | return Builder.CreateSIToFP(Ops[0], FTy); |
11234 | 12 | } |
11235 | 3 | case NEON::BI__builtin_neon_vcvth_f16_u16: |
11236 | 6 | case NEON::BI__builtin_neon_vcvth_f16_u32: |
11237 | 9 | case NEON::BI__builtin_neon_vcvth_f16_u64: |
11238 | 9 | usgn = true; |
11239 | 9 | [[fallthrough]]; |
11240 | 12 | case NEON::BI__builtin_neon_vcvth_f16_s16: |
11241 | 15 | case NEON::BI__builtin_neon_vcvth_f16_s32: |
11242 | 18 | case NEON::BI__builtin_neon_vcvth_f16_s64: { |
11243 | 18 | Ops.push_back(EmitScalarExpr(E->getArg(0))); |
11244 | 18 | llvm::Type *FTy = HalfTy; |
11245 | 18 | llvm::Type *InTy; |
11246 | 18 | if (Ops[0]->getType()->getPrimitiveSizeInBits() == 64) |
11247 | 6 | InTy = Int64Ty; |
11248 | 12 | else if (Ops[0]->getType()->getPrimitiveSizeInBits() == 32) |
11249 | 6 | InTy = Int32Ty; |
11250 | 6 | else |
11251 | 6 | InTy = Int16Ty; |
11252 | 18 | Ops[0] = Builder.CreateBitCast(Ops[0], InTy); |
11253 | 18 | if (usgn) |
11254 | 9 | return Builder.CreateUIToFP(Ops[0], FTy); |
11255 | 9 | return Builder.CreateSIToFP(Ops[0], FTy); |
11256 | 18 | } |
11257 | 1 | case NEON::BI__builtin_neon_vcvtah_u16_f16: |
11258 | 2 | case NEON::BI__builtin_neon_vcvtmh_u16_f16: |
11259 | 3 | case NEON::BI__builtin_neon_vcvtnh_u16_f16: |
11260 | 4 | case NEON::BI__builtin_neon_vcvtph_u16_f16: |
11261 | 7 | case NEON::BI__builtin_neon_vcvth_u16_f16: |
11262 | 8 | case NEON::BI__builtin_neon_vcvtah_s16_f16: |
11263 | 9 | case NEON::BI__builtin_neon_vcvtmh_s16_f16: |
11264 | 10 | case NEON::BI__builtin_neon_vcvtnh_s16_f16: |
11265 | 11 | case NEON::BI__builtin_neon_vcvtph_s16_f16: |
11266 | 14 | case NEON::BI__builtin_neon_vcvth_s16_f16: { |
11267 | 14 | unsigned Int; |
11268 | 14 | llvm::Type* InTy = Int32Ty; |
11269 | 14 | llvm::Type* FTy = HalfTy; |
11270 | 14 | llvm::Type *Tys[2] = {InTy, FTy}; |
11271 | 14 | Ops.push_back(EmitScalarExpr(E->getArg(0))); |
11272 | 14 | switch (BuiltinID) { |
11273 | 0 | default: llvm_unreachable("missing builtin ID in switch!"); |
11274 | 1 | case NEON::BI__builtin_neon_vcvtah_u16_f16: |
11275 | 1 | Int = Intrinsic::aarch64_neon_fcvtau; break; |
11276 | 1 | case NEON::BI__builtin_neon_vcvtmh_u16_f16: |
11277 | 1 | Int = Intrinsic::aarch64_neon_fcvtmu; break; |
11278 | 1 | case NEON::BI__builtin_neon_vcvtnh_u16_f16: |
11279 | 1 | Int = Intrinsic::aarch64_neon_fcvtnu; break; |
11280 | 1 | case NEON::BI__builtin_neon_vcvtph_u16_f16: |
11281 | 1 | Int = Intrinsic::aarch64_neon_fcvtpu; break; |
11282 | 3 | case NEON::BI__builtin_neon_vcvth_u16_f16: |
11283 | 3 | Int = Intrinsic::aarch64_neon_fcvtzu; break; |
11284 | 1 | case NEON::BI__builtin_neon_vcvtah_s16_f16: |
11285 | 1 | Int = Intrinsic::aarch64_neon_fcvtas; break; |
11286 | 1 | case NEON::BI__builtin_neon_vcvtmh_s16_f16: |
11287 | 1 | Int = Intrinsic::aarch64_neon_fcvtms; break; |
11288 | 1 | case NEON::BI__builtin_neon_vcvtnh_s16_f16: |
11289 | 1 | Int = Intrinsic::aarch64_neon_fcvtns; break; |
11290 | 1 | case NEON::BI__builtin_neon_vcvtph_s16_f16: |
11291 | 1 | Int = Intrinsic::aarch64_neon_fcvtps; break; |
11292 | 3 | case NEON::BI__builtin_neon_vcvth_s16_f16: |
11293 | 3 | Int = Intrinsic::aarch64_neon_fcvtzs; break; |
11294 | 14 | } |
11295 | 14 | Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvt"); |
11296 | 14 | return Builder.CreateTrunc(Ops[0], Int16Ty); |
11297 | 14 | } |
11298 | 1 | case NEON::BI__builtin_neon_vcaleh_f16: |
11299 | 2 | case NEON::BI__builtin_neon_vcalth_f16: |
11300 | 3 | case NEON::BI__builtin_neon_vcageh_f16: |
11301 | 4 | case NEON::BI__builtin_neon_vcagth_f16: { |
11302 | 4 | unsigned Int; |
11303 | 4 | llvm::Type* InTy = Int32Ty; |
11304 | 4 | llvm::Type* FTy = HalfTy; |
11305 | 4 | llvm::Type *Tys[2] = {InTy, FTy}; |
11306 | 4 | Ops.push_back(EmitScalarExpr(E->getArg(1))); |
11307 | 4 | switch (BuiltinID) { |
11308 | 0 | default: llvm_unreachable("missing builtin ID in switch!"); |
11309 | 1 | case NEON::BI__builtin_neon_vcageh_f16: |
11310 | 1 | Int = Intrinsic::aarch64_neon_facge; break; |
11311 | 1 | case NEON::BI__builtin_neon_vcagth_f16: |
11312 | 1 | Int = Intrinsic::aarch64_neon_facgt; break; |
11313 | 1 | case NEON::BI__builtin_neon_vcaleh_f16: |
11314 | 1 | Int = Intrinsic::aarch64_neon_facge; std::swap(Ops[0], Ops[1]); break; |
11315 | 1 | case NEON::BI__builtin_neon_vcalth_f16: |
11316 | 1 | Int = Intrinsic::aarch64_neon_facgt; std::swap(Ops[0], Ops[1]); break; |
11317 | 4 | } |
11318 | 4 | Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "facg"); |
11319 | 4 | return Builder.CreateTrunc(Ops[0], Int16Ty); |
11320 | 4 | } |
11321 | 1 | case NEON::BI__builtin_neon_vcvth_n_s16_f16: |
11322 | 2 | case NEON::BI__builtin_neon_vcvth_n_u16_f16: { |
11323 | 2 | unsigned Int; |
11324 | 2 | llvm::Type* InTy = Int32Ty; |
11325 | 2 | llvm::Type* FTy = HalfTy; |
11326 | 2 | llvm::Type *Tys[2] = {InTy, FTy}; |
11327 | 2 | Ops.push_back(EmitScalarExpr(E->getArg(1))); |
11328 | 2 | switch (BuiltinID) { |
11329 | 0 | default: llvm_unreachable("missing builtin ID in switch!"); |
11330 | 1 | case NEON::BI__builtin_neon_vcvth_n_s16_f16: |
11331 | 1 | Int = Intrinsic::aarch64_neon_vcvtfp2fxs; break; |
11332 | 1 | case NEON::BI__builtin_neon_vcvth_n_u16_f16: |
11333 | 1 | Int = Intrinsic::aarch64_neon_vcvtfp2fxu; break; |
11334 | 2 | } |
11335 | 2 | Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvth_n"); |
11336 | 2 | return Builder.CreateTrunc(Ops[0], Int16Ty); |
11337 | 2 | } |
11338 | 1 | case NEON::BI__builtin_neon_vcvth_n_f16_s16: |
11339 | 2 | case NEON::BI__builtin_neon_vcvth_n_f16_u16: { |
11340 | 2 | unsigned Int; |
11341 | 2 | llvm::Type* FTy = HalfTy; |
11342 | 2 | llvm::Type* InTy = Int32Ty; |
11343 | 2 | llvm::Type *Tys[2] = {FTy, InTy}; |
11344 | 2 | Ops.push_back(EmitScalarExpr(E->getArg(1))); |
11345 | 2 | switch (BuiltinID) { |
11346 | 0 | default: llvm_unreachable("missing builtin ID in switch!"); |
11347 | 1 | case NEON::BI__builtin_neon_vcvth_n_f16_s16: |
11348 | 1 | Int = Intrinsic::aarch64_neon_vcvtfxs2fp; |
11349 | 1 | Ops[0] = Builder.CreateSExt(Ops[0], InTy, "sext"); |
11350 | 1 | break; |
11351 | 1 | case NEON::BI__builtin_neon_vcvth_n_f16_u16: |
11352 | 1 | Int = Intrinsic::aarch64_neon_vcvtfxu2fp; |
11353 | 1 | Ops[0] = Builder.CreateZExt(Ops[0], InTy); |
11354 | 1 | break; |
11355 | 2 | } |
11356 | 2 | return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvth_n"); |
11357 | 2 | } |
11358 | 0 | case NEON::BI__builtin_neon_vpaddd_s64: { |
11359 | 0 | auto *Ty = llvm::FixedVectorType::get(Int64Ty, 2); |
11360 | 0 | Value *Vec = EmitScalarExpr(E->getArg(0)); |
11361 | | // The vector is v2i64, so make sure it's bitcast to that.
11362 | 0 | Vec = Builder.CreateBitCast(Vec, Ty, "v2i64"); |
11363 | 0 | llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0); |
11364 | 0 | llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1); |
11365 | 0 | Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0"); |
11366 | 0 | Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1"); |
11367 | | // Pairwise addition of a v2i64 into a scalar i64.
11368 | 0 | return Builder.CreateAdd(Op0, Op1, "vpaddd"); |
11369 | 2 | } |
11370 | 3 | case NEON::BI__builtin_neon_vpaddd_f64: { |
11371 | 3 | auto *Ty = llvm::FixedVectorType::get(DoubleTy, 2); |
11372 | 3 | Value *Vec = EmitScalarExpr(E->getArg(0)); |
11373 | | // The vector is v2f64, so make sure it's bitcast to that. |
11374 | 3 | Vec = Builder.CreateBitCast(Vec, Ty, "v2f64"); |
11375 | 3 | llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0); |
11376 | 3 | llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1); |
11377 | 3 | Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0"); |
11378 | 3 | Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1"); |
11379 | | // Pairwise addition of a v2f64 into a scalar f64. |
11380 | 3 | return Builder.CreateFAdd(Op0, Op1, "vpaddd"); |
11381 | 2 | } |
11382 | 3 | case NEON::BI__builtin_neon_vpadds_f32: { |
11383 | 3 | auto *Ty = llvm::FixedVectorType::get(FloatTy, 2); |
11384 | 3 | Value *Vec = EmitScalarExpr(E->getArg(0)); |
11385 | | // The vector is v2f32, so make sure it's bitcast to that. |
11386 | 3 | Vec = Builder.CreateBitCast(Vec, Ty, "v2f32"); |
11387 | 3 | llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0); |
11388 | 3 | llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1); |
11389 | 3 | Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0"); |
11390 | 3 | Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1"); |
11391 | | // Pairwise addition of a v2f32 into a scalar f32. |
11392 | 3 | return Builder.CreateFAdd(Op0, Op1, "vpaddd"); |
11393 | 2 | } |
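// The three pairwise-add cases above share one shape: bitcast to a
// two-element vector, extract lane 0 and lane 1, and add them into a
// scalar. A hedged scalar model of vpadds_f32:
float vpadds_f32_model(const float v[2]) { return v[0] + v[1]; }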
11394 | 1 | case NEON::BI__builtin_neon_vceqzd_s64: |
11395 | 4 | case NEON::BI__builtin_neon_vceqzd_f64: |
11396 | 7 | case NEON::BI__builtin_neon_vceqzs_f32: |
11397 | 10 | case NEON::BI__builtin_neon_vceqzh_f16: |
11398 | 10 | Ops.push_back(EmitScalarExpr(E->getArg(0))); |
11399 | 10 | return EmitAArch64CompareBuiltinExpr( |
11400 | 10 | Ops[0], ConvertType(E->getCallReturnType(getContext())), |
11401 | 10 | ICmpInst::FCMP_OEQ, ICmpInst::ICMP_EQ, "vceqz"); |
11402 | 1 | case NEON::BI__builtin_neon_vcgezd_s64: |
11403 | 4 | case NEON::BI__builtin_neon_vcgezd_f64: |
11404 | 7 | case NEON::BI__builtin_neon_vcgezs_f32: |
11405 | 10 | case NEON::BI__builtin_neon_vcgezh_f16: |
11406 | 10 | Ops.push_back(EmitScalarExpr(E->getArg(0))); |
11407 | 10 | return EmitAArch64CompareBuiltinExpr( |
11408 | 10 | Ops[0], ConvertType(E->getCallReturnType(getContext())), |
11409 | 10 | ICmpInst::FCMP_OGE, ICmpInst::ICMP_SGE, "vcgez"); |
11410 | 1 | case NEON::BI__builtin_neon_vclezd_s64: |
11411 | 4 | case NEON::BI__builtin_neon_vclezd_f64: |
11412 | 7 | case NEON::BI__builtin_neon_vclezs_f32: |
11413 | 10 | case NEON::BI__builtin_neon_vclezh_f16: |
11414 | 10 | Ops.push_back(EmitScalarExpr(E->getArg(0))); |
11415 | 10 | return EmitAArch64CompareBuiltinExpr( |
11416 | 10 | Ops[0], ConvertType(E->getCallReturnType(getContext())), |
11417 | 10 | ICmpInst::FCMP_OLE, ICmpInst::ICMP_SLE, "vclez"); |
11418 | 1 | case NEON::BI__builtin_neon_vcgtzd_s64: |
11419 | 4 | case NEON::BI__builtin_neon_vcgtzd_f64: |
11420 | 7 | case NEON::BI__builtin_neon_vcgtzs_f32: |
11421 | 10 | case NEON::BI__builtin_neon_vcgtzh_f16: |
11422 | 10 | Ops.push_back(EmitScalarExpr(E->getArg(0))); |
11423 | 10 | return EmitAArch64CompareBuiltinExpr( |
11424 | 10 | Ops[0], ConvertType(E->getCallReturnType(getContext())), |
11425 | 10 | ICmpInst::FCMP_OGT, ICmpInst::ICMP_SGT, "vcgtz"); |
11426 | 1 | case NEON::BI__builtin_neon_vcltzd_s64: |
11427 | 4 | case NEON::BI__builtin_neon_vcltzd_f64: |
11428 | 7 | case NEON::BI__builtin_neon_vcltzs_f32: |
11429 | 10 | case NEON::BI__builtin_neon_vcltzh_f16: |
11430 | 10 | Ops.push_back(EmitScalarExpr(E->getArg(0))); |
11431 | 10 | return EmitAArch64CompareBuiltinExpr( |
11432 | 10 | Ops[0], ConvertType(E->getCallReturnType(getContext())), |
11433 | 10 | ICmpInst::FCMP_OLT, ICmpInst::ICMP_SLT, "vcltz"); |
11434 | | |
11435 | 1 | case NEON::BI__builtin_neon_vceqzd_u64: { |
11436 | 1 | Ops.push_back(EmitScalarExpr(E->getArg(0))); |
11437 | 1 | Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty); |
11438 | 1 | Ops[0] = |
11439 | 1 | Builder.CreateICmpEQ(Ops[0], llvm::Constant::getNullValue(Int64Ty)); |
11440 | 1 | return Builder.CreateSExt(Ops[0], Int64Ty, "vceqzd"); |
11441 | 7 | } |
11442 | 3 | case NEON::BI__builtin_neon_vceqd_f64: |
11443 | 6 | case NEON::BI__builtin_neon_vcled_f64: |
11444 | 9 | case NEON::BI__builtin_neon_vcltd_f64: |
11445 | 12 | case NEON::BI__builtin_neon_vcged_f64: |
11446 | 15 | case NEON::BI__builtin_neon_vcgtd_f64: { |
11447 | 15 | llvm::CmpInst::Predicate P; |
11448 | 15 | switch (BuiltinID) { |
11449 | 0 | default: llvm_unreachable("missing builtin ID in switch!"); |
11450 | 3 | case NEON::BI__builtin_neon_vceqd_f64: P = llvm::FCmpInst::FCMP_OEQ; break; |
11451 | 3 | case NEON::BI__builtin_neon_vcled_f64: P = llvm::FCmpInst::FCMP_OLE; break; |
11452 | 3 | case NEON::BI__builtin_neon_vcltd_f64: P = llvm::FCmpInst::FCMP_OLT; break; |
11453 | 3 | case NEON::BI__builtin_neon_vcged_f64: P = llvm::FCmpInst::FCMP_OGE; break; |
11454 | 3 | case NEON::BI__builtin_neon_vcgtd_f64: P = llvm::FCmpInst::FCMP_OGT; break; |
11455 | 15 | } |
11456 | 15 | Ops.push_back(EmitScalarExpr(E->getArg(1))); |
11457 | 15 | Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy); |
11458 | 15 | Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy); |
11459 | 15 | if (P == llvm::FCmpInst::FCMP_OEQ) |
11460 | 3 | Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]); |
11461 | 12 | else |
11462 | 12 | Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]); |
11463 | 15 | return Builder.CreateSExt(Ops[0], Int64Ty, "vcmpd"); |
11464 | 15 | } |
11465 | 3 | case NEON::BI__builtin_neon_vceqs_f32: |
11466 | 6 | case NEON::BI__builtin_neon_vcles_f32: |
11467 | 9 | case NEON::BI__builtin_neon_vclts_f32: |
11468 | 12 | case NEON::BI__builtin_neon_vcges_f32: |
11469 | 15 | case NEON::BI__builtin_neon_vcgts_f32: { |
11470 | 15 | llvm::CmpInst::Predicate P; |
11471 | 15 | switch (BuiltinID) { |
11472 | 0 | default: llvm_unreachable("missing builtin ID in switch!"); |
11473 | 3 | case NEON::BI__builtin_neon_vceqs_f32: P = llvm::FCmpInst::FCMP_OEQ; break; |
11474 | 3 | case NEON::BI__builtin_neon_vcles_f32: P = llvm::FCmpInst::FCMP_OLE; break; |
11475 | 3 | case NEON::BI__builtin_neon_vclts_f32: P = llvm::FCmpInst::FCMP_OLT; break; |
11476 | 3 | case NEON::BI__builtin_neon_vcges_f32: P = llvm::FCmpInst::FCMP_OGE; break; |
11477 | 3 | case NEON::BI__builtin_neon_vcgts_f32: P = llvm::FCmpInst::FCMP_OGT; break; |
11478 | 15 | } |
11479 | 15 | Ops.push_back(EmitScalarExpr(E->getArg(1))); |
11480 | 15 | Ops[0] = Builder.CreateBitCast(Ops[0], FloatTy); |
11481 | 15 | Ops[1] = Builder.CreateBitCast(Ops[1], FloatTy); |
11482 | 15 | if (P == llvm::FCmpInst::FCMP_OEQ) |
11483 | 3 | Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]); |
11484 | 12 | else |
11485 | 12 | Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]); |
11486 | 15 | return Builder.CreateSExt(Ops[0], Int32Ty, "vcmpd"); |
11487 | 15 | } |
11488 | 3 | case NEON::BI__builtin_neon_vceqh_f16: |
11489 | 6 | case NEON::BI__builtin_neon_vcleh_f16: |
11490 | 9 | case NEON::BI__builtin_neon_vclth_f16: |
11491 | 12 | case NEON::BI__builtin_neon_vcgeh_f16: |
11492 | 15 | case NEON::BI__builtin_neon_vcgth_f16: { |
11493 | 15 | llvm::CmpInst::Predicate P; |
11494 | 15 | switch (BuiltinID) { |
11495 | 0 | default: llvm_unreachable("missing builtin ID in switch!"); |
11496 | 3 | case NEON::BI__builtin_neon_vceqh_f16: P = llvm::FCmpInst::FCMP_OEQ; break; |
11497 | 3 | case NEON::BI__builtin_neon_vcleh_f16: P = llvm::FCmpInst::FCMP_OLE; break; |
11498 | 3 | case NEON::BI__builtin_neon_vclth_f16: P = llvm::FCmpInst::FCMP_OLT; break; |
11499 | 3 | case NEON::BI__builtin_neon_vcgeh_f16: P = llvm::FCmpInst::FCMP_OGE; break; |
11500 | 3 | case NEON::BI__builtin_neon_vcgth_f16: P = llvm::FCmpInst::FCMP_OGT; break; |
11501 | 15 | } |
11502 | 15 | Ops.push_back(EmitScalarExpr(E->getArg(1))); |
11503 | 15 | Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy); |
11504 | 15 | Ops[1] = Builder.CreateBitCast(Ops[1], HalfTy); |
11505 | 15 | if (P == llvm::FCmpInst::FCMP_OEQ) |
11506 | 3 | Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]); |
11507 | 12 | else |
11508 | 12 | Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]); |
11509 | 15 | return Builder.CreateSExt(Ops[0], Int16Ty, "vcmpd"); |
11510 | 15 | } |
11511 | 1 | case NEON::BI__builtin_neon_vceqd_s64: |
11512 | 2 | case NEON::BI__builtin_neon_vceqd_u64: |
11513 | 3 | case NEON::BI__builtin_neon_vcgtd_s64: |
11514 | 4 | case NEON::BI__builtin_neon_vcgtd_u64: |
11515 | 5 | case NEON::BI__builtin_neon_vcltd_s64: |
11516 | 6 | case NEON::BI__builtin_neon_vcltd_u64: |
11517 | 7 | case NEON::BI__builtin_neon_vcged_u64: |
11518 | 8 | case NEON::BI__builtin_neon_vcged_s64: |
11519 | 9 | case NEON::BI__builtin_neon_vcled_u64: |
11520 | 10 | case NEON::BI__builtin_neon_vcled_s64: { |
11521 | 10 | llvm::CmpInst::Predicate P; |
11522 | 10 | switch (BuiltinID) { |
11523 | 0 | default: llvm_unreachable("missing builtin ID in switch!"); |
11524 | 1 | case NEON::BI__builtin_neon_vceqd_s64: |
11525 | 2 | case NEON::BI__builtin_neon_vceqd_u64:P = llvm::ICmpInst::ICMP_EQ;break; |
11526 | 1 | case NEON::BI__builtin_neon_vcgtd_s64:P = llvm::ICmpInst::ICMP_SGT;break; |
11527 | 1 | case NEON::BI__builtin_neon_vcgtd_u64:P = llvm::ICmpInst::ICMP_UGT;break; |
11528 | 1 | case NEON::BI__builtin_neon_vcltd_s64:P = llvm::ICmpInst::ICMP_SLT;break; |
11529 | 1 | case NEON::BI__builtin_neon_vcltd_u64:P = llvm::ICmpInst::ICMP_ULT;break; |
11530 | 1 | case NEON::BI__builtin_neon_vcged_u64:P = llvm::ICmpInst::ICMP_UGE;break; |
11531 | 1 | case NEON::BI__builtin_neon_vcged_s64:P = llvm::ICmpInst::ICMP_SGE;break; |
11532 | 1 | case NEON::BI__builtin_neon_vcled_u64:P = llvm::ICmpInst::ICMP_ULE;break; |
11533 | 1 | case NEON::BI__builtin_neon_vcled_s64:P = llvm::ICmpInst::ICMP_SLE;break; |
11534 | 10 | } |
11535 | 10 | Ops.push_back(EmitScalarExpr(E->getArg(1))); |
11536 | 10 | Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty); |
11537 | 10 | Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty); |
11538 | 10 | Ops[0] = Builder.CreateICmp(P, Ops[0], Ops[1]); |
11539 | 10 | return Builder.CreateSExt(Ops[0], Int64Ty, "vceqd"); |
11540 | 10 | } |
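// These scalar compares return a mask, not a bool: the i1 compare result
// is sign-extended to the element width, giving all-ones for true and
// zero for false. A hedged model of vceqd_s64:
long long vceqd_s64_model(long long a, long long b) { return a == b ? -1 : 0; }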
11541 | 1 | case NEON::BI__builtin_neon_vtstd_s64: |
11542 | 2 | case NEON::BI__builtin_neon_vtstd_u64: { |
11543 | 2 | Ops.push_back(EmitScalarExpr(E->getArg(1))); |
11544 | 2 | Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty); |
11545 | 2 | Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty); |
11546 | 2 | Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]); |
11547 | 2 | Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0], |
11548 | 2 | llvm::Constant::getNullValue(Int64Ty)); |
11549 | 2 | return Builder.CreateSExt(Ops[0], Int64Ty, "vtstd"); |
11550 | 1 | } |
11551 | 3 | case NEON::BI__builtin_neon_vset_lane_i8: |
11552 | 7 | case NEON::BI__builtin_neon_vset_lane_i16: |
11553 | 9 | case NEON::BI__builtin_neon_vset_lane_i32: |
11554 | 13 | case NEON::BI__builtin_neon_vset_lane_i64: |
11555 | 22 | case NEON::BI__builtin_neon_vset_lane_bf16: |
11556 | 23 | case NEON::BI__builtin_neon_vset_lane_f32: |
11557 | 28 | case NEON::BI__builtin_neon_vsetq_lane_i8: |
11558 | 34 | case NEON::BI__builtin_neon_vsetq_lane_i16: |
11559 | 38 | case NEON::BI__builtin_neon_vsetq_lane_i32: |
11560 | 45 | case NEON::BI__builtin_neon_vsetq_lane_i64: |
11561 | 54 | case NEON::BI__builtin_neon_vsetq_lane_bf16: |
11562 | 56 | case NEON::BI__builtin_neon_vsetq_lane_f32: |
11563 | 56 | Ops.push_back(EmitScalarExpr(E->getArg(2))); |
11564 | 56 | return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane"); |
11565 | 5 | case NEON::BI__builtin_neon_vset_lane_f64: |
11566 | | // The vector type needs a cast for the v1f64 variant. |
11567 | 5 | Ops[1] = |
11568 | 5 | Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(DoubleTy, 1)); |
11569 | 5 | Ops.push_back(EmitScalarExpr(E->getArg(2))); |
11570 | 5 | return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane"); |
11571 | 1 | case NEON::BI__builtin_neon_vsetq_lane_f64: |
11572 | | // The vector type needs a cast for the v2f64 variant. |
11573 | 1 | Ops[1] = |
11574 | 1 | Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(DoubleTy, 2)); |
11575 | 1 | Ops.push_back(EmitScalarExpr(E->getArg(2))); |
11576 | 1 | return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane"); |
11577 | | |
11578 | 3 | case NEON::BI__builtin_neon_vget_lane_i8: |
11579 | 10 | case NEON::BI__builtin_neon_vdupb_lane_i8: |
11580 | 10 | Ops[0] = |
11581 | 10 | Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int8Ty, 8)); |
11582 | 10 | return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), |
11583 | 10 | "vget_lane"); |
11584 | 5 | case NEON::BI__builtin_neon_vgetq_lane_i8: |
11585 | 8 | case NEON::BI__builtin_neon_vdupb_laneq_i8: |
11586 | 8 | Ops[0] = |
11587 | 8 | Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int8Ty, 16)); |
11588 | 8 | return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), |
11589 | 8 | "vgetq_lane"); |
11590 | 58 | case NEON::BI__builtin_neon_vget_lane_i16: |
11591 | 65 | case NEON::BI__builtin_neon_vduph_lane_i16: |
11592 | 65 | Ops[0] = |
11593 | 65 | Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int16Ty, 4)); |
11594 | 65 | return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), |
11595 | 65 | "vget_lane"); |
11596 | 60 | case NEON::BI__builtin_neon_vgetq_lane_i16: |
11597 | 63 | case NEON::BI__builtin_neon_vduph_laneq_i16: |
11598 | 63 | Ops[0] = |
11599 | 63 | Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int16Ty, 8)); |
11600 | 63 | return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), |
11601 | 63 | "vgetq_lane"); |
11602 | 31 | case NEON::BI__builtin_neon_vget_lane_i32: |
11603 | 37 | case NEON::BI__builtin_neon_vdups_lane_i32: |
11604 | 37 | Ops[0] = |
11605 | 37 | Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int32Ty, 2)); |
11606 | 37 | return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), |
11607 | 37 | "vget_lane"); |
11608 | 3 | case NEON::BI__builtin_neon_vdups_lane_f32: |
11609 | 3 | Ops[0] = |
11610 | 3 | Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 2)); |
11611 | 3 | return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), |
11612 | 3 | "vdups_lane"); |
11613 | 33 | case NEON::BI__builtin_neon_vgetq_lane_i32: |
11614 | 35 | case NEON::BI__builtin_neon_vdups_laneq_i32: |
11615 | 35 | Ops[0] = |
11616 | 35 | Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int32Ty, 4)); |
11617 | 35 | return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), |
11618 | 35 | "vgetq_lane"); |
11619 | 17 | case NEON::BI__builtin_neon_vget_lane_i64: |
11620 | 23 | case NEON::BI__builtin_neon_vdupd_lane_i64: |
11621 | 23 | Ops[0] = |
11622 | 23 | Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int64Ty, 1)); |
11623 | 23 | return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), |
11624 | 23 | "vget_lane"); |
11625 | 3 | case NEON::BI__builtin_neon_vdupd_lane_f64: |
11626 | 3 | Ops[0] = |
11627 | 3 | Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 1)); |
11628 | 3 | return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), |
11629 | 3 | "vdupd_lane"); |
11630 | 22 | case NEON::BI__builtin_neon_vgetq_lane_i64: |
11631 | 24 | case NEON::BI__builtin_neon_vdupd_laneq_i64: |
11632 | 24 | Ops[0] = |
11633 | 24 | Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int64Ty, 2)); |
11634 | 24 | return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), |
11635 | 24 | "vgetq_lane"); |
11636 | 3 | case NEON::BI__builtin_neon_vget_lane_f32: |
11637 | 3 | Ops[0] = |
11638 | 3 | Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 2)); |
11639 | 3 | return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), |
11640 | 3 | "vget_lane"); |
11641 | 9 | case NEON::BI__builtin_neon_vget_lane_f64: |
11642 | 9 | Ops[0] = |
11643 | 9 | Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 1)); |
11644 | 9 | return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), |
11645 | 9 | "vget_lane"); |
11646 | 4 | case NEON::BI__builtin_neon_vgetq_lane_f32: |
11647 | 5 | case NEON::BI__builtin_neon_vdups_laneq_f32: |
11648 | 5 | Ops[0] = |
11649 | 5 | Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 4)); |
11650 | 5 | return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), |
11651 | 5 | "vgetq_lane"); |
11652 | 6 | case NEON::BI__builtin_neon_vgetq_lane_f64: |
11653 | 7 | case NEON::BI__builtin_neon_vdupd_laneq_f64: |
11654 | 7 | Ops[0] = |
11655 | 7 | Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 2)); |
11656 | 7 | return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), |
11657 | 7 | "vgetq_lane"); |
11658 | 3 | case NEON::BI__builtin_neon_vaddh_f16: |
11659 | 3 | Ops.push_back(EmitScalarExpr(E->getArg(1))); |
11660 | 3 | return Builder.CreateFAdd(Ops[0], Ops[1], "vaddh"); |
11661 | 3 | case NEON::BI__builtin_neon_vsubh_f16: |
11662 | 3 | Ops.push_back(EmitScalarExpr(E->getArg(1))); |
11663 | 3 | return Builder.CreateFSub(Ops[0], Ops[1], "vsubh"); |
11664 | 3 | case NEON::BI__builtin_neon_vmulh_f16: |
11665 | 3 | Ops.push_back(EmitScalarExpr(E->getArg(1))); |
11666 | 3 | return Builder.CreateFMul(Ops[0], Ops[1], "vmulh"); |
11667 | 3 | case NEON::BI__builtin_neon_vdivh_f16: |
11668 | 3 | Ops.push_back(EmitScalarExpr(E->getArg(1))); |
11669 | 3 | return Builder.CreateFDiv(Ops[0], Ops[1], "vdivh"); |
11670 | 3 | case NEON::BI__builtin_neon_vfmah_f16: |
11671 | | // NEON intrinsic puts accumulator first, unlike the LLVM fma. |
11672 | 3 | return emitCallMaybeConstrainedFPBuiltin( |
11673 | 3 | *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, HalfTy, |
11674 | 3 | {EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2)), Ops[0]}); |
11675 | 3 | case NEON::BI__builtin_neon_vfmsh_f16: { |
11676 | 3 | Value* Neg = Builder.CreateFNeg(EmitScalarExpr(E->getArg(1)), "vsubh"); |
11677 | | |
11678 | | // NEON intrinsic puts accumulator first, unlike the LLVM fma. |
11679 | 3 | return emitCallMaybeConstrainedFPBuiltin( |
11680 | 3 | *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, HalfTy, |
11681 | 3 | {Neg, EmitScalarExpr(E->getArg(2)), Ops[0]}); |
11682 | 6 | } |
11683 | 1 | case NEON::BI__builtin_neon_vaddd_s64: |
11684 | 2 | case NEON::BI__builtin_neon_vaddd_u64: |
11685 | 2 | return Builder.CreateAdd(Ops[0], EmitScalarExpr(E->getArg(1)), "vaddd"); |
11686 | 1 | case NEON::BI__builtin_neon_vsubd_s64: |
11687 | 2 | case NEON::BI__builtin_neon_vsubd_u64: |
11688 | 2 | return Builder.CreateSub(Ops[0], EmitScalarExpr(E->getArg(1)), "vsubd"); |
11689 | 1 | case NEON::BI__builtin_neon_vqdmlalh_s16: |
11690 | 2 | case NEON::BI__builtin_neon_vqdmlslh_s16: { |
11691 | 2 | SmallVector<Value *, 2> ProductOps; |
11692 | 2 | ProductOps.push_back(vectorWrapScalar16(Ops[1])); |
11693 | 2 | ProductOps.push_back(vectorWrapScalar16(EmitScalarExpr(E->getArg(2)))); |
11694 | 2 | auto *VTy = llvm::FixedVectorType::get(Int32Ty, 4); |
11695 | 2 | Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy), |
11696 | 2 | ProductOps, "vqdmlXl"); |
11697 | 2 | Constant *CI = ConstantInt::get(SizeTy, 0); |
11698 | 2 | Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0"); |
11699 | | |
11700 | 2 | unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlalh_s16 |
11701 | 2 | ? Intrinsic::aarch64_neon_sqadd
11702 | 2 | : Intrinsic::aarch64_neon_sqsub;
11703 | 2 | return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int32Ty), Ops, "vqdmlXl"); |
11704 | 1 | } |
11705 | 1 | case NEON::BI__builtin_neon_vqshlud_n_s64: { |
11706 | 1 | Ops.push_back(EmitScalarExpr(E->getArg(1))); |
11707 | 1 | Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty); |
11708 | 1 | return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqshlu, Int64Ty), |
11709 | 1 | Ops, "vqshlu_n"); |
11710 | 1 | } |
11711 | 1 | case NEON::BI__builtin_neon_vqshld_n_u64: |
11712 | 2 | case NEON::BI__builtin_neon_vqshld_n_s64: { |
11713 | 2 | unsigned Int = BuiltinID == NEON::BI__builtin_neon_vqshld_n_u64 |
11714 | 2 | ? Intrinsic::aarch64_neon_uqshl
11715 | 2 | : Intrinsic::aarch64_neon_sqshl;
11716 | 2 | Ops.push_back(EmitScalarExpr(E->getArg(1))); |
11717 | 2 | Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty); |
11718 | 2 | return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vqshl_n"); |
11719 | 1 | } |
11720 | 1 | case NEON::BI__builtin_neon_vrshrd_n_u64: |
11721 | 2 | case NEON::BI__builtin_neon_vrshrd_n_s64: { |
11722 | 2 | unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrshrd_n_u64 |
11723 | 2 | ? Intrinsic::aarch64_neon_urshl
11724 | 2 | : Intrinsic::aarch64_neon_srshl;
11725 | 2 | Ops.push_back(EmitScalarExpr(E->getArg(1))); |
11726 | 2 | int SV = cast<ConstantInt>(Ops[1])->getSExtValue(); |
11727 | 2 | Ops[1] = ConstantInt::get(Int64Ty, -SV); |
11728 | 2 | return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vrshr_n"); |
11729 | 1 | } |
11730 | 1 | case NEON::BI__builtin_neon_vrsrad_n_u64: |
11731 | 2 | case NEON::BI__builtin_neon_vrsrad_n_s64: { |
11732 | 2 | unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrsrad_n_u64 |
11733 | 2 | ? Intrinsic::aarch64_neon_urshl
11734 | 2 | : Intrinsic::aarch64_neon_srshl;
11735 | 2 | Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty); |
11736 | 2 | Ops.push_back(Builder.CreateNeg(EmitScalarExpr(E->getArg(2)))); |
11737 | 2 | Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Int64Ty), |
11738 | 2 | {Ops[1], Builder.CreateSExt(Ops[2], Int64Ty)}); |
11739 | 2 | return Builder.CreateAdd(Ops[0], Builder.CreateBitCast(Ops[1], Int64Ty)); |
11740 | 1 | } |
11741 | 1 | case NEON::BI__builtin_neon_vshld_n_s64: |
11742 | 2 | case NEON::BI__builtin_neon_vshld_n_u64: { |
11743 | 2 | llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1))); |
11744 | 2 | return Builder.CreateShl( |
11745 | 2 | Ops[0], ConstantInt::get(Int64Ty, Amt->getZExtValue()), "shld_n"); |
11746 | 1 | } |
11747 | 1 | case NEON::BI__builtin_neon_vshrd_n_s64: { |
11748 | 1 | llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1))); |
11749 | 1 | return Builder.CreateAShr( |
11750 | 1 | Ops[0], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63), |
11751 | 1 | Amt->getZExtValue())), |
11752 | 1 | "shrd_n"); |
11753 | 1 | } |
11754 | 2 | case NEON::BI__builtin_neon_vshrd_n_u64: { |
11755 | 2 | llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1))); |
11756 | 2 | uint64_t ShiftAmt = Amt->getZExtValue(); |
11757 | | // Right-shifting an unsigned value by its size yields 0. |
11758 | 2 | if (ShiftAmt == 64) |
11759 | 2 | return ConstantInt::get(Int64Ty, 0); |
11760 | 0 | return Builder.CreateLShr(Ops[0], ConstantInt::get(Int64Ty, ShiftAmt), |
11761 | 0 | "shrd_n"); |
11762 | 2 | } |
11763 | 1 | case NEON::BI__builtin_neon_vsrad_n_s64: { |
11764 | 1 | llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2))); |
11765 | 1 | Ops[1] = Builder.CreateAShr( |
11766 | 1 | Ops[1], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63), |
11767 | 1 | Amt->getZExtValue())), |
11768 | 1 | "shrd_n"); |
11769 | 1 | return Builder.CreateAdd(Ops[0], Ops[1]); |
11770 | 2 | } |
11771 | 2 | case NEON::BI__builtin_neon_vsrad_n_u64: { |
11772 | 2 | llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2))); |
11773 | 2 | uint64_t ShiftAmt = Amt->getZExtValue(); |
11774 | | // Right-shifting an unsigned value by its size yields 0. |
11775 | | // As Op + 0 = Op, return Ops[0] directly. |
11776 | 2 | if (ShiftAmt == 64) |
11777 | 1 | return Ops[0]; |
11778 | 1 | Ops[1] = Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, ShiftAmt), |
11779 | 1 | "shrd_n"); |
11780 | 1 | return Builder.CreateAdd(Ops[0], Ops[1]); |
11781 | 2 | } |
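// Shifting a 64-bit value right by 64 is undefined in C++ and poison in
// LLVM IR, so the two cases above fold that amount by hand: the plain
// shift yields 0 and the shift-accumulate returns the accumulator
// unchanged. A hedged model (the builtin's shift amount is 1..64):
unsigned long long vshrd_n_u64_model(unsigned long long v, unsigned n) {
  return n == 64 ? 0 : v >> n;
}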
11782 | 1 | case NEON::BI__builtin_neon_vqdmlalh_lane_s16: |
11783 | 2 | case NEON::BI__builtin_neon_vqdmlalh_laneq_s16: |
11784 | 3 | case NEON::BI__builtin_neon_vqdmlslh_lane_s16: |
11785 | 4 | case NEON::BI__builtin_neon_vqdmlslh_laneq_s16: { |
11786 | 4 | Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)), |
11787 | 4 | "lane"); |
11788 | 4 | SmallVector<Value *, 2> ProductOps; |
11789 | 4 | ProductOps.push_back(vectorWrapScalar16(Ops[1])); |
11790 | 4 | ProductOps.push_back(vectorWrapScalar16(Ops[2])); |
11791 | 4 | auto *VTy = llvm::FixedVectorType::get(Int32Ty, 4); |
11792 | 4 | Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy), |
11793 | 4 | ProductOps, "vqdmlXl"); |
11794 | 4 | Constant *CI = ConstantInt::get(SizeTy, 0); |
11795 | 4 | Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0"); |
11796 | 4 | Ops.pop_back(); |
11797 | | |
11798 | 4 | unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlalh_lane_s16 || |
11799 | 4 | BuiltinID == NEON::BI__builtin_neon_vqdmlalh_laneq_s16)
11800 | 4 | ? Intrinsic::aarch64_neon_sqadd
11801 | 4 | : Intrinsic::aarch64_neon_sqsub;
11802 | 4 | return EmitNeonCall(CGM.getIntrinsic(AccInt, Int32Ty), Ops, "vqdmlXl"); |
11803 | 3 | } |
11804 | 1 | case NEON::BI__builtin_neon_vqdmlals_s32: |
11805 | 2 | case NEON::BI__builtin_neon_vqdmlsls_s32: { |
11806 | 2 | SmallVector<Value *, 2> ProductOps; |
11807 | 2 | ProductOps.push_back(Ops[1]); |
11808 | 2 | ProductOps.push_back(EmitScalarExpr(E->getArg(2))); |
11809 | 2 | Ops[1] = |
11810 | 2 | EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar), |
11811 | 2 | ProductOps, "vqdmlXl"); |
11812 | | |
11813 | 2 | unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlals_s32 |
11814 | 2 | ? Intrinsic::aarch64_neon_sqadd
11815 | 2 | : Intrinsic::aarch64_neon_sqsub;
11816 | 2 | return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int64Ty), Ops, "vqdmlXl"); |
11817 | 1 | } |
11818 | 1 | case NEON::BI__builtin_neon_vqdmlals_lane_s32: |
11819 | 2 | case NEON::BI__builtin_neon_vqdmlals_laneq_s32: |
11820 | 3 | case NEON::BI__builtin_neon_vqdmlsls_lane_s32: |
11821 | 4 | case NEON::BI__builtin_neon_vqdmlsls_laneq_s32: { |
11822 | 4 | Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)), |
11823 | 4 | "lane"); |
11824 | 4 | SmallVector<Value *, 2> ProductOps; |
11825 | 4 | ProductOps.push_back(Ops[1]); |
11826 | 4 | ProductOps.push_back(Ops[2]); |
11827 | 4 | Ops[1] = |
11828 | 4 | EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar), |
11829 | 4 | ProductOps, "vqdmlXl"); |
11830 | 4 | Ops.pop_back(); |
11831 | | |
11832 | 4 | unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlals_lane_s32 || |
11833 | 4 | BuiltinID == NEON::BI__builtin_neon_vqdmlals_laneq_s32)
11834 | 4 | ? Intrinsic::aarch64_neon_sqadd
11835 | 4 | : Intrinsic::aarch64_neon_sqsub;
11836 | 4 | return EmitNeonCall(CGM.getIntrinsic(AccInt, Int64Ty), Ops, "vqdmlXl"); |
11837 | 3 | } |
11838 | 25 | case NEON::BI__builtin_neon_vget_lane_bf16: |
11839 | 26 | case NEON::BI__builtin_neon_vduph_lane_bf16: |
11840 | 27 | case NEON::BI__builtin_neon_vduph_lane_f16: { |
11841 | 27 | return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), |
11842 | 27 | "vget_lane"); |
11843 | 26 | } |
11844 | 25 | case NEON::BI__builtin_neon_vgetq_lane_bf16: |
11845 | 26 | case NEON::BI__builtin_neon_vduph_laneq_bf16: |
11846 | 27 | case NEON::BI__builtin_neon_vduph_laneq_f16: { |
11847 | 27 | return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), |
11848 | 27 | "vgetq_lane"); |
11849 | 26 | } |
11850 | | |
11851 | 8 | case clang::AArch64::BI_InterlockedAdd: { |
11852 | 8 | Value *Arg0 = EmitScalarExpr(E->getArg(0)); |
11853 | 8 | Value *Arg1 = EmitScalarExpr(E->getArg(1)); |
11854 | 8 | AtomicRMWInst *RMWI = Builder.CreateAtomicRMW( |
11855 | 8 | AtomicRMWInst::Add, Arg0, Arg1, |
11856 | 8 | llvm::AtomicOrdering::SequentiallyConsistent); |
11857 | 8 | return Builder.CreateAdd(RMWI, Arg1); |
11858 | 26 | } |
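// atomicrmw add, like std::atomic::fetch_add, returns the value *before*
// the addition, while _InterlockedAdd returns the value *after* it; that
// is why the result is re-added above. A hedged model:
#include <atomic>
long interlocked_add_model(std::atomic<long> &a, long v) {
  return a.fetch_add(v, std::memory_order_seq_cst) + v;
}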
11859 | 3.49k | } |
11860 | | |
11861 | 2.85k | llvm::FixedVectorType *VTy = GetNeonType(this, Type); |
11862 | 2.85k | llvm::Type *Ty = VTy; |
11863 | 2.85k | if (!Ty) |
11864 | 0 | return nullptr; |
11865 | | |
11866 | | // Not all intrinsics handled by the common case work for AArch64 yet, so only |
11867 | | // defer to common code if it's been added to our special map. |
11868 | 2.85k | Builtin = findARMVectorIntrinsicInMap(AArch64SIMDIntrinsicMap, BuiltinID, |
11869 | 2.85k | AArch64SIMDIntrinsicsProvenSorted); |
11870 | | |
11871 | 2.85k | if (Builtin) |
11872 | 1.37k | return EmitCommonNeonBuiltinExpr( |
11873 | 1.37k | Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic, |
11874 | 1.37k | Builtin->NameHint, Builtin->TypeModifier, E, Ops, |
11875 | 1.37k | /*never use addresses*/ Address::invalid(), Address::invalid(), Arch); |
11876 | | |
11877 | 1.47k | if (Value *V = EmitAArch64TblBuiltinExpr(*this, BuiltinID, E, Ops, Arch)) |
11878 | 72 | return V; |
11879 | | |
11880 | 1.40k | unsigned Int; |
11881 | 1.40k | switch (BuiltinID) { |
11882 | 0 | default: return nullptr; |
11883 | 14 | case NEON::BI__builtin_neon_vbsl_v: |
11884 | 27 | case NEON::BI__builtin_neon_vbslq_v: { |
11885 | 27 | llvm::Type *BitTy = llvm::VectorType::getInteger(VTy); |
11886 | 27 | Ops[0] = Builder.CreateBitCast(Ops[0], BitTy, "vbsl"); |
11887 | 27 | Ops[1] = Builder.CreateBitCast(Ops[1], BitTy, "vbsl"); |
11888 | 27 | Ops[2] = Builder.CreateBitCast(Ops[2], BitTy, "vbsl"); |
11889 | | |
11890 | 27 | Ops[1] = Builder.CreateAnd(Ops[0], Ops[1], "vbsl"); |
11891 | 27 | Ops[2] = Builder.CreateAnd(Builder.CreateNot(Ops[0]), Ops[2], "vbsl"); |
11892 | 27 | Ops[0] = Builder.CreateOr(Ops[1], Ops[2], "vbsl"); |
11893 | 27 | return Builder.CreateBitCast(Ops[0], Ty); |
11894 | 14 | } |
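// vbsl is a bitwise select: where a mask bit is set take the first value,
// otherwise the second, i.e. (m & a) | (~m & b), exactly the and/and-not/or
// sequence above. A hedged scalar model:
unsigned long long vbsl_model(unsigned long long m, unsigned long long a,
                              unsigned long long b) {
  return (m & a) | (~m & b);
}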
11895 | 24 | case NEON::BI__builtin_neon_vfma_lane_v: |
11896 | 40 | case NEON::BI__builtin_neon_vfmaq_lane_v: { // Only used for FP types |
11897 | | // The ARM builtins (and instructions) have the addend as the first |
11898 | | // operand, but the 'fma' intrinsics have it last. Swap it around here. |
11899 | 40 | Value *Addend = Ops[0]; |
11900 | 40 | Value *Multiplicand = Ops[1]; |
11901 | 40 | Value *LaneSource = Ops[2]; |
11902 | 40 | Ops[0] = Multiplicand; |
11903 | 40 | Ops[1] = LaneSource; |
11904 | 40 | Ops[2] = Addend; |
11905 | | |
11906 | | // Now adjust things to handle the lane access. |
11907 | 40 | auto *SourceTy = BuiltinID == NEON::BI__builtin_neon_vfmaq_lane_v |
11908 | 40 | ? llvm::FixedVectorType::get(VTy->getElementType(), |
11909 | 16 | VTy->getNumElements() / 2) |
11910 | 40 | : VTy;
11911 | 40 | llvm::Constant *cst = cast<Constant>(Ops[3]); |
11912 | 40 | Value *SV = llvm::ConstantVector::getSplat(VTy->getElementCount(), cst); |
11913 | 40 | Ops[1] = Builder.CreateBitCast(Ops[1], SourceTy); |
11914 | 40 | Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV, "lane"); |
11915 | | |
11916 | 40 | Ops.pop_back(); |
11917 | 40 | Int = Builder.getIsFPConstrained() ? Intrinsic::experimental_constrained_fma
11918 | 40 | : Intrinsic::fma;
11919 | 40 | return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fmla"); |
11920 | 24 | } |
11921 | 24 | case NEON::BI__builtin_neon_vfma_laneq_v: { |
11922 | 24 | auto *VTy = cast<llvm::FixedVectorType>(Ty); |
11923 | | // v1f64 fma should be mapped to Neon scalar f64 fma |
11924 | 24 | if (VTy && VTy->getElementType() == DoubleTy) { |
11925 | 10 | Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy); |
11926 | 10 | Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy); |
11927 | 10 | llvm::FixedVectorType *VTy = |
11928 | 10 | GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, true)); |
11929 | 10 | Ops[2] = Builder.CreateBitCast(Ops[2], VTy); |
11930 | 10 | Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract"); |
11931 | 10 | Value *Result; |
11932 | 10 | Result = emitCallMaybeConstrainedFPBuiltin( |
11933 | 10 | *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, |
11934 | 10 | DoubleTy, {Ops[1], Ops[2], Ops[0]}); |
11935 | 10 | return Builder.CreateBitCast(Result, Ty); |
11936 | 10 | } |
11937 | 14 | Ops[0] = Builder.CreateBitCast(Ops[0], Ty); |
11938 | 14 | Ops[1] = Builder.CreateBitCast(Ops[1], Ty); |
11939 | | |
11940 | 14 | auto *STy = llvm::FixedVectorType::get(VTy->getElementType(), |
11941 | 14 | VTy->getNumElements() * 2); |
11942 | 14 | Ops[2] = Builder.CreateBitCast(Ops[2], STy); |
11943 | 14 | Value *SV = llvm::ConstantVector::getSplat(VTy->getElementCount(), |
11944 | 14 | cast<ConstantInt>(Ops[3])); |
11945 | 14 | Ops[2] = Builder.CreateShuffleVector(Ops[2], Ops[2], SV, "lane"); |
11946 | | |
11947 | 14 | return emitCallMaybeConstrainedFPBuiltin( |
11948 | 14 | *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty, |
11949 | 14 | {Ops[2], Ops[1], Ops[0]}); |
11950 | 24 | } |
11951 | 18 | case NEON::BI__builtin_neon_vfmaq_laneq_v: { |
11952 | 18 | Ops[0] = Builder.CreateBitCast(Ops[0], Ty); |
11953 | 18 | Ops[1] = Builder.CreateBitCast(Ops[1], Ty); |
11954 | | |
11955 | 18 | Ops[2] = Builder.CreateBitCast(Ops[2], Ty); |
11956 | 18 | Ops[2] = EmitNeonSplat(Ops[2], cast<ConstantInt>(Ops[3])); |
11957 | 18 | return emitCallMaybeConstrainedFPBuiltin( |
11958 | 18 | *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty, |
11959 | 18 | {Ops[2], Ops[1], Ops[0]}); |
11960 | 24 | } |
11961 | 10 | case NEON::BI__builtin_neon_vfmah_lane_f16: |
11962 | 20 | case NEON::BI__builtin_neon_vfmas_lane_f32: |
11963 | 30 | case NEON::BI__builtin_neon_vfmah_laneq_f16: |
11964 | 32 | case NEON::BI__builtin_neon_vfmas_laneq_f32: |
11965 | 38 | case NEON::BI__builtin_neon_vfmad_lane_f64: |
11966 | 44 | case NEON::BI__builtin_neon_vfmad_laneq_f64: { |
11967 | 44 | Ops.push_back(EmitScalarExpr(E->getArg(3))); |
11968 | 44 | llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext())); |
11969 | 44 | Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract"); |
11970 | 44 | return emitCallMaybeConstrainedFPBuiltin( |
11971 | 44 | *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty, |
11972 | 44 | {Ops[1], Ops[2], Ops[0]}); |
11973 | 38 | } |
11974 | 11 | case NEON::BI__builtin_neon_vmull_v: |
11975 | | // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics. |
11976 | 11 | Int = usgn ? Intrinsic::aarch64_neon_umull : Intrinsic::aarch64_neon_smull;
11977 | 11 | if (Type.isPoly()) Int = Intrinsic::aarch64_neon_pmull;
11978 | 11 | return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull"); |
11979 | 9 | case NEON::BI__builtin_neon_vmax_v: |
11980 | 18 | case NEON::BI__builtin_neon_vmaxq_v: |
11981 | | // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics. |
11982 | 18 | Int = usgn ? Intrinsic::aarch64_neon_umax : Intrinsic::aarch64_neon_smax;
11983 | 18 | if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmax;
11984 | 18 | return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmax"); |
11985 | 1 | case NEON::BI__builtin_neon_vmaxh_f16: { |
11986 | 1 | Ops.push_back(EmitScalarExpr(E->getArg(1))); |
11987 | 1 | Int = Intrinsic::aarch64_neon_fmax; |
11988 | 1 | return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmax"); |
11989 | 9 | } |
11990 | 9 | case NEON::BI__builtin_neon_vmin_v: |
11991 | 18 | case NEON::BI__builtin_neon_vminq_v: |
11992 | | // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics. |
11993 | 18 | Int = usgn ? Intrinsic::aarch64_neon_umin : Intrinsic::aarch64_neon_smin;
11994 | 18 | if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmin;
11995 | 18 | return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmin"); |
11996 | 1 | case NEON::BI__builtin_neon_vminh_f16: { |
11997 | 1 | Ops.push_back(EmitScalarExpr(E->getArg(1))); |
11998 | 1 | Int = Intrinsic::aarch64_neon_fmin; |
11999 | 1 | return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmin"); |
12000 | 9 | } |
12001 | 9 | case NEON::BI__builtin_neon_vabd_v: |
12002 | 18 | case NEON::BI__builtin_neon_vabdq_v: |
12003 | | // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics. |
12004 | 18 | Int = usgn ? Intrinsic::aarch64_neon_uabd : Intrinsic::aarch64_neon_sabd;
12005 | 18 | if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fabd;
12006 | 18 | return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vabd"); |
12007 | 6 | case NEON::BI__builtin_neon_vpadal_v: |
12008 | 12 | case NEON::BI__builtin_neon_vpadalq_v: { |
12009 | 12 | unsigned ArgElts = VTy->getNumElements(); |
12010 | 12 | llvm::IntegerType *EltTy = cast<IntegerType>(VTy->getElementType()); |
12011 | 12 | unsigned BitWidth = EltTy->getBitWidth(); |
12012 | 12 | auto *ArgTy = llvm::FixedVectorType::get( |
12013 | 12 | llvm::IntegerType::get(getLLVMContext(), BitWidth / 2), 2 * ArgElts); |
12014 | 12 | llvm::Type* Tys[2] = { VTy, ArgTy }; |
12015 | 12 | Int = usgn ? Intrinsic::aarch64_neon_uaddlp : Intrinsic::aarch64_neon_saddlp;
12016 | 12 | SmallVector<llvm::Value*, 1> TmpOps; |
12017 | 12 | TmpOps.push_back(Ops[1]); |
12018 | 12 | Function *F = CGM.getIntrinsic(Int, Tys); |
12019 | 12 | llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vpadal"); |
12020 | 12 | llvm::Value *addend = Builder.CreateBitCast(Ops[0], tmp->getType()); |
12021 | 12 | return Builder.CreateAdd(tmp, addend); |
12022 | 6 | } |
12023 | 8 | case NEON::BI__builtin_neon_vpmin_v: |
12024 | 17 | case NEON::BI__builtin_neon_vpminq_v: |
12025 | | // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics. |
12026 | 17 | Int = usgn ? Intrinsic::aarch64_neon_uminp : Intrinsic::aarch64_neon_sminp;
12027 | 17 | if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fminp;
12028 | 17 | return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin"); |
12029 | 8 | case NEON::BI__builtin_neon_vpmax_v: |
12030 | 17 | case NEON::BI__builtin_neon_vpmaxq_v: |
12031 | | // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics. |
12032 | 17 | Int = usgn ? Intrinsic::aarch64_neon_umaxp : Intrinsic::aarch64_neon_smaxp;
12033 | 17 | if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmaxp;
12034 | 17 | return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax"); |
12035 | 3 | case NEON::BI__builtin_neon_vminnm_v: |
12036 | 6 | case NEON::BI__builtin_neon_vminnmq_v: |
12037 | 6 | Int = Intrinsic::aarch64_neon_fminnm; |
12038 | 6 | return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vminnm"); |
12039 | 1 | case NEON::BI__builtin_neon_vminnmh_f16: |
12040 | 1 | Ops.push_back(EmitScalarExpr(E->getArg(1))); |
12041 | 1 | Int = Intrinsic::aarch64_neon_fminnm; |
12042 | 1 | return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vminnm"); |
12043 | 3 | case NEON::BI__builtin_neon_vmaxnm_v: |
12044 | 6 | case NEON::BI__builtin_neon_vmaxnmq_v: |
12045 | 6 | Int = Intrinsic::aarch64_neon_fmaxnm; |
12046 | 6 | return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmaxnm"); |
12047 | 1 | case NEON::BI__builtin_neon_vmaxnmh_f16: |
12048 | 1 | Ops.push_back(EmitScalarExpr(E->getArg(1))); |
12049 | 1 | Int = Intrinsic::aarch64_neon_fmaxnm; |
12050 | 1 | return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmaxnm"); |
12051 | 1 | case NEON::BI__builtin_neon_vrecpss_f32: { |
12052 | 1 | Ops.push_back(EmitScalarExpr(E->getArg(1))); |
12053 | 1 | return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, FloatTy), |
12054 | 1 | Ops, "vrecps"); |
12055 | 3 | } |
12056 | 1 | case NEON::BI__builtin_neon_vrecpsd_f64: |
12057 | 1 | Ops.push_back(EmitScalarExpr(E->getArg(1))); |
12058 | 1 | return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, DoubleTy), |
12059 | 1 | Ops, "vrecps"); |
12060 | 1 | case NEON::BI__builtin_neon_vrecpsh_f16: |
12061 | 1 | Ops.push_back(EmitScalarExpr(E->getArg(1))); |
12062 | 1 | return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, HalfTy), |
12063 | 1 | Ops, "vrecps"); |
12064 | 6 | case NEON::BI__builtin_neon_vqshrun_n_v: |
12065 | 6 | Int = Intrinsic::aarch64_neon_sqshrun; |
12066 | 6 | return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrun_n"); |
12067 | 6 | case NEON::BI__builtin_neon_vqrshrun_n_v: |
12068 | 6 | Int = Intrinsic::aarch64_neon_sqrshrun; |
12069 | 6 | return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrun_n"); |
12070 | 12 | case NEON::BI__builtin_neon_vqshrn_n_v: |
12071 | 12 | Int = usgn ? Intrinsic::aarch64_neon_uqshrn : Intrinsic::aarch64_neon_sqshrn;
12072 | 12 | return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n"); |
12073 | 12 | case NEON::BI__builtin_neon_vrshrn_n_v: |
12074 | 12 | Int = Intrinsic::aarch64_neon_rshrn; |
12075 | 12 | return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshrn_n"); |
12076 | 13 | case NEON::BI__builtin_neon_vqrshrn_n_v: |
12077 | 13 | Int = usgn ? Intrinsic::aarch64_neon_uqrshrn : Intrinsic::aarch64_neon_sqrshrn;
12078 | 13 | return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n"); |
12079 | 3 | case NEON::BI__builtin_neon_vrndah_f16: { |
12080 | 3 | Ops.push_back(EmitScalarExpr(E->getArg(0))); |
12081 | 3 | Int = Builder.getIsFPConstrained() |
12082 | 3 | ? Intrinsic::experimental_constrained_round
12083 | 3 | : Intrinsic::round;
12084 | 3 | return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrnda"); |
12085 | 3 | } |
12086 | 5 | case NEON::BI__builtin_neon_vrnda_v: |
12087 | 17 | case NEON::BI__builtin_neon_vrndaq_v: { |
12088 | 17 | Int = Builder.getIsFPConstrained() |
12089 | 17 | ? Intrinsic::experimental_constrained_round
12090 | 17 | : Intrinsic::round;
12091 | 17 | return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnda"); |
12092 | 5 | } |
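
The vrnd* cases here all repeat one selection pattern: in FP-constrained mode the strict experimental.constrained.* intrinsic is chosen, otherwise the plain one. A hedged sketch of the pattern; pickRound is a hypothetical helper, not a function in this file (IRBuilder.h and Intrinsics.h are already included here):

    // Hypothetical helper showing the constrained-vs-plain selection used by
    // the vrnda/vrndi/vrndm/vrndn/vrndp/vrndx/vrndz cases.
    static llvm::Intrinsic::ID pickRound(llvm::IRBuilderBase &B) {
      return B.getIsFPConstrained()
                 ? llvm::Intrinsic::experimental_constrained_round
                 : llvm::Intrinsic::round;
    }
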
12093 | 3 | case NEON::BI__builtin_neon_vrndih_f16: { |
12094 | 3 | Ops.push_back(EmitScalarExpr(E->getArg(0))); |
12095 | 3 | Int = Builder.getIsFPConstrained() |
12096 | 3 | ? Intrinsic::experimental_constrained_nearbyint
12097 | 3 | : Intrinsic::nearbyint;
12098 | 3 | return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndi"); |
12099 | 5 | } |
12100 | 3 | case NEON::BI__builtin_neon_vrndmh_f16: { |
12101 | 3 | Ops.push_back(EmitScalarExpr(E->getArg(0))); |
12102 | 3 | Int = Builder.getIsFPConstrained() |
12103 | 3 | ? Intrinsic::experimental_constrained_floor
12104 | 3 | : Intrinsic::floor;
12105 | 3 | return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndm"); |
12106 | 5 | } |
12107 | 5 | case NEON::BI__builtin_neon_vrndm_v: |
12108 | 13 | case NEON::BI__builtin_neon_vrndmq_v: { |
12109 | 13 | Int = Builder.getIsFPConstrained() |
12110 | 13 | ? Intrinsic::experimental_constrained_floor
12111 | 13 | : Intrinsic::floor;
12112 | 13 | return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndm"); |
12113 | 5 | } |
12114 | 1 | case NEON::BI__builtin_neon_vrndnh_f16: { |
12115 | 1 | Ops.push_back(EmitScalarExpr(E->getArg(0))); |
12116 | 1 | Int = Builder.getIsFPConstrained() |
12117 | 1 | ? Intrinsic::experimental_constrained_roundeven
12118 | 1 | : Intrinsic::roundeven; |
12119 | 1 | return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndn"); |
12120 | 5 | } |
12121 | 3 | case NEON::BI__builtin_neon_vrndn_v: |
12122 | 7 | case NEON::BI__builtin_neon_vrndnq_v: { |
12123 | 7 | Int = Builder.getIsFPConstrained() |
12124 | 7 | ? Intrinsic::experimental_constrained_roundeven
12125 | 7 | : Intrinsic::roundeven; |
12126 | 7 | return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndn"); |
12127 | 3 | } |
12128 | 1 | case NEON::BI__builtin_neon_vrndns_f32: { |
12129 | 1 | Ops.push_back(EmitScalarExpr(E->getArg(0))); |
12130 | 1 | Int = Builder.getIsFPConstrained() |
12131 | 1 | ? Intrinsic::experimental_constrained_roundeven
12132 | 1 | : Intrinsic::roundeven; |
12133 | 1 | return EmitNeonCall(CGM.getIntrinsic(Int, FloatTy), Ops, "vrndn"); |
12134 | 3 | } |
12135 | 3 | case NEON::BI__builtin_neon_vrndph_f16: { |
12136 | 3 | Ops.push_back(EmitScalarExpr(E->getArg(0))); |
12137 | 3 | Int = Builder.getIsFPConstrained() |
12138 | 3 | ? Intrinsic::experimental_constrained_ceil
12139 | 3 | : Intrinsic::ceil;
12140 | 3 | return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndp"); |
12141 | 3 | } |
12142 | 5 | case NEON::BI__builtin_neon_vrndp_v: |
12143 | 17 | case NEON::BI__builtin_neon_vrndpq_v: { |
12144 | 17 | Int = Builder.getIsFPConstrained() |
12145 | 17 | ? Intrinsic::experimental_constrained_ceil
12146 | 17 | : Intrinsic::ceil;
12147 | 17 | return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndp"); |
12148 | 5 | } |
12149 | 3 | case NEON::BI__builtin_neon_vrndxh_f16: { |
12150 | 3 | Ops.push_back(EmitScalarExpr(E->getArg(0))); |
12151 | 3 | Int = Builder.getIsFPConstrained() |
12152 | 3 | ? Intrinsic::experimental_constrained_rint
12153 | 3 | : Intrinsic::rint;
12154 | 3 | return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndx"); |
12155 | 5 | } |
12156 | 5 | case NEON::BI__builtin_neon_vrndx_v: |
12157 | 13 | case NEON::BI__builtin_neon_vrndxq_v: { |
12158 | 13 | Int = Builder.getIsFPConstrained() |
12159 | 13 | ? Intrinsic::experimental_constrained_rint
12160 | 13 | : Intrinsic::rint;
12161 | 13 | return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndx"); |
12162 | 5 | } |
12163 | 3 | case NEON::BI__builtin_neon_vrndh_f16: { |
12164 | 3 | Ops.push_back(EmitScalarExpr(E->getArg(0))); |
12165 | 3 | Int = Builder.getIsFPConstrained() |
12166 | 3 | ? Intrinsic::experimental_constrained_trunc
12167 | 3 | : Intrinsic::trunc;
12168 | 3 | return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndz"); |
12169 | 5 | } |
12170 | 0 | case NEON::BI__builtin_neon_vrnd32x_f32: |
12171 | 0 | case NEON::BI__builtin_neon_vrnd32xq_f32: |
12172 | 0 | case NEON::BI__builtin_neon_vrnd32x_f64: |
12173 | 0 | case NEON::BI__builtin_neon_vrnd32xq_f64: { |
12174 | 0 | Ops.push_back(EmitScalarExpr(E->getArg(0))); |
12175 | 0 | Int = Intrinsic::aarch64_neon_frint32x; |
12176 | 0 | return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd32x"); |
12177 | 0 | } |
12178 | 0 | case NEON::BI__builtin_neon_vrnd32z_f32: |
12179 | 0 | case NEON::BI__builtin_neon_vrnd32zq_f32: |
12180 | 0 | case NEON::BI__builtin_neon_vrnd32z_f64: |
12181 | 0 | case NEON::BI__builtin_neon_vrnd32zq_f64: { |
12182 | 0 | Ops.push_back(EmitScalarExpr(E->getArg(0))); |
12183 | 0 | Int = Intrinsic::aarch64_neon_frint32z; |
12184 | 0 | return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd32z"); |
12185 | 0 | } |
12186 | 0 | case NEON::BI__builtin_neon_vrnd64x_f32: |
12187 | 0 | case NEON::BI__builtin_neon_vrnd64xq_f32: |
12188 | 0 | case NEON::BI__builtin_neon_vrnd64x_f64: |
12189 | 0 | case NEON::BI__builtin_neon_vrnd64xq_f64: { |
12190 | 0 | Ops.push_back(EmitScalarExpr(E->getArg(0))); |
12191 | 0 | Int = Intrinsic::aarch64_neon_frint64x; |
12192 | 0 | return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd64x"); |
12193 | 0 | } |
12194 | 0 | case NEON::BI__builtin_neon_vrnd64z_f32: |
12195 | 0 | case NEON::BI__builtin_neon_vrnd64zq_f32: |
12196 | 0 | case NEON::BI__builtin_neon_vrnd64z_f64: |
12197 | 0 | case NEON::BI__builtin_neon_vrnd64zq_f64: { |
12198 | 0 | Ops.push_back(EmitScalarExpr(E->getArg(0))); |
12199 | 0 | Int = Intrinsic::aarch64_neon_frint64z; |
12200 | 0 | return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd64z"); |
12201 | 0 | } |
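
The four vrnd32/vrnd64 families map directly onto the FRINT32X/FRINT32Z/FRINT64X/FRINT64Z instructions, which round to an integral value representable in 32 or 64 bits. A rough scalar approximation of vrnd32x, assuming the documented out-of-range behavior of producing the most negative 32-bit value; illustrative only, not the lowering itself:

    #include <cmath>
    static double frint32x_model(double X) {
      double R = std::nearbyint(X);               // current rounding mode
      if (!(R >= -2147483648.0 && R <= 2147483647.0))
        return -2147483648.0;                     // out-of-range and NaN
      return R;
    }
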
12202 | 5 | case NEON::BI__builtin_neon_vrnd_v: |
12203 | 13 | case NEON::BI__builtin_neon_vrndq_v: { |
12204 | 13 | Int = Builder.getIsFPConstrained() |
12205 | 13 | ? Intrinsic::experimental_constrained_trunc
12206 | 13 | : Intrinsic::trunc;
12207 | 13 | return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndz"); |
12208 | 5 | } |
12209 | 6 | case NEON::BI__builtin_neon_vcvt_f64_v: |
12210 | 8 | case NEON::BI__builtin_neon_vcvtq_f64_v: |
12211 | 8 | Ops[0] = Builder.CreateBitCast(Ops[0], Ty); |
12212 | 8 | Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad)); |
12213 | 8 | return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
12214 | 8 | : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
12215 | 1 | case NEON::BI__builtin_neon_vcvt_f64_f32: { |
12216 | 1 | assert(Type.getEltType() == NeonTypeFlags::Float64 && quad && |
12217 | 1 | "unexpected vcvt_f64_f32 builtin"); |
12218 | 1 | NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float32, false, false); |
12219 | 1 | Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag)); |
12220 | | |
12221 | 1 | return Builder.CreateFPExt(Ops[0], Ty, "vcvt"); |
12222 | 1 | } |
12223 | 1 | case NEON::BI__builtin_neon_vcvt_f32_f64: { |
12224 | 1 | assert(Type.getEltType() == NeonTypeFlags::Float32 && |
12225 | 1 | "unexpected vcvt_f32_f64 builtin"); |
12226 | 1 | NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float64, false, true); |
12227 | 1 | Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag)); |
12228 | | |
12229 | 1 | return Builder.CreateFPTrunc(Ops[0], Ty, "vcvt"); |
12230 | 1 | } |
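
These two conversions need no target intrinsic at all: they are a plain fpext/fptrunc between the 2-lane float and double vector types. A usage sketch, assuming the arm_neon.h prototypes:

    #include <arm_neon.h>
    float64x2_t widen(float32x2_t V)  { return vcvt_f64_f32(V); } // fpext
    float32x2_t narrow(float64x2_t V) { return vcvt_f32_f64(V); } // fptrunc
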
12231 | 1 | case NEON::BI__builtin_neon_vcvt_s32_v: |
12232 | 2 | case NEON::BI__builtin_neon_vcvt_u32_v: |
12233 | 5 | case NEON::BI__builtin_neon_vcvt_s64_v: |
12234 | 8 | case NEON::BI__builtin_neon_vcvt_u64_v: |
12235 | 9 | case NEON::BI__builtin_neon_vcvt_s16_f16: |
12236 | 10 | case NEON::BI__builtin_neon_vcvt_u16_f16: |
12237 | 11 | case NEON::BI__builtin_neon_vcvtq_s32_v: |
12238 | 12 | case NEON::BI__builtin_neon_vcvtq_u32_v: |
12239 | 13 | case NEON::BI__builtin_neon_vcvtq_s64_v: |
12240 | 14 | case NEON::BI__builtin_neon_vcvtq_u64_v: |
12241 | 15 | case NEON::BI__builtin_neon_vcvtq_s16_f16: |
12242 | 16 | case NEON::BI__builtin_neon_vcvtq_u16_f16: { |
12243 | 16 | Int = |
12244 | 16 | usgn ? Intrinsic::aarch64_neon_fcvtzu : Intrinsic::aarch64_neon_fcvtzs;
12245 | 16 | llvm::Type *Tys[2] = {Ty, GetFloatNeonType(this, Type)}; |
12246 | 16 | return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtz"); |
12247 | 15 | } |
12248 | 1 | case NEON::BI__builtin_neon_vcvta_s16_f16: |
12249 | 2 | case NEON::BI__builtin_neon_vcvta_u16_f16: |
12250 | 3 | case NEON::BI__builtin_neon_vcvta_s32_v: |
12251 | 4 | case NEON::BI__builtin_neon_vcvtaq_s16_f16: |
12252 | 5 | case NEON::BI__builtin_neon_vcvtaq_s32_v: |
12253 | 6 | case NEON::BI__builtin_neon_vcvta_u32_v: |
12254 | 6 | case NEON::BI__builtin_neon_vcvtaq_u16_f16: |
12255 | 7 | case NEON::BI__builtin_neon_vcvtaq_u32_v: |
12256 | 8 | case NEON::BI__builtin_neon_vcvta_s64_v: |
12257 | 9 | case NEON::BI__builtin_neon_vcvtaq_s64_v: |
12258 | 10 | case NEON::BI__builtin_neon_vcvta_u64_v: |
12259 | 11 | case NEON::BI__builtin_neon_vcvtaq_u64_v: { |
12260 | 11 | Int = usgn ? Intrinsic::aarch64_neon_fcvtau : Intrinsic::aarch64_neon_fcvtas;
12261 | 11 | llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) }; |
12262 | 11 | return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvta"); |
12263 | 10 | } |
12264 | 1 | case NEON::BI__builtin_neon_vcvtm_s16_f16: |
12265 | 2 | case NEON::BI__builtin_neon_vcvtm_s32_v: |
12266 | 3 | case NEON::BI__builtin_neon_vcvtmq_s16_f16: |
12267 | 4 | case NEON::BI__builtin_neon_vcvtmq_s32_v: |
12268 | 5 | case NEON::BI__builtin_neon_vcvtm_u16_f16: |
12269 | 6 | case NEON::BI__builtin_neon_vcvtm_u32_v: |
12270 | 7 | case NEON::BI__builtin_neon_vcvtmq_u16_f16: |
12271 | 8 | case NEON::BI__builtin_neon_vcvtmq_u32_v: |
12272 | 9 | case NEON::BI__builtin_neon_vcvtm_s64_v: |
12273 | 10 | case NEON::BI__builtin_neon_vcvtmq_s64_v: |
12274 | 11 | case NEON::BI__builtin_neon_vcvtm_u64_v: |
12275 | 12 | case NEON::BI__builtin_neon_vcvtmq_u64_v: { |
12276 | 12 | Int = usgn ? Intrinsic::aarch64_neon_fcvtmu : Intrinsic::aarch64_neon_fcvtms;
12277 | 12 | llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) }; |
12278 | 12 | return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtm"); |
12279 | 11 | } |
12280 | 1 | case NEON::BI__builtin_neon_vcvtn_s16_f16: |
12281 | 2 | case NEON::BI__builtin_neon_vcvtn_s32_v: |
12282 | 3 | case NEON::BI__builtin_neon_vcvtnq_s16_f16: |
12283 | 4 | case NEON::BI__builtin_neon_vcvtnq_s32_v: |
12284 | 5 | case NEON::BI__builtin_neon_vcvtn_u16_f16: |
12285 | 6 | case NEON::BI__builtin_neon_vcvtn_u32_v: |
12286 | 7 | case NEON::BI__builtin_neon_vcvtnq_u16_f16: |
12287 | 8 | case NEON::BI__builtin_neon_vcvtnq_u32_v: |
12288 | 9 | case NEON::BI__builtin_neon_vcvtn_s64_v: |
12289 | 10 | case NEON::BI__builtin_neon_vcvtnq_s64_v: |
12290 | 11 | case NEON::BI__builtin_neon_vcvtn_u64_v: |
12291 | 12 | case NEON::BI__builtin_neon_vcvtnq_u64_v: { |
12292 | 12 | Int = usgn ? Intrinsic::aarch64_neon_fcvtnu : Intrinsic::aarch64_neon_fcvtns;
12293 | 12 | llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) }; |
12294 | 12 | return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtn"); |
12295 | 11 | } |
12296 | 1 | case NEON::BI__builtin_neon_vcvtp_s16_f16: |
12297 | 2 | case NEON::BI__builtin_neon_vcvtp_s32_v: |
12298 | 3 | case NEON::BI__builtin_neon_vcvtpq_s16_f16: |
12299 | 4 | case NEON::BI__builtin_neon_vcvtpq_s32_v: |
12300 | 5 | case NEON::BI__builtin_neon_vcvtp_u16_f16: |
12301 | 6 | case NEON::BI__builtin_neon_vcvtp_u32_v: |
12302 | 7 | case NEON::BI__builtin_neon_vcvtpq_u16_f16: |
12303 | 8 | case NEON::BI__builtin_neon_vcvtpq_u32_v: |
12304 | 9 | case NEON::BI__builtin_neon_vcvtp_s64_v: |
12305 | 10 | case NEON::BI__builtin_neon_vcvtpq_s64_v: |
12306 | 11 | case NEON::BI__builtin_neon_vcvtp_u64_v: |
12307 | 12 | case NEON::BI__builtin_neon_vcvtpq_u64_v: { |
12308 | 12 | Int = usgn ? Intrinsic::aarch64_neon_fcvtpu : Intrinsic::aarch64_neon_fcvtps;
12309 | 12 | llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) }; |
12310 | 12 | return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtp"); |
12311 | 11 | } |
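
The four fcvt families above differ only in rounding direction: vcvta (fcvtas/fcvtau) rounds to nearest with ties away from zero, vcvtm (fcvtms/fcvtmu) toward minus infinity, vcvtn (fcvtns/fcvtnu) to nearest with ties to even, and vcvtp (fcvtps/fcvtpu) toward plus infinity. A scalar sketch of two of them, ignoring the saturation the real instructions perform; the names are illustrative:

    #include <cmath>
    static long long vcvtms_model(double X) { return (long long)std::floor(X); }
    static long long vcvtps_model(double X) { return (long long)std::ceil(X); }
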
12312 | 4 | case NEON::BI__builtin_neon_vmulx_v: |
12313 | 9 | case NEON::BI__builtin_neon_vmulxq_v: { |
12314 | 9 | Int = Intrinsic::aarch64_neon_fmulx; |
12315 | 9 | return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmulx"); |
12316 | 4 | } |
12317 | 1 | case NEON::BI__builtin_neon_vmulxh_lane_f16: |
12318 | 2 | case NEON::BI__builtin_neon_vmulxh_laneq_f16: { |
12319 | | // vmulx_lane should be mapped to Neon scalar mulx after |
12320 | | // extracting the scalar element |
12321 | 2 | Ops.push_back(EmitScalarExpr(E->getArg(2))); |
12322 | 2 | Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract"); |
12323 | 2 | Ops.pop_back(); |
12324 | 2 | Int = Intrinsic::aarch64_neon_fmulx; |
12325 | 2 | return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmulx"); |
12326 | 1 | } |
12327 | 1 | case NEON::BI__builtin_neon_vmul_lane_v: |
12328 | 3 | case NEON::BI__builtin_neon_vmul_laneq_v: { |
12329 | | // v1f64 vmul_lane should be mapped to Neon scalar mul lane |
12330 | 3 | bool Quad = false; |
12331 | 3 | if (BuiltinID == NEON::BI__builtin_neon_vmul_laneq_v) |
12332 | 2 | Quad = true; |
12333 | 3 | Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy); |
12334 | 3 | llvm::FixedVectorType *VTy = |
12335 | 3 | GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, Quad)); |
12336 | 3 | Ops[1] = Builder.CreateBitCast(Ops[1], VTy); |
12337 | 3 | Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract"); |
12338 | 3 | Value *Result = Builder.CreateFMul(Ops[0], Ops[1]); |
12339 | 3 | return Builder.CreateBitCast(Result, Ty); |
12340 | 1 | } |
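
A usage sketch of the v1f64 mul-by-lane path above: the lane is extracted to a scalar, multiplied with an ordinary fmul, and the result bitcast back. Assuming the arm_neon.h prototype:

    #include <arm_neon.h>
    float64x1_t mul_by_lane1(float64x1_t A, float64x2_t B) {
      return vmul_laneq_f64(A, B, 1);  // extractelement lane 1, then fmul
    }
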
12341 | 1 | case NEON::BI__builtin_neon_vnegd_s64: |
12342 | 1 | return Builder.CreateNeg(EmitScalarExpr(E->getArg(0)), "vnegd"); |
12343 | 1 | case NEON::BI__builtin_neon_vnegh_f16: |
12344 | 1 | return Builder.CreateFNeg(EmitScalarExpr(E->getArg(0)), "vnegh"); |
12345 | 2 | case NEON::BI__builtin_neon_vpmaxnm_v: |
12346 | 5 | case NEON::BI__builtin_neon_vpmaxnmq_v: { |
12347 | 5 | Int = Intrinsic::aarch64_neon_fmaxnmp; |
12348 | 5 | return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmaxnm"); |
12349 | 2 | } |
12350 | 2 | case NEON::BI__builtin_neon_vpminnm_v: |
12351 | 5 | case NEON::BI__builtin_neon_vpminnmq_v: { |
12352 | 5 | Int = Intrinsic::aarch64_neon_fminnmp; |
12353 | 5 | return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpminnm"); |
12354 | 2 | } |
12355 | 3 | case NEON::BI__builtin_neon_vsqrth_f16: { |
12356 | 3 | Ops.push_back(EmitScalarExpr(E->getArg(0))); |
12357 | 3 | Int = Builder.getIsFPConstrained() |
12358 | 3 | ? Intrinsic::experimental_constrained_sqrt
12359 | 3 | : Intrinsic::sqrt;
12360 | 3 | return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vsqrt"); |
12361 | 2 | } |
12362 | 9 | case NEON::BI__builtin_neon_vsqrt_v: |
12363 | 24 | case NEON::BI__builtin_neon_vsqrtq_v: { |
12364 | 24 | Int = Builder.getIsFPConstrained() |
12365 | 24 | ? Intrinsic::experimental_constrained_sqrt
12366 | 24 | : Intrinsic::sqrt;
12367 | 24 | Ops[0] = Builder.CreateBitCast(Ops[0], Ty); |
12368 | 24 | return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqrt"); |
12369 | 9 | } |
12370 | 3 | case NEON::BI__builtin_neon_vrbit_v: |
12371 | 6 | case NEON::BI__builtin_neon_vrbitq_v: { |
12372 | 6 | Int = Intrinsic::bitreverse; |
12373 | 6 | return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrbit"); |
12374 | 3 | } |
12375 | 1 | case NEON::BI__builtin_neon_vaddv_u8: |
12376 | | // FIXME: These are handled by the AArch64 scalar code. |
12377 | 1 | usgn = true; |
12378 | 1 | [[fallthrough]]; |
12379 | 2 | case NEON::BI__builtin_neon_vaddv_s8: { |
12380 | 2 | Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
12381 | 2 | Ty = Int32Ty; |
12382 | 2 | VTy = llvm::FixedVectorType::get(Int8Ty, 8); |
12383 | 2 | llvm::Type *Tys[2] = { Ty, VTy }; |
12384 | 2 | Ops.push_back(EmitScalarExpr(E->getArg(0))); |
12385 | 2 | Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv"); |
12386 | 2 | return Builder.CreateTrunc(Ops[0], Int8Ty); |
12387 | 1 | } |
12388 | 1 | case NEON::BI__builtin_neon_vaddv_u16: |
12389 | 1 | usgn = true; |
12390 | 1 | [[fallthrough]]; |
12391 | 2 | case NEON::BI__builtin_neon_vaddv_s16: { |
12392 | 2 | Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
12393 | 2 | Ty = Int32Ty; |
12394 | 2 | VTy = llvm::FixedVectorType::get(Int16Ty, 4); |
12395 | 2 | llvm::Type *Tys[2] = { Ty, VTy }; |
12396 | 2 | Ops.push_back(EmitScalarExpr(E->getArg(0))); |
12397 | 2 | Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv"); |
12398 | 2 | return Builder.CreateTrunc(Ops[0], Int16Ty); |
12399 | 1 | } |
12400 | 1 | case NEON::BI__builtin_neon_vaddvq_u8: |
12401 | 1 | usgn = true; |
12402 | 1 | [[fallthrough]]; |
12403 | 2 | case NEON::BI__builtin_neon_vaddvq_s8: { |
12404 | 2 | Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
12405 | 2 | Ty = Int32Ty; |
12406 | 2 | VTy = llvm::FixedVectorType::get(Int8Ty, 16); |
12407 | 2 | llvm::Type *Tys[2] = { Ty, VTy }; |
12408 | 2 | Ops.push_back(EmitScalarExpr(E->getArg(0))); |
12409 | 2 | Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv"); |
12410 | 2 | return Builder.CreateTrunc(Ops[0], Int8Ty); |
12411 | 1 | } |
12412 | 1 | case NEON::BI__builtin_neon_vaddvq_u16: |
12413 | 1 | usgn = true; |
12414 | 1 | [[fallthrough]]; |
12415 | 2 | case NEON::BI__builtin_neon_vaddvq_s16: { |
12416 | 2 | Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
12417 | 2 | Ty = Int32Ty; |
12418 | 2 | VTy = llvm::FixedVectorType::get(Int16Ty, 8); |
12419 | 2 | llvm::Type *Tys[2] = { Ty, VTy }; |
12420 | 2 | Ops.push_back(EmitScalarExpr(E->getArg(0))); |
12421 | 2 | Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv"); |
12422 | 2 | return Builder.CreateTrunc(Ops[0], Int16Ty); |
12423 | 1 | } |
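
All of the vaddv cases share one shape: the aarch64.neon.[us]addv intrinsic always yields an i32, so the result is truncated back to the element width. A scalar model of the u8 variant; the function name is hypothetical:

    #include <cstdint>
    static uint8_t vaddv_u8_model(const uint8_t In[8]) {
      uint32_t Sum = 0;                // the intrinsic's i32 result
      for (int i = 0; i < 8; ++i)
        Sum += In[i];
      return (uint8_t)Sum;             // the CreateTrunc to Int8Ty
    }
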
12424 | 1 | case NEON::BI__builtin_neon_vmaxv_u8: { |
12425 | 1 | Int = Intrinsic::aarch64_neon_umaxv; |
12426 | 1 | Ty = Int32Ty; |
12427 | 1 | VTy = llvm::FixedVectorType::get(Int8Ty, 8); |
12428 | 1 | llvm::Type *Tys[2] = { Ty, VTy }; |
12429 | 1 | Ops.push_back(EmitScalarExpr(E->getArg(0))); |
12430 | 1 | Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); |
12431 | 1 | return Builder.CreateTrunc(Ops[0], Int8Ty); |
12432 | 1 | } |
12433 | 1 | case NEON::BI__builtin_neon_vmaxv_u16: { |
12434 | 1 | Int = Intrinsic::aarch64_neon_umaxv; |
12435 | 1 | Ty = Int32Ty; |
12436 | 1 | VTy = llvm::FixedVectorType::get(Int16Ty, 4); |
12437 | 1 | llvm::Type *Tys[2] = { Ty, VTy }; |
12438 | 1 | Ops.push_back(EmitScalarExpr(E->getArg(0))); |
12439 | 1 | Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); |
12440 | 1 | return Builder.CreateTrunc(Ops[0], Int16Ty); |
12441 | 1 | } |
12442 | 1 | case NEON::BI__builtin_neon_vmaxvq_u8: { |
12443 | 1 | Int = Intrinsic::aarch64_neon_umaxv; |
12444 | 1 | Ty = Int32Ty; |
12445 | 1 | VTy = llvm::FixedVectorType::get(Int8Ty, 16); |
12446 | 1 | llvm::Type *Tys[2] = { Ty, VTy }; |
12447 | 1 | Ops.push_back(EmitScalarExpr(E->getArg(0))); |
12448 | 1 | Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); |
12449 | 1 | return Builder.CreateTrunc(Ops[0], Int8Ty); |
12450 | 1 | } |
12451 | 1 | case NEON::BI__builtin_neon_vmaxvq_u16: { |
12452 | 1 | Int = Intrinsic::aarch64_neon_umaxv; |
12453 | 1 | Ty = Int32Ty; |
12454 | 1 | VTy = llvm::FixedVectorType::get(Int16Ty, 8); |
12455 | 1 | llvm::Type *Tys[2] = { Ty, VTy }; |
12456 | 1 | Ops.push_back(EmitScalarExpr(E->getArg(0))); |
12457 | 1 | Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); |
12458 | 1 | return Builder.CreateTrunc(Ops[0], Int16Ty); |
12459 | 1 | } |
12460 | 1 | case NEON::BI__builtin_neon_vmaxv_s8: { |
12461 | 1 | Int = Intrinsic::aarch64_neon_smaxv; |
12462 | 1 | Ty = Int32Ty; |
12463 | 1 | VTy = llvm::FixedVectorType::get(Int8Ty, 8); |
12464 | 1 | llvm::Type *Tys[2] = { Ty, VTy }; |
12465 | 1 | Ops.push_back(EmitScalarExpr(E->getArg(0))); |
12466 | 1 | Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); |
12467 | 1 | return Builder.CreateTrunc(Ops[0], Int8Ty); |
12468 | 1 | } |
12469 | 1 | case NEON::BI__builtin_neon_vmaxv_s16: { |
12470 | 1 | Int = Intrinsic::aarch64_neon_smaxv; |
12471 | 1 | Ty = Int32Ty; |
12472 | 1 | VTy = llvm::FixedVectorType::get(Int16Ty, 4); |
12473 | 1 | llvm::Type *Tys[2] = { Ty, VTy }; |
12474 | 1 | Ops.push_back(EmitScalarExpr(E->getArg(0))); |
12475 | 1 | Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); |
12476 | 1 | return Builder.CreateTrunc(Ops[0], Int16Ty); |
12477 | 1 | } |
12478 | 1 | case NEON::BI__builtin_neon_vmaxvq_s8: { |
12479 | 1 | Int = Intrinsic::aarch64_neon_smaxv; |
12480 | 1 | Ty = Int32Ty; |
12481 | 1 | VTy = llvm::FixedVectorType::get(Int8Ty, 16); |
12482 | 1 | llvm::Type *Tys[2] = { Ty, VTy }; |
12483 | 1 | Ops.push_back(EmitScalarExpr(E->getArg(0))); |
12484 | 1 | Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); |
12485 | 1 | return Builder.CreateTrunc(Ops[0], Int8Ty); |
12486 | 1 | } |
12487 | 1 | case NEON::BI__builtin_neon_vmaxvq_s16: { |
12488 | 1 | Int = Intrinsic::aarch64_neon_smaxv; |
12489 | 1 | Ty = Int32Ty; |
12490 | 1 | VTy = llvm::FixedVectorType::get(Int16Ty, 8); |
12491 | 1 | llvm::Type *Tys[2] = { Ty, VTy }; |
12492 | 1 | Ops.push_back(EmitScalarExpr(E->getArg(0))); |
12493 | 1 | Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); |
12494 | 1 | return Builder.CreateTrunc(Ops[0], Int16Ty); |
12495 | 1 | } |
12496 | 1 | case NEON::BI__builtin_neon_vmaxv_f16: { |
12497 | 1 | Int = Intrinsic::aarch64_neon_fmaxv; |
12498 | 1 | Ty = HalfTy; |
12499 | 1 | VTy = llvm::FixedVectorType::get(HalfTy, 4); |
12500 | 1 | llvm::Type *Tys[2] = { Ty, VTy }; |
12501 | 1 | Ops.push_back(EmitScalarExpr(E->getArg(0))); |
12502 | 1 | Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); |
12503 | 1 | return Builder.CreateTrunc(Ops[0], HalfTy); |
12504 | 1 | } |
12505 | 1 | case NEON::BI__builtin_neon_vmaxvq_f16: { |
12506 | 1 | Int = Intrinsic::aarch64_neon_fmaxv; |
12507 | 1 | Ty = HalfTy; |
12508 | 1 | VTy = llvm::FixedVectorType::get(HalfTy, 8); |
12509 | 1 | llvm::Type *Tys[2] = { Ty, VTy }; |
12510 | 1 | Ops.push_back(EmitScalarExpr(E->getArg(0))); |
12511 | 1 | Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); |
12512 | 1 | return Builder.CreateTrunc(Ops[0], HalfTy); |
12513 | 1 | } |
12514 | 1 | case NEON::BI__builtin_neon_vminv_u8: { |
12515 | 1 | Int = Intrinsic::aarch64_neon_uminv; |
12516 | 1 | Ty = Int32Ty; |
12517 | 1 | VTy = llvm::FixedVectorType::get(Int8Ty, 8); |
12518 | 1 | llvm::Type *Tys[2] = { Ty, VTy }; |
12519 | 1 | Ops.push_back(EmitScalarExpr(E->getArg(0))); |
12520 | 1 | Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); |
12521 | 1 | return Builder.CreateTrunc(Ops[0], Int8Ty); |
12522 | 1 | } |
12523 | 1 | case NEON::BI__builtin_neon_vminv_u16: { |
12524 | 1 | Int = Intrinsic::aarch64_neon_uminv; |
12525 | 1 | Ty = Int32Ty; |
12526 | 1 | VTy = llvm::FixedVectorType::get(Int16Ty, 4); |
12527 | 1 | llvm::Type *Tys[2] = { Ty, VTy }; |
12528 | 1 | Ops.push_back(EmitScalarExpr(E->getArg(0))); |
12529 | 1 | Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); |
12530 | 1 | return Builder.CreateTrunc(Ops[0], Int16Ty); |
12531 | 1 | } |
12532 | 1 | case NEON::BI__builtin_neon_vminvq_u8: { |
12533 | 1 | Int = Intrinsic::aarch64_neon_uminv; |
12534 | 1 | Ty = Int32Ty; |
12535 | 1 | VTy = llvm::FixedVectorType::get(Int8Ty, 16); |
12536 | 1 | llvm::Type *Tys[2] = { Ty, VTy }; |
12537 | 1 | Ops.push_back(EmitScalarExpr(E->getArg(0))); |
12538 | 1 | Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); |
12539 | 1 | return Builder.CreateTrunc(Ops[0], Int8Ty); |
12540 | 1 | } |
12541 | 1 | case NEON::BI__builtin_neon_vminvq_u16: { |
12542 | 1 | Int = Intrinsic::aarch64_neon_uminv; |
12543 | 1 | Ty = Int32Ty; |
12544 | 1 | VTy = llvm::FixedVectorType::get(Int16Ty, 8); |
12545 | 1 | llvm::Type *Tys[2] = { Ty, VTy }; |
12546 | 1 | Ops.push_back(EmitScalarExpr(E->getArg(0))); |
12547 | 1 | Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); |
12548 | 1 | return Builder.CreateTrunc(Ops[0], Int16Ty); |
12549 | 1 | } |
12550 | 1 | case NEON::BI__builtin_neon_vminv_s8: { |
12551 | 1 | Int = Intrinsic::aarch64_neon_sminv; |
12552 | 1 | Ty = Int32Ty; |
12553 | 1 | VTy = llvm::FixedVectorType::get(Int8Ty, 8); |
12554 | 1 | llvm::Type *Tys[2] = { Ty, VTy }; |
12555 | 1 | Ops.push_back(EmitScalarExpr(E->getArg(0))); |
12556 | 1 | Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); |
12557 | 1 | return Builder.CreateTrunc(Ops[0], Int8Ty); |
12558 | 1 | } |
12559 | 1 | case NEON::BI__builtin_neon_vminv_s16: { |
12560 | 1 | Int = Intrinsic::aarch64_neon_sminv; |
12561 | 1 | Ty = Int32Ty; |
12562 | 1 | VTy = llvm::FixedVectorType::get(Int16Ty, 4); |
12563 | 1 | llvm::Type *Tys[2] = { Ty, VTy }; |
12564 | 1 | Ops.push_back(EmitScalarExpr(E->getArg(0))); |
12565 | 1 | Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); |
12566 | 1 | return Builder.CreateTrunc(Ops[0], Int16Ty); |
12567 | 1 | } |
12568 | 1 | case NEON::BI__builtin_neon_vminvq_s8: { |
12569 | 1 | Int = Intrinsic::aarch64_neon_sminv; |
12570 | 1 | Ty = Int32Ty; |
12571 | 1 | VTy = llvm::FixedVectorType::get(Int8Ty, 16); |
12572 | 1 | llvm::Type *Tys[2] = { Ty, VTy }; |
12573 | 1 | Ops.push_back(EmitScalarExpr(E->getArg(0))); |
12574 | 1 | Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); |
12575 | 1 | return Builder.CreateTrunc(Ops[0], Int8Ty); |
12576 | 1 | } |
12577 | 1 | case NEON::BI__builtin_neon_vminvq_s16: { |
12578 | 1 | Int = Intrinsic::aarch64_neon_sminv; |
12579 | 1 | Ty = Int32Ty; |
12580 | 1 | VTy = llvm::FixedVectorType::get(Int16Ty, 8); |
12581 | 1 | llvm::Type *Tys[2] = { Ty, VTy }; |
12582 | 1 | Ops.push_back(EmitScalarExpr(E->getArg(0))); |
12583 | 1 | Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); |
12584 | 1 | return Builder.CreateTrunc(Ops[0], Int16Ty); |
12585 | 1 | } |
12586 | 1 | case NEON::BI__builtin_neon_vminv_f16: { |
12587 | 1 | Int = Intrinsic::aarch64_neon_fminv; |
12588 | 1 | Ty = HalfTy; |
12589 | 1 | VTy = llvm::FixedVectorType::get(HalfTy, 4); |
12590 | 1 | llvm::Type *Tys[2] = { Ty, VTy }; |
12591 | 1 | Ops.push_back(EmitScalarExpr(E->getArg(0))); |
12592 | 1 | Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); |
12593 | 1 | return Builder.CreateTrunc(Ops[0], HalfTy); |
12594 | 1 | } |
12595 | 1 | case NEON::BI__builtin_neon_vminvq_f16: { |
12596 | 1 | Int = Intrinsic::aarch64_neon_fminv; |
12597 | 1 | Ty = HalfTy; |
12598 | 1 | VTy = llvm::FixedVectorType::get(HalfTy, 8); |
12599 | 1 | llvm::Type *Tys[2] = { Ty, VTy }; |
12600 | 1 | Ops.push_back(EmitScalarExpr(E->getArg(0))); |
12601 | 1 | Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); |
12602 | 1 | return Builder.CreateTrunc(Ops[0], HalfTy); |
12603 | 1 | } |
12604 | 1 | case NEON::BI__builtin_neon_vmaxnmv_f16: { |
12605 | 1 | Int = Intrinsic::aarch64_neon_fmaxnmv; |
12606 | 1 | Ty = HalfTy; |
12607 | 1 | VTy = llvm::FixedVectorType::get(HalfTy, 4); |
12608 | 1 | llvm::Type *Tys[2] = { Ty, VTy }; |
12609 | 1 | Ops.push_back(EmitScalarExpr(E->getArg(0))); |
12610 | 1 | Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv"); |
12611 | 1 | return Builder.CreateTrunc(Ops[0], HalfTy); |
12612 | 1 | } |
12613 | 1 | case NEON::BI__builtin_neon_vmaxnmvq_f16: { |
12614 | 1 | Int = Intrinsic::aarch64_neon_fmaxnmv; |
12615 | 1 | Ty = HalfTy; |
12616 | 1 | VTy = llvm::FixedVectorType::get(HalfTy, 8); |
12617 | 1 | llvm::Type *Tys[2] = { Ty, VTy }; |
12618 | 1 | Ops.push_back(EmitScalarExpr(E->getArg(0))); |
12619 | 1 | Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv"); |
12620 | 1 | return Builder.CreateTrunc(Ops[0], HalfTy); |
12621 | 1 | } |
12622 | 1 | case NEON::BI__builtin_neon_vminnmv_f16: { |
12623 | 1 | Int = Intrinsic::aarch64_neon_fminnmv; |
12624 | 1 | Ty = HalfTy; |
12625 | 1 | VTy = llvm::FixedVectorType::get(HalfTy, 4); |
12626 | 1 | llvm::Type *Tys[2] = { Ty, VTy }; |
12627 | 1 | Ops.push_back(EmitScalarExpr(E->getArg(0))); |
12628 | 1 | Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv"); |
12629 | 1 | return Builder.CreateTrunc(Ops[0], HalfTy); |
12630 | 1 | } |
12631 | 1 | case NEON::BI__builtin_neon_vminnmvq_f16: { |
12632 | 1 | Int = Intrinsic::aarch64_neon_fminnmv; |
12633 | 1 | Ty = HalfTy; |
12634 | 1 | VTy = llvm::FixedVectorType::get(HalfTy, 8); |
12635 | 1 | llvm::Type *Tys[2] = { Ty, VTy }; |
12636 | 1 | Ops.push_back(EmitScalarExpr(E->getArg(0))); |
12637 | 1 | Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv"); |
12638 | 1 | return Builder.CreateTrunc(Ops[0], HalfTy); |
12639 | 1 | } |
12640 | 1 | case NEON::BI__builtin_neon_vmul_n_f64: { |
12641 | 1 | Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy); |
12642 | 1 | Value *RHS = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), DoubleTy); |
12643 | 1 | return Builder.CreateFMul(Ops[0], RHS); |
12644 | 1 | } |
12645 | 1 | case NEON::BI__builtin_neon_vaddlv_u8: { |
12646 | 1 | Int = Intrinsic::aarch64_neon_uaddlv; |
12647 | 1 | Ty = Int32Ty; |
12648 | 1 | VTy = llvm::FixedVectorType::get(Int8Ty, 8); |
12649 | 1 | llvm::Type *Tys[2] = { Ty, VTy }; |
12650 | 1 | Ops.push_back(EmitScalarExpr(E->getArg(0))); |
12651 | 1 | Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); |
12652 | 1 | return Builder.CreateTrunc(Ops[0], Int16Ty); |
12653 | 1 | } |
12654 | 1 | case NEON::BI__builtin_neon_vaddlv_u16: { |
12655 | 1 | Int = Intrinsic::aarch64_neon_uaddlv; |
12656 | 1 | Ty = Int32Ty; |
12657 | 1 | VTy = llvm::FixedVectorType::get(Int16Ty, 4); |
12658 | 1 | llvm::Type *Tys[2] = { Ty, VTy }; |
12659 | 1 | Ops.push_back(EmitScalarExpr(E->getArg(0))); |
12660 | 1 | return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); |
12661 | 1 | } |
12662 | 1 | case NEON::BI__builtin_neon_vaddlvq_u8: { |
12663 | 1 | Int = Intrinsic::aarch64_neon_uaddlv; |
12664 | 1 | Ty = Int32Ty; |
12665 | 1 | VTy = llvm::FixedVectorType::get(Int8Ty, 16); |
12666 | 1 | llvm::Type *Tys[2] = { Ty, VTy }; |
12667 | 1 | Ops.push_back(EmitScalarExpr(E->getArg(0))); |
12668 | 1 | Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); |
12669 | 1 | return Builder.CreateTrunc(Ops[0], Int16Ty); |
12670 | 1 | } |
12671 | 1 | case NEON::BI__builtin_neon_vaddlvq_u16: { |
12672 | 1 | Int = Intrinsic::aarch64_neon_uaddlv; |
12673 | 1 | Ty = Int32Ty; |
12674 | 1 | VTy = llvm::FixedVectorType::get(Int16Ty, 8); |
12675 | 1 | llvm::Type *Tys[2] = { Ty, VTy }; |
12676 | 1 | Ops.push_back(EmitScalarExpr(E->getArg(0))); |
12677 | 1 | return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); |
12678 | 1 | } |
12679 | 1 | case NEON::BI__builtin_neon_vaddlv_s8: { |
12680 | 1 | Int = Intrinsic::aarch64_neon_saddlv; |
12681 | 1 | Ty = Int32Ty; |
12682 | 1 | VTy = llvm::FixedVectorType::get(Int8Ty, 8); |
12683 | 1 | llvm::Type *Tys[2] = { Ty, VTy }; |
12684 | 1 | Ops.push_back(EmitScalarExpr(E->getArg(0))); |
12685 | 1 | Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); |
12686 | 1 | return Builder.CreateTrunc(Ops[0], Int16Ty); |
12687 | 1 | } |
12688 | 1 | case NEON::BI__builtin_neon_vaddlv_s16: { |
12689 | 1 | Int = Intrinsic::aarch64_neon_saddlv; |
12690 | 1 | Ty = Int32Ty; |
12691 | 1 | VTy = llvm::FixedVectorType::get(Int16Ty, 4); |
12692 | 1 | llvm::Type *Tys[2] = { Ty, VTy }; |
12693 | 1 | Ops.push_back(EmitScalarExpr(E->getArg(0))); |
12694 | 1 | return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); |
12695 | 1 | } |
12696 | 1 | case NEON::BI__builtin_neon_vaddlvq_s8: { |
12697 | 1 | Int = Intrinsic::aarch64_neon_saddlv; |
12698 | 1 | Ty = Int32Ty; |
12699 | 1 | VTy = llvm::FixedVectorType::get(Int8Ty, 16); |
12700 | 1 | llvm::Type *Tys[2] = { Ty, VTy }; |
12701 | 1 | Ops.push_back(EmitScalarExpr(E->getArg(0))); |
12702 | 1 | Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); |
12703 | 1 | return Builder.CreateTrunc(Ops[0], Int16Ty); |
12704 | 1 | } |
12705 | 1 | case NEON::BI__builtin_neon_vaddlvq_s16: { |
12706 | 1 | Int = Intrinsic::aarch64_neon_saddlv; |
12707 | 1 | Ty = Int32Ty; |
12708 | 1 | VTy = llvm::FixedVectorType::get(Int16Ty, 8); |
12709 | 1 | llvm::Type *Tys[2] = { Ty, VTy }; |
12710 | 1 | Ops.push_back(EmitScalarExpr(E->getArg(0))); |
12711 | 1 | return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); |
12712 | 1 | } |
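
The vaddlv cases are widening reductions: the intrinsic result is again i32, and only the 8-bit variants truncate it, down to the i16 the ACLE signature promises; the 16-bit variants return the i32 directly. A scalar model of the u8 form, with a hypothetical name:

    #include <cstdint>
    static uint16_t vaddlv_u8_model(const uint8_t In[8]) {
      uint32_t Sum = 0;
      for (int i = 0; i < 8; ++i)
        Sum += In[i];
      return (uint16_t)Sum;            // truncated to i16 for the u8 variant
    }
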
12713 | 11 | case NEON::BI__builtin_neon_vsri_n_v: |
12714 | 22 | case NEON::BI__builtin_neon_vsriq_n_v: { |
12715 | 22 | Int = Intrinsic::aarch64_neon_vsri; |
12716 | 22 | llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty); |
12717 | 22 | return EmitNeonCall(Intrin, Ops, "vsri_n"); |
12718 | 11 | } |
12719 | 11 | case NEON::BI__builtin_neon_vsli_n_v: |
12720 | 22 | case NEON::BI__builtin_neon_vsliq_n_v: { |
12721 | 22 | Int = Intrinsic::aarch64_neon_vsli; |
12722 | 22 | llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty); |
12723 | 22 | return EmitNeonCall(Intrin, Ops, "vsli_n"); |
12724 | 11 | } |
12725 | 11 | case NEON::BI__builtin_neon_vsra_n_v: |
12726 | 19 | case NEON::BI__builtin_neon_vsraq_n_v: |
12727 | 19 | Ops[0] = Builder.CreateBitCast(Ops[0], Ty); |
12728 | 19 | Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n"); |
12729 | 19 | return Builder.CreateAdd(Ops[0], Ops[1]); |
12730 | 8 | case NEON::BI__builtin_neon_vrsra_n_v: |
12731 | 16 | case NEON::BI__builtin_neon_vrsraq_n_v: { |
12732 | 16 | Int = usgn ? Intrinsic::aarch64_neon_urshl : Intrinsic::aarch64_neon_srshl;
12733 | 16 | SmallVector<llvm::Value*,2> TmpOps; |
12734 | 16 | TmpOps.push_back(Ops[1]); |
12735 | 16 | TmpOps.push_back(Ops[2]); |
12736 | 16 | Function* F = CGM.getIntrinsic(Int, Ty); |
12737 | 16 | llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vrshr_n", 1, true); |
12738 | 16 | Ops[0] = Builder.CreateBitCast(Ops[0], VTy); |
12739 | 16 | return Builder.CreateAdd(Ops[0], tmp); |
12740 | 8 | } |
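
vrsra_n is emitted as a rounding shift ([us]rshl with the shift amount negated via EmitNeonCall's rightshift path) followed by an add of the accumulator. A scalar model of the signed 8-bit lane, assuming 1 <= N <= 8 and arithmetic right shift; illustrative only:

    #include <cstdint>
    static int8_t vrsra_n_s8_model(int8_t Acc, int8_t V, unsigned N) {
      // Rounding right shift: add half the shift unit before shifting.
      int32_t Rounded = ((int32_t)V + (1 << (N - 1))) >> N;
      return (int8_t)(Acc + Rounded);
    }
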
12741 | 32 | case NEON::BI__builtin_neon_vld1_v: |
12742 | 47 | case NEON::BI__builtin_neon_vld1q_v: { |
12743 | 47 | return Builder.CreateAlignedLoad(VTy, Ops[0], PtrOp0.getAlignment()); |
12744 | 32 | } |
12745 | 15 | case NEON::BI__builtin_neon_vst1_v: |
12746 | 30 | case NEON::BI__builtin_neon_vst1q_v: |
12747 | 30 | Ops[1] = Builder.CreateBitCast(Ops[1], VTy); |
12748 | 30 | return Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment()); |
12749 | 15 | case NEON::BI__builtin_neon_vld1_lane_v: |
12750 | 30 | case NEON::BI__builtin_neon_vld1q_lane_v: { |
12751 | 30 | Ops[1] = Builder.CreateBitCast(Ops[1], Ty); |
12752 | 30 | Ops[0] = Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0], |
12753 | 30 | PtrOp0.getAlignment()); |
12754 | 30 | return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vld1_lane"); |
12755 | 15 | } |
12756 | 4 | case NEON::BI__builtin_neon_vldap1_lane_s64: |
12757 | 8 | case NEON::BI__builtin_neon_vldap1q_lane_s64: { |
12758 | 8 | Ops[1] = Builder.CreateBitCast(Ops[1], Ty); |
12759 | 8 | llvm::LoadInst *LI = Builder.CreateAlignedLoad( |
12760 | 8 | VTy->getElementType(), Ops[0], PtrOp0.getAlignment()); |
12761 | 8 | LI->setAtomic(llvm::AtomicOrdering::Acquire); |
12762 | 8 | Ops[0] = LI; |
12763 | 8 | return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vldap1_lane"); |
12764 | 4 | } |
12765 | 15 | case NEON::BI__builtin_neon_vld1_dup_v: |
12766 | 30 | case NEON::BI__builtin_neon_vld1q_dup_v: { |
12767 | 30 | Value *V = PoisonValue::get(Ty); |
12768 | 30 | Ops[0] = Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0], |
12769 | 30 | PtrOp0.getAlignment()); |
12770 | 30 | llvm::Constant *CI = ConstantInt::get(Int32Ty, 0); |
12771 | 30 | Ops[0] = Builder.CreateInsertElement(V, Ops[0], CI); |
12772 | 30 | return EmitNeonSplat(Ops[0], CI); |
12773 | 15 | } |
12774 | 15 | case NEON::BI__builtin_neon_vst1_lane_v: |
12775 | 30 | case NEON::BI__builtin_neon_vst1q_lane_v: |
12776 | 30 | Ops[1] = Builder.CreateBitCast(Ops[1], Ty); |
12777 | 30 | Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]); |
12778 | 30 | return Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment()); |
12779 | 4 | case NEON::BI__builtin_neon_vstl1_lane_s64: |
12780 | 8 | case NEON::BI__builtin_neon_vstl1q_lane_s64: { |
12781 | 8 | Ops[1] = Builder.CreateBitCast(Ops[1], Ty); |
12782 | 8 | Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]); |
12783 | 8 | llvm::StoreInst *SI = |
12784 | 8 | Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment()); |
12785 | 8 | SI->setAtomic(llvm::AtomicOrdering::Release); |
12786 | 8 | return SI; |
12787 | 4 | } |
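
vldap1/vstl1 differ from the vld1/vst1 lane cases only in memory ordering: the emitted lane load is marked acquire (LDAP1) and the lane store release (STL1). The closest portable analogue, for the 64-bit lane:

    #include <atomic>
    #include <cstdint>
    static int64_t lane_load_acquire(const std::atomic<int64_t> *P) {
      return P->load(std::memory_order_acquire);   // LDAP1-like
    }
    static void lane_store_release(std::atomic<int64_t> *P, int64_t V) {
      P->store(V, std::memory_order_release);      // STL1-like
    }
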
12788 | 15 | case NEON::BI__builtin_neon_vld2_v: |
12789 | 30 | case NEON::BI__builtin_neon_vld2q_v: { |
12790 | 30 | llvm::Type *Tys[2] = {VTy, UnqualPtrTy}; |
12791 | 30 | Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2, Tys); |
12792 | 30 | Ops[1] = Builder.CreateCall(F, Ops[1], "vld2"); |
12793 | 30 | return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); |
12794 | 15 | } |
12795 | 15 | case NEON::BI__builtin_neon_vld3_v: |
12796 | 30 | case NEON::BI__builtin_neon_vld3q_v: { |
12797 | 30 | llvm::Type *Tys[2] = {VTy, UnqualPtrTy}; |
12798 | 30 | Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3, Tys); |
12799 | 30 | Ops[1] = Builder.CreateCall(F, Ops[1], "vld3"); |
12800 | 30 | return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); |
12801 | 15 | } |
12802 | 15 | case NEON::BI__builtin_neon_vld4_v: |
12803 | 30 | case NEON::BI__builtin_neon_vld4q_v: { |
12804 | 30 | llvm::Type *Tys[2] = {VTy, UnqualPtrTy}; |
12805 | 30 | Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4, Tys); |
12806 | 30 | Ops[1] = Builder.CreateCall(F, Ops[1], "vld4"); |
12807 | 30 | return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); |
12808 | 15 | } |
12809 | 15 | case NEON::BI__builtin_neon_vld2_dup_v: |
12810 | 30 | case NEON::BI__builtin_neon_vld2q_dup_v: { |
12811 | 30 | llvm::Type *Tys[2] = {VTy, UnqualPtrTy}; |
12812 | 30 | Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2r, Tys); |
12813 | 30 | Ops[1] = Builder.CreateCall(F, Ops[1], "vld2"); |
12814 | 30 | return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); |
12815 | 15 | } |
12816 | 15 | case NEON::BI__builtin_neon_vld3_dup_v: |
12817 | 30 | case NEON::BI__builtin_neon_vld3q_dup_v: { |
12818 | 30 | llvm::Type *Tys[2] = {VTy, UnqualPtrTy}; |
12819 | 30 | Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3r, Tys); |
12820 | 30 | Ops[1] = Builder.CreateCall(F, Ops[1], "vld3"); |
12821 | 30 | return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); |
12822 | 15 | } |
12823 | 15 | case NEON::BI__builtin_neon_vld4_dup_v: |
12824 | 30 | case NEON::BI__builtin_neon_vld4q_dup_v: { |
12825 | 30 | llvm::Type *Tys[2] = {VTy, UnqualPtrTy}; |
12826 | 30 | Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4r, Tys); |
12827 | 30 | Ops[1] = Builder.CreateCall(F, Ops[1], "vld4"); |
12828 | 30 | return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); |
12829 | 15 | } |
12830 | 15 | case NEON::BI__builtin_neon_vld2_lane_v: |
12831 | 30 | case NEON::BI__builtin_neon_vld2q_lane_v: { |
12832 | 30 | llvm::Type *Tys[2] = { VTy, Ops[1]->getType() }; |
12833 | 30 | Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2lane, Tys); |
12834 | 30 | std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end()); |
12835 | 30 | Ops[1] = Builder.CreateBitCast(Ops[1], Ty); |
12836 | 30 | Ops[2] = Builder.CreateBitCast(Ops[2], Ty); |
12837 | 30 | Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty); |
12838 | 30 | Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld2_lane"); |
12839 | 30 | return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); |
12840 | 15 | } |
12841 | 15 | case NEON::BI__builtin_neon_vld3_lane_v: |
12842 | 30 | case NEON::BI__builtin_neon_vld3q_lane_v: { |
12843 | 30 | llvm::Type *Tys[2] = { VTy, Ops[1]->getType() }; |
12844 | 30 | Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3lane, Tys); |
12845 | 30 | std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end()); |
12846 | 30 | Ops[1] = Builder.CreateBitCast(Ops[1], Ty); |
12847 | 30 | Ops[2] = Builder.CreateBitCast(Ops[2], Ty); |
12848 | 30 | Ops[3] = Builder.CreateBitCast(Ops[3], Ty); |
12849 | 30 | Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty); |
12850 | 30 | Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld3_lane"); |
12851 | 30 | return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); |
12852 | 15 | } |
12853 | 15 | case NEON::BI__builtin_neon_vld4_lane_v: |
12854 | 30 | case NEON::BI__builtin_neon_vld4q_lane_v: { |
12855 | 30 | llvm::Type *Tys[2] = { VTy, Ops[1]->getType() }; |
12856 | 30 | Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4lane, Tys); |
12857 | 30 | std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end()); |
12858 | 30 | Ops[1] = Builder.CreateBitCast(Ops[1], Ty); |
12859 | 30 | Ops[2] = Builder.CreateBitCast(Ops[2], Ty); |
12860 | 30 | Ops[3] = Builder.CreateBitCast(Ops[3], Ty); |
12861 | 30 | Ops[4] = Builder.CreateBitCast(Ops[4], Ty); |
12862 | 30 | Ops[5] = Builder.CreateZExt(Ops[5], Int64Ty); |
12863 | 30 | Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld4_lane"); |
12864 | 30 | return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); |
12865 | 15 | } |
12866 | 15 | case NEON::BI__builtin_neon_vst2_v: |
12867 | 30 | case NEON::BI__builtin_neon_vst2q_v: { |
12868 | 30 | std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end()); |
12869 | 30 | llvm::Type *Tys[2] = { VTy, Ops[2]->getType() }; |
12870 | 30 | return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2, Tys), |
12871 | 30 | Ops, ""); |
12872 | 15 | } |
12873 | 15 | case NEON::BI__builtin_neon_vst2_lane_v: |
12874 | 30 | case NEON::BI__builtin_neon_vst2q_lane_v: { |
12875 | 30 | std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end()); |
12876 | 30 | Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty); |
12877 | 30 | llvm::Type *Tys[2] = { VTy, Ops[3]->getType() }; |
12878 | 30 | return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2lane, Tys), |
12879 | 30 | Ops, ""); |
12880 | 15 | } |
12881 | 15 | case NEON::BI__builtin_neon_vst3_v: |
12882 | 30 | case NEON::BI__builtin_neon_vst3q_v: { |
12883 | 30 | std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end()); |
12884 | 30 | llvm::Type *Tys[2] = { VTy, Ops[3]->getType() }; |
12885 | 30 | return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3, Tys), |
12886 | 30 | Ops, ""); |
12887 | 15 | } |
12888 | 15 | case NEON::BI__builtin_neon_vst3_lane_v: |
12889 | 30 | case NEON::BI__builtin_neon_vst3q_lane_v: { |
12890 | 30 | std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end()); |
12891 | 30 | Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty); |
12892 | 30 | llvm::Type *Tys[2] = { VTy, Ops[4]->getType() }; |
12893 | 30 | return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3lane, Tys), |
12894 | 30 | Ops, ""); |
12895 | 15 | } |
12896 | 15 | case NEON::BI__builtin_neon_vst4_v: |
12897 | 30 | case NEON::BI__builtin_neon_vst4q_v: { |
12898 | 30 | std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end()); |
12899 | 30 | llvm::Type *Tys[2] = { VTy, Ops[4]->getType() }; |
12900 | 30 | return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4, Tys), |
12901 | 30 | Ops, ""); |
12902 | 15 | } |
12903 | 15 | case NEON::BI__builtin_neon_vst4_lane_v: |
12904 | 30 | case NEON::BI__builtin_neon_vst4q_lane_v: { |
12905 | 30 | std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end()); |
12906 | 30 | Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty); |
12907 | 30 | llvm::Type *Tys[2] = { VTy, Ops[5]->getType() }; |
12908 | 30 | return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4lane, Tys), |
12909 | 30 | Ops, ""); |
12910 | 15 | } |
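
Every vst* case above starts with the same std::rotate: the clang builtin passes the destination pointer first, but the aarch64.neon.st* intrinsics take it last, so the first operand is rotated to the back. A standalone illustration with made-up operand values:

    #include <algorithm>
    #include <vector>
    static void rotate_demo() {
      std::vector<int> Ops = {/*ptr*/ 0, /*v0*/ 1, /*v1*/ 2, /*lane*/ 3};
      std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
      // Ops is now {1, 2, 3, 0}: vectors and lane first, pointer last.
    }
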
12911 | 10 | case NEON::BI__builtin_neon_vtrn_v: |
12912 | 20 | case NEON::BI__builtin_neon_vtrnq_v: { |
12913 | 20 | Ops[1] = Builder.CreateBitCast(Ops[1], Ty); |
12914 | 20 | Ops[2] = Builder.CreateBitCast(Ops[2], Ty); |
12915 | 20 | Value *SV = nullptr; |
12916 | | |
12917 | 60 | for (unsigned vi = 0; vi != 2; ++vi) {
12918 | 40 | SmallVector<int, 16> Indices; |
12919 | 178 | for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
12920 | 138 | Indices.push_back(i+vi); |
12921 | 138 | Indices.push_back(i+e+vi); |
12922 | 138 | } |
12923 | 40 | Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi); |
12924 | 40 | SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn"); |
12925 | 40 | SV = Builder.CreateDefaultAlignedStore(SV, Addr); |
12926 | 40 | } |
12927 | 20 | return SV; |
12928 | 10 | } |
12929 | 10 | case NEON::BI__builtin_neon_vuzp_v: |
12930 | 20 | case NEON::BI__builtin_neon_vuzpq_v: { |
12931 | 20 | Ops[1] = Builder.CreateBitCast(Ops[1], Ty); |
12932 | 20 | Ops[2] = Builder.CreateBitCast(Ops[2], Ty); |
12933 | 20 | Value *SV = nullptr; |
12934 | | |
12935 | 60 | for (unsigned vi = 0; vi != 2; ++vi) {
12936 | 40 | SmallVector<int, 16> Indices; |
12937 | 316 | for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
12938 | 276 | Indices.push_back(2*i+vi); |
12939 | | |
12940 | 40 | Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi); |
12941 | 40 | SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp"); |
12942 | 40 | SV = Builder.CreateDefaultAlignedStore(SV, Addr); |
12943 | 40 | } |
12944 | 20 | return SV; |
12945 | 10 | } |
12946 | 10 | case NEON::BI__builtin_neon_vzip_v: |
12947 | 22 | case NEON::BI__builtin_neon_vzipq_v: { |
12948 | 22 | Ops[1] = Builder.CreateBitCast(Ops[1], Ty); |
12949 | 22 | Ops[2] = Builder.CreateBitCast(Ops[2], Ty); |
12950 | 22 | Value *SV = nullptr; |
12951 | | |
12952 | 66 | for (unsigned vi = 0; vi != 2; ++vi) {
12953 | 44 | SmallVector<int, 16> Indices; |
12954 | 214 | for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
12955 | 170 | Indices.push_back((i + vi*e) >> 1); |
12956 | 170 | Indices.push_back(((i + vi*e) >> 1)+e); |
12957 | 170 | } |
12958 | 44 | Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi); |
12959 | 44 | SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip"); |
12960 | 44 | SV = Builder.CreateDefaultAlignedStore(SV, Addr); |
12961 | 44 | } |
12962 | 22 | return SV; |
12963 | 10 | } |
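
The vtrn/vuzp/vzip cases build shufflevector masks over the concatenation of Ops[1] and Ops[2] (lanes 0 through 2e-1). For e = 4 the loops above produce: vtrn {0,4,2,6} then {1,5,3,7}; vuzp {0,2,4,6} then {1,3,5,7}; vzip {0,4,1,5} then {2,6,3,7}. The vzip index computation, extracted into a standalone sketch with a hypothetical helper name:

    #include <vector>
    static std::vector<int> zipIndices(unsigned e, unsigned vi) {
      std::vector<int> Indices;
      for (unsigned i = 0; i != e; i += 2) {
        Indices.push_back((i + vi * e) >> 1);        // lane from Ops[1]'s half
        Indices.push_back(((i + vi * e) >> 1) + e);  // paired lane from Ops[2]
      }
      return Indices;  // e.g. zipIndices(4, 0) == {0, 4, 1, 5}
    }
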
12964 | 0 | case NEON::BI__builtin_neon_vqtbl1q_v: { |
12965 | 0 | return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl1, Ty), |
12966 | 0 | Ops, "vtbl1"); |
12967 | 10 | } |
12968 | 0 | case NEON::BI__builtin_neon_vqtbl2q_v: { |
12969 | 0 | return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl2, Ty), |
12970 | 0 | Ops, "vtbl2"); |
12971 | 10 | } |
12972 | 0 | case NEON::BI__builtin_neon_vqtbl3q_v: { |
12973 | 0 | return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl3, Ty), |
12974 | 0 | Ops, "vtbl3"); |
12975 | 10 | } |
12976 | 0 | case NEON::BI__builtin_neon_vqtbl4q_v: { |
12977 | 0 | return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl4, Ty), |
12978 | 0 | Ops, "vtbl4"); |
12979 | 10 | } |
12980 | 0 | case NEON::BI__builtin_neon_vqtbx1q_v: { |
12981 | 0 | return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx1, Ty), |
12982 | 0 | Ops, "vtbx1"); |
12983 | 10 | } |
12984 | 0 | case NEON::BI__builtin_neon_vqtbx2q_v: { |
12985 | 0 | return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx2, Ty), |
12986 | 0 | Ops, "vtbx2"); |
12987 | 10 | } |
12988 | 0 | case NEON::BI__builtin_neon_vqtbx3q_v: { |
12989 | 0 | return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx3, Ty), |
12990 | 0 | Ops, "vtbx3"); |
12991 | 10 | } |
12992 | 0 | case NEON::BI__builtin_neon_vqtbx4q_v: { |
12993 | 0 | return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx4, Ty), |
12994 | 0 | Ops, "vtbx4"); |
12995 | 10 | } |
12996 | 4 | case NEON::BI__builtin_neon_vsqadd_v: |
12997 | 8 | case NEON::BI__builtin_neon_vsqaddq_v: { |
12998 | 8 | Int = Intrinsic::aarch64_neon_usqadd; |
12999 | 8 | return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqadd"); |
13000 | 4 | } |
13001 | 7 | case NEON::BI__builtin_neon_vuqadd_v: |
13002 | 15 | case NEON::BI__builtin_neon_vuqaddq_v: { |
13003 | 15 | Int = Intrinsic::aarch64_neon_suqadd; |
13004 | 15 | return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd"); |
13005 | 7 | } |
13006 | 1.40k | } |
13007 | 1.40k | } |
13008 | | |
13009 | | Value *CodeGenFunction::EmitBPFBuiltinExpr(unsigned BuiltinID, |
13010 | 0 | const CallExpr *E) { |
13011 | 0 | assert((BuiltinID == BPF::BI__builtin_preserve_field_info || |
13012 | 0 | BuiltinID == BPF::BI__builtin_btf_type_id || |
13013 | 0 | BuiltinID == BPF::BI__builtin_preserve_type_info || |
13014 | 0 | BuiltinID == BPF::BI__builtin_preserve_enum_value) && |
13015 | 0 | "unexpected BPF builtin"); |
13016 | | |
13017 | | // A sequence number, injected into IR builtin function calls, to
13018 | | // prevent CSE when the only difference between otherwise-identical
13019 | | // calls may be the attached debuginfo metadata.
13020 | 0 | static uint32_t BuiltinSeqNum; |
13021 | |
13022 | 0 | switch (BuiltinID) { |
13023 | 0 | default: |
13024 | 0 | llvm_unreachable("Unexpected BPF builtin"); |
13025 | 0 | case BPF::BI__builtin_preserve_field_info: { |
13026 | 0 | const Expr *Arg = E->getArg(0); |
13027 | 0 | bool IsBitField = Arg->IgnoreParens()->getObjectKind() == OK_BitField; |
13028 | |
13029 | 0 | if (!getDebugInfo()) { |
13030 | 0 | CGM.Error(E->getExprLoc(), |
13031 | 0 | "using __builtin_preserve_field_info() without -g"); |
13032 | 0 | return IsBitField ? EmitLValue(Arg).getBitFieldPointer() |
13033 | 0 | : EmitLValue(Arg).getPointer(*this); |
13034 | 0 | } |
13035 | | |
13036 | | // Enable underlying preserve_*_access_index() generation. |
13037 | 0 | bool OldIsInPreservedAIRegion = IsInPreservedAIRegion; |
13038 | 0 | IsInPreservedAIRegion = true; |
13039 | 0 | Value *FieldAddr = IsBitField ? EmitLValue(Arg).getBitFieldPointer() |
13040 | 0 | : EmitLValue(Arg).getPointer(*this); |
13041 | 0 | IsInPreservedAIRegion = OldIsInPreservedAIRegion; |
13042 | |
13043 | 0 | ConstantInt *C = cast<ConstantInt>(EmitScalarExpr(E->getArg(1))); |
13044 | 0 | Value *InfoKind = ConstantInt::get(Int64Ty, C->getSExtValue()); |
13045 | | |
13046 | | // Build the IR for the preserve_field_info intrinsic.
13047 | 0 | llvm::Function *FnGetFieldInfo = llvm::Intrinsic::getDeclaration( |
13048 | 0 | &CGM.getModule(), llvm::Intrinsic::bpf_preserve_field_info, |
13049 | 0 | {FieldAddr->getType()}); |
13050 | 0 | return Builder.CreateCall(FnGetFieldInfo, {FieldAddr, InfoKind}); |
13051 | 0 | } |
13052 | 0 | case BPF::BI__builtin_btf_type_id: |
13053 | 0 | case BPF::BI__builtin_preserve_type_info: { |
13054 | 0 | if (!getDebugInfo()) { |
13055 | 0 | CGM.Error(E->getExprLoc(), "using builtin function without -g"); |
13056 | 0 | return nullptr; |
13057 | 0 | } |
13058 | | |
13059 | 0 | const Expr *Arg0 = E->getArg(0); |
13060 | 0 | llvm::DIType *DbgInfo = getDebugInfo()->getOrCreateStandaloneType( |
13061 | 0 | Arg0->getType(), Arg0->getExprLoc()); |
13062 | |
13063 | 0 | ConstantInt *Flag = cast<ConstantInt>(EmitScalarExpr(E->getArg(1))); |
13064 | 0 | Value *FlagValue = ConstantInt::get(Int64Ty, Flag->getSExtValue()); |
13065 | 0 | Value *SeqNumVal = ConstantInt::get(Int32Ty, BuiltinSeqNum++); |
13066 | |
13067 | 0 | llvm::Function *FnDecl; |
13068 | 0 | if (BuiltinID == BPF::BI__builtin_btf_type_id) |
13069 | 0 | FnDecl = llvm::Intrinsic::getDeclaration( |
13070 | 0 | &CGM.getModule(), llvm::Intrinsic::bpf_btf_type_id, {}); |
13071 | 0 | else |
13072 | 0 | FnDecl = llvm::Intrinsic::getDeclaration( |
13073 | 0 | &CGM.getModule(), llvm::Intrinsic::bpf_preserve_type_info, {}); |
13074 | 0 | CallInst *Fn = Builder.CreateCall(FnDecl, {SeqNumVal, FlagValue}); |
13075 | 0 | Fn->setMetadata(LLVMContext::MD_preserve_access_index, DbgInfo); |
13076 | 0 | return Fn; |
13077 | 0 | } |
13078 | 0 | case BPF::BI__builtin_preserve_enum_value: { |
13079 | 0 | if (!getDebugInfo()) { |
13080 | 0 | CGM.Error(E->getExprLoc(), "using builtin function without -g"); |
13081 | 0 | return nullptr; |
13082 | 0 | } |
13083 | | |
13084 | 0 | const Expr *Arg0 = E->getArg(0); |
13085 | 0 | llvm::DIType *DbgInfo = getDebugInfo()->getOrCreateStandaloneType( |
13086 | 0 | Arg0->getType(), Arg0->getExprLoc()); |
13087 | | |
13088 | | // Find enumerator |
13089 | 0 | const auto *UO = cast<UnaryOperator>(Arg0->IgnoreParens()); |
13090 | 0 | const auto *CE = cast<CStyleCastExpr>(UO->getSubExpr()); |
13091 | 0 | const auto *DR = cast<DeclRefExpr>(CE->getSubExpr()); |
13092 | 0 | const auto *Enumerator = cast<EnumConstantDecl>(DR->getDecl()); |
13093 | |
13094 | 0 | auto &InitVal = Enumerator->getInitVal(); |
13095 | 0 | std::string InitValStr; |
13096 | 0 | if (InitVal.isNegative() || InitVal > uint64_t(INT64_MAX)) |
13097 | 0 | InitValStr = std::to_string(InitVal.getSExtValue()); |
13098 | 0 | else |
13099 | 0 | InitValStr = std::to_string(InitVal.getZExtValue()); |
13100 | 0 | std::string EnumStr = Enumerator->getNameAsString() + ":" + InitValStr; |
13101 | 0 | Value *EnumStrVal = Builder.CreateGlobalStringPtr(EnumStr); |
13102 | |
13103 | 0 | ConstantInt *Flag = cast<ConstantInt>(EmitScalarExpr(E->getArg(1))); |
13104 | 0 | Value *FlagValue = ConstantInt::get(Int64Ty, Flag->getSExtValue()); |
13105 | 0 | Value *SeqNumVal = ConstantInt::get(Int32Ty, BuiltinSeqNum++); |
13106 | |
13107 | 0 | llvm::Function *IntrinsicFn = llvm::Intrinsic::getDeclaration( |
13108 | 0 | &CGM.getModule(), llvm::Intrinsic::bpf_preserve_enum_value, {}); |
13109 | 0 | CallInst *Fn = |
13110 | 0 | Builder.CreateCall(IntrinsicFn, {SeqNumVal, EnumStrVal, FlagValue}); |
13111 | 0 | Fn->setMetadata(LLVMContext::MD_preserve_access_index, DbgInfo); |
13112 | 0 | return Fn; |
13113 | 0 | } |
13114 | 0 | } |
13115 | 0 | } |
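
The effect of BuiltinSeqNum: two BPF builtin calls that are byte-identical apart from attached metadata would be merged by CSE, so the distinct i32 sequence operand keeps them apart. Illustrative pseudo-IR with made-up operand and metadata values:

    // ; %a and %b stay distinct despite identical flags, because of the
    // ; injected sequence numbers 0 and 1:
    // ; %a = call i64 @llvm.bpf.btf.type.id(i32 0, i64 1), !preserve.access.index !1
    // ; %b = call i64 @llvm.bpf.btf.type.id(i32 1, i64 1), !preserve.access.index !2
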
13116 | | |
13117 | | llvm::Value *CodeGenFunction:: |
13118 | 94 | BuildVector(ArrayRef<llvm::Value*> Ops) { |
13119 | 94 | assert((Ops.size() & (Ops.size() - 1)) == 0 && |
13120 | 94 | "Not a power-of-two sized vector!"); |
13121 | 94 | bool AllConstants = true; |
13122 | 216 | for (unsigned i = 0, e = Ops.size(); i != e && AllConstants; ++i)
13123 | 122 | AllConstants &= isa<Constant>(Ops[i]); |
13124 | | |
13125 | | // If this is a constant vector, create a ConstantVector. |
13126 | 94 | if (AllConstants) { |
13127 | 4 | SmallVector<llvm::Constant*, 16> CstOps; |
13128 | 34 | for (unsigned i = 0, e = Ops.size(); i != e; ++i)
13129 | 30 | CstOps.push_back(cast<Constant>(Ops[i])); |
13130 | 4 | return llvm::ConstantVector::get(CstOps); |
13131 | 4 | } |
13132 | | |
13133 | | // Otherwise, insertelement the values to build the vector. |
13134 | 90 | Value *Result = llvm::PoisonValue::get( |
13135 | 90 | llvm::FixedVectorType::get(Ops[0]->getType(), Ops.size())); |
13136 | | |
13137 | 686 | for (unsigned i = 0, e = Ops.size(); i != e; ++i)
13138 | 596 | Result = Builder.CreateInsertElement(Result, Ops[i], Builder.getInt64(i)); |
13139 | | |
13140 | 90 | return Result; |
13141 | 94 | } |
13142 | | |
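BuildVector above has two paths: if every operand is a compile-time constant it folds directly to a ConstantVector, otherwise it chains insertelement instructions starting from a poison vector. A minimal standalone sketch of the same two-path shape, with a hypothetical Lane type standing in for LLVM values:

    #include <array>
    #include <optional>

    // Hypothetical stand-in: a lane is either a known constant or a runtime value.
    struct Lane { std::optional<int> konst; int runtime; };

    template <size_t N>
    std::array<int, N> buildVector(const std::array<Lane, N> &Ops) {
      static_assert((N & (N - 1)) == 0, "Not a power-of-two sized vector!");
      bool AllConstants = true;
      for (const Lane &L : Ops)
        AllConstants &= L.konst.has_value();

      std::array<int, N> Result{};
      if (AllConstants) {             // analogous to ConstantVector::get
        for (size_t i = 0; i != N; ++i) Result[i] = *Ops[i].konst;
        return Result;
      }
      for (size_t i = 0; i != N; ++i) // analogous to chained insertelement
        Result[i] = Ops[i].konst ? *Ops[i].konst : Ops[i].runtime;
      return Result;
    }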
13143 | | // Convert the mask from an integer type to a vector of i1. |
13144 | | static Value *getMaskVecValue(CodeGenFunction &CGF, Value *Mask, |
13145 | 4.74k | unsigned NumElts) { |
13146 | | |
13147 | 4.74k | auto *MaskTy = llvm::FixedVectorType::get( |
13148 | 4.74k | CGF.Builder.getInt1Ty(), |
13149 | 4.74k | cast<IntegerType>(Mask->getType())->getBitWidth()); |
13150 | 4.74k | Value *MaskVec = CGF.Builder.CreateBitCast(Mask, MaskTy); |
13151 | | |
13152 | | // If we have fewer than 8 elements, then the starting mask was an i8 and
13153 | | // we need to extract down to the right number of elements. |
13154 | 4.74k | if (NumElts < 8) { |
13155 | 1.06k | int Indices[4]; |
13156 | 4.63k | for (unsigned i = 0; i != NumElts; ++i)
13157 | 3.57k | Indices[i] = i; |
13158 | 1.06k | MaskVec = CGF.Builder.CreateShuffleVector( |
13159 | 1.06k | MaskVec, MaskVec, ArrayRef(Indices, NumElts), "extract"); |
13160 | 1.06k | } |
13161 | 4.74k | return MaskVec; |
13162 | 4.74k | } |
13163 | | |
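getMaskVecValue bitcasts an integer mask to a vector of i1 and, when the vector has fewer than 8 lanes, shuffles away the high bits of the 8-bit mask register. A scalar sketch of the bit-level semantics the emitted IR computes (assuming an 8-bit source mask):

    #include <cstdint>
    #include <vector>

    // Bit i of the integer mask becomes lane i of the i1 vector; for
    // NumElts < 8 the mask register is still 8 bits wide and the high
    // bits are simply dropped by the extracting shuffle.
    std::vector<bool> maskToVec(uint8_t Mask, unsigned NumElts) {
      std::vector<bool> Lanes(NumElts);
      for (unsigned i = 0; i != NumElts; ++i)
        Lanes[i] = (Mask >> i) & 1;
      return Lanes;
    }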
13164 | | static Value *EmitX86MaskedStore(CodeGenFunction &CGF, ArrayRef<Value *> Ops, |
13165 | 45 | Align Alignment) { |
13166 | 45 | Value *Ptr = Ops[0]; |
13167 | | |
13168 | 45 | Value *MaskVec = getMaskVecValue( |
13169 | 45 | CGF, Ops[2], |
13170 | 45 | cast<llvm::FixedVectorType>(Ops[1]->getType())->getNumElements()); |
13171 | | |
13172 | 45 | return CGF.Builder.CreateMaskedStore(Ops[1], Ptr, Alignment, MaskVec); |
13173 | 45 | } |
13174 | | |
13175 | | static Value *EmitX86MaskedLoad(CodeGenFunction &CGF, ArrayRef<Value *> Ops, |
13176 | 94 | Align Alignment) { |
13177 | 94 | llvm::Type *Ty = Ops[1]->getType(); |
13178 | 94 | Value *Ptr = Ops[0]; |
13179 | | |
13180 | 94 | Value *MaskVec = getMaskVecValue( |
13181 | 94 | CGF, Ops[2], cast<llvm::FixedVectorType>(Ty)->getNumElements()); |
13182 | | |
13183 | 94 | return CGF.Builder.CreateMaskedLoad(Ty, Ptr, Alignment, MaskVec, Ops[1]); |
13184 | 94 | } |
13185 | | |
13186 | | static Value *EmitX86ExpandLoad(CodeGenFunction &CGF, |
13187 | 44 | ArrayRef<Value *> Ops) { |
13188 | 44 | auto *ResultTy = cast<llvm::VectorType>(Ops[1]->getType()); |
13189 | 44 | Value *Ptr = Ops[0]; |
13190 | | |
13191 | 44 | Value *MaskVec = getMaskVecValue( |
13192 | 44 | CGF, Ops[2], cast<FixedVectorType>(ResultTy)->getNumElements()); |
13193 | | |
13194 | 44 | llvm::Function *F = CGF.CGM.getIntrinsic(Intrinsic::masked_expandload, |
13195 | 44 | ResultTy); |
13196 | 44 | return CGF.Builder.CreateCall(F, { Ptr, MaskVec, Ops[1] }); |
13197 | 44 | } |
13198 | | |
13199 | | static Value *EmitX86CompressExpand(CodeGenFunction &CGF, |
13200 | | ArrayRef<Value *> Ops, |
13201 | 88 | bool IsCompress) { |
13202 | 88 | auto *ResultTy = cast<llvm::FixedVectorType>(Ops[1]->getType()); |
13203 | | |
13204 | 88 | Value *MaskVec = getMaskVecValue(CGF, Ops[2], ResultTy->getNumElements()); |
13205 | | |
13206 | 88 | Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
13207 | 88 | : Intrinsic::x86_avx512_mask_expand;
13208 | 88 | llvm::Function *F = CGF.CGM.getIntrinsic(IID, ResultTy); |
13209 | 88 | return CGF.Builder.CreateCall(F, { Ops[0], Ops[1], MaskVec }); |
13210 | 88 | } |
13211 | | |
13212 | | static Value *EmitX86CompressStore(CodeGenFunction &CGF, |
13213 | 22 | ArrayRef<Value *> Ops) { |
13214 | 22 | auto *ResultTy = cast<llvm::FixedVectorType>(Ops[1]->getType()); |
13215 | 22 | Value *Ptr = Ops[0]; |
13216 | | |
13217 | 22 | Value *MaskVec = getMaskVecValue(CGF, Ops[2], ResultTy->getNumElements()); |
13218 | | |
13219 | 22 | llvm::Function *F = CGF.CGM.getIntrinsic(Intrinsic::masked_compressstore, |
13220 | 22 | ResultTy); |
13221 | 22 | return CGF.Builder.CreateCall(F, { Ops[1], Ptr, MaskVec }); |
13222 | 22 | } |
13223 | | |
13224 | | static Value *EmitX86MaskLogic(CodeGenFunction &CGF, Instruction::BinaryOps Opc, |
13225 | | ArrayRef<Value *> Ops, |
13226 | 68 | bool InvertLHS = false) { |
13227 | 68 | unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth(); |
13228 | 68 | Value *LHS = getMaskVecValue(CGF, Ops[0], NumElts); |
13229 | 68 | Value *RHS = getMaskVecValue(CGF, Ops[1], NumElts); |
13230 | | |
13231 | 68 | if (InvertLHS) |
13232 | 14 | LHS = CGF.Builder.CreateNot(LHS); |
13233 | | |
13234 | 68 | return CGF.Builder.CreateBitCast(CGF.Builder.CreateBinOp(Opc, LHS, RHS), |
13235 | 68 | Ops[0]->getType()); |
13236 | 68 | } |
13237 | | |
13238 | | static Value *EmitX86FunnelShift(CodeGenFunction &CGF, Value *Op0, Value *Op1, |
13239 | 168 | Value *Amt, bool IsRight) { |
13240 | 168 | llvm::Type *Ty = Op0->getType(); |
13241 | | |
13242 | | // Amount may be scalar immediate, in which case create a splat vector. |
13243 | | // Funnel shift amounts are treated as modulo the bit width, and types are
13244 | | // all power-of-2, so we only care about the lowest log2 bits anyway.
13245 | 168 | if (Amt->getType() != Ty) { |
13246 | 118 | unsigned NumElts = cast<llvm::FixedVectorType>(Ty)->getNumElements(); |
13247 | 118 | Amt = CGF.Builder.CreateIntCast(Amt, Ty->getScalarType(), false); |
13248 | 118 | Amt = CGF.Builder.CreateVectorSplat(NumElts, Amt); |
13249 | 118 | } |
13250 | | |
13251 | 168 | unsigned IID = IsRight ? Intrinsic::fshr : Intrinsic::fshl;
13252 | 168 | Function *F = CGF.CGM.getIntrinsic(IID, Ty); |
13253 | 168 | return CGF.Builder.CreateCall(F, {Op0, Op1, Amt}); |
13254 | 168 | } |
13255 | | |
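The llvm.fshl/llvm.fshr intrinsics emitted above concatenate the two operands and shift the double-width value, with the amount taken modulo the bit width. A per-lane reference in plain C++ for the 32-bit case:

    #include <cstdint>

    // fshl: concatenate Hi:Lo, shift left by Amt % 32, take the high word.
    uint32_t fshl32(uint32_t Hi, uint32_t Lo, uint32_t Amt) {
      Amt &= 31;
      return Amt ? (Hi << Amt) | (Lo >> (32 - Amt)) : Hi;
    }

    // fshr: concatenate Hi:Lo, shift right by Amt % 32, take the low word.
    uint32_t fshr32(uint32_t Hi, uint32_t Lo, uint32_t Amt) {
      Amt &= 31;
      return Amt ? (Lo >> Amt) | (Hi << (32 - Amt)) : Lo;
    }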
13256 | | static Value *EmitX86vpcom(CodeGenFunction &CGF, ArrayRef<Value *> Ops, |
13257 | 288 | bool IsSigned) { |
13258 | 288 | Value *Op0 = Ops[0]; |
13259 | 288 | Value *Op1 = Ops[1]; |
13260 | 288 | llvm::Type *Ty = Op0->getType(); |
13261 | 288 | uint64_t Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7; |
13262 | | |
13263 | 288 | CmpInst::Predicate Pred; |
13264 | 288 | switch (Imm) { |
13265 | 64 | case 0x0: |
13266 | 64 | Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
13267 | 64 | break;
13268 | 32 | case 0x1:
13269 | 32 | Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
13270 | 32 | break;
13271 | 32 | case 0x2:
13272 | 32 | Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
13273 | 32 | break;
13274 | 32 | case 0x3:
13275 | 32 | Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
13276 | 32 | break; |
13277 | 32 | case 0x4: |
13278 | 32 | Pred = ICmpInst::ICMP_EQ; |
13279 | 32 | break; |
13280 | 32 | case 0x5: |
13281 | 32 | Pred = ICmpInst::ICMP_NE; |
13282 | 32 | break; |
13283 | 32 | case 0x6: |
13284 | 32 | return llvm::Constant::getNullValue(Ty); // FALSE |
13285 | 32 | case 0x7: |
13286 | 32 | return llvm::Constant::getAllOnesValue(Ty); // TRUE |
13287 | 0 | default: |
13288 | 0 | llvm_unreachable("Unexpected XOP vpcom/vpcomu predicate"); |
13289 | 288 | } |
13290 | | |
13291 | 224 | Value *Cmp = CGF.Builder.CreateICmp(Pred, Op0, Op1); |
13292 | 224 | Value *Res = CGF.Builder.CreateSExt(Cmp, Ty); |
13293 | 224 | return Res; |
13294 | 288 | } |
13295 | | |
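For reference, the vpcom immediate decoding above reduces to a small truth table. A hypothetical single-lane version of the signed case, with the result sign-extended to all-ones or all-zeros as in the CreateSExt call:

    #include <cstdint>

    int32_t vpcomLane(int32_t A, int32_t B, unsigned Imm) {
      bool R;
      switch (Imm & 0x7) {
      case 0x0: R = A < B;  break;
      case 0x1: R = A <= B; break;
      case 0x2: R = A > B;  break;
      case 0x3: R = A >= B; break;
      case 0x4: R = A == B; break;
      case 0x5: R = A != B; break;
      case 0x6: R = false;  break; // FALSE
      default:  R = true;   break; // TRUE (0x7)
      }
      return R ? -1 : 0; // sign-extended comparison result
    }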
13296 | | static Value *EmitX86Select(CodeGenFunction &CGF, |
13297 | 2.92k | Value *Mask, Value *Op0, Value *Op1) { |
13298 | | |
13299 | | // If the mask is all ones just return first argument. |
13300 | 2.92k | if (const auto *C = dyn_cast<Constant>(Mask)) |
13301 | 143 | if (C->isAllOnesValue()) |
13302 | 143 | return Op0; |
13303 | | |
13304 | 2.78k | Mask = getMaskVecValue( |
13305 | 2.78k | CGF, Mask, cast<llvm::FixedVectorType>(Op0->getType())->getNumElements()); |
13306 | | |
13307 | 2.78k | return CGF.Builder.CreateSelect(Mask, Op0, Op1); |
13308 | 2.92k | } |
13309 | | |
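EmitX86Select lowers a masked operation to a per-lane select: bit i of the integer mask chooses between the computed result and the fallback operand. A minimal sketch of the lane semantics (float lanes and an 8-bit mask chosen arbitrarily):

    #include <array>
    #include <cstdint>

    template <size_t N> // N <= 8 for a uint8_t mask
    std::array<float, N> maskedSelect(uint8_t Mask,
                                      const std::array<float, N> &Op0,
                                      const std::array<float, N> &Op1) {
      std::array<float, N> Out;
      for (size_t i = 0; i != N; ++i)
        Out[i] = ((Mask >> i) & 1) ? Op0[i] : Op1[i]; // new value vs. fallback
      return Out;
    }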
13310 | | static Value *EmitX86ScalarSelect(CodeGenFunction &CGF, |
13311 | 229 | Value *Mask, Value *Op0, Value *Op1) { |
13312 | | // If the mask is all ones just return first argument. |
13313 | 229 | if (const auto *C = dyn_cast<Constant>(Mask)) |
13314 | 25 | if (C->isAllOnesValue()) |
13315 | 25 | return Op0; |
13316 | | |
13317 | 204 | auto *MaskTy = llvm::FixedVectorType::get( |
13318 | 204 | CGF.Builder.getInt1Ty(), Mask->getType()->getIntegerBitWidth()); |
13319 | 204 | Mask = CGF.Builder.CreateBitCast(Mask, MaskTy); |
13320 | 204 | Mask = CGF.Builder.CreateExtractElement(Mask, (uint64_t)0); |
13321 | 204 | return CGF.Builder.CreateSelect(Mask, Op0, Op1); |
13322 | 229 | } |
13323 | | |
13324 | | static Value *EmitX86MaskedCompareResult(CodeGenFunction &CGF, Value *Cmp, |
13325 | 2.24k | unsigned NumElts, Value *MaskIn) { |
13326 | 2.24k | if (MaskIn) { |
13327 | 1.75k | const auto *C = dyn_cast<Constant>(MaskIn); |
13328 | 1.75k | if (!C || !C->isAllOnesValue())
13329 | 806 | Cmp = CGF.Builder.CreateAnd(Cmp, getMaskVecValue(CGF, MaskIn, NumElts)); |
13330 | 1.75k | } |
13331 | | |
13332 | 2.24k | if (NumElts < 8) { |
13333 | 501 | int Indices[8]; |
13334 | 2.17k | for (unsigned i = 0; i != NumElts; ++i)
13335 | 1.67k | Indices[i] = i;
13336 | 2.83k | for (unsigned i = NumElts; i != 8; ++i)
13337 | 2.33k | Indices[i] = i % NumElts + NumElts; |
13338 | 501 | Cmp = CGF.Builder.CreateShuffleVector( |
13339 | 501 | Cmp, llvm::Constant::getNullValue(Cmp->getType()), Indices); |
13340 | 501 | } |
13341 | | |
13342 | 2.24k | return CGF.Builder.CreateBitCast(Cmp, |
13343 | 2.24k | IntegerType::get(CGF.getLLVMContext(), |
13344 | 2.24k | std::max(NumElts, 8U))); |
13345 | 2.24k | } |
13346 | | |
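When there are fewer than 8 lanes, the shuffle above pads the compare result with lanes taken from a zero vector so the final bitcast always produces at least an i8. A scalar sketch of the packing this implements:

    #include <cstdint>

    // Pack NumElts (< 8) compare bits into the low bits of an i8;
    // bits NumElts..7 stay zero, matching the shuffle with the null vector.
    uint8_t packCompareBits(const bool *Cmp, unsigned NumElts) {
      uint8_t Out = 0;
      for (unsigned i = 0; i != NumElts; ++i)
        Out |= uint8_t(Cmp[i]) << i;
      return Out;
    }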
13347 | | static Value *EmitX86MaskedCompare(CodeGenFunction &CGF, unsigned CC, |
13348 | 969 | bool Signed, ArrayRef<Value *> Ops) { |
13349 | 969 | assert((Ops.size() == 2 || Ops.size() == 4) && |
13350 | 969 | "Unexpected number of arguments"); |
13351 | 969 | unsigned NumElts = |
13352 | 969 | cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements(); |
13353 | 969 | Value *Cmp; |
13354 | | |
13355 | 969 | if (CC == 3) { |
13356 | 0 | Cmp = Constant::getNullValue( |
13357 | 0 | llvm::FixedVectorType::get(CGF.Builder.getInt1Ty(), NumElts)); |
13358 | 969 | } else if (CC == 7) { |
13359 | 0 | Cmp = Constant::getAllOnesValue( |
13360 | 0 | llvm::FixedVectorType::get(CGF.Builder.getInt1Ty(), NumElts)); |
13361 | 969 | } else { |
13362 | 969 | ICmpInst::Predicate Pred; |
13363 | 969 | switch (CC) { |
13364 | 0 | default: llvm_unreachable("Unknown condition code"); |
13365 | 199 | case 0: Pred = ICmpInst::ICMP_EQ; break; |
13366 | 100 | case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
13367 | 81 | case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
13368 | 429 | case 4: Pred = ICmpInst::ICMP_NE; break;
13369 | 80 | case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
13370 | 80 | case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
13371 | 969 | } |
13372 | 969 | Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]); |
13373 | 969 | } |
13374 | | |
13375 | 969 | Value *MaskIn = nullptr; |
13376 | 969 | if (Ops.size() == 4) |
13377 | 951 | MaskIn = Ops[3]; |
13378 | | |
13379 | 969 | return EmitX86MaskedCompareResult(CGF, Cmp, NumElts, MaskIn); |
13380 | 969 | } |
13381 | | |
13382 | 18 | static Value *EmitX86ConvertToMask(CodeGenFunction &CGF, Value *In) { |
13383 | 18 | Value *Zero = Constant::getNullValue(In->getType()); |
13384 | 18 | return EmitX86MaskedCompare(CGF, 1, true, { In, Zero }); |
13385 | 18 | } |
13386 | | |
13387 | | static Value *EmitX86ConvertIntToFp(CodeGenFunction &CGF, const CallExpr *E, |
13388 | 138 | ArrayRef<Value *> Ops, bool IsSigned) { |
13389 | 138 | unsigned Rnd = cast<llvm::ConstantInt>(Ops[3])->getZExtValue(); |
13390 | 138 | llvm::Type *Ty = Ops[1]->getType(); |
13391 | | |
13392 | 138 | Value *Res; |
13393 | 138 | if (Rnd != 4) { |
13394 | 90 | Intrinsic::ID IID = IsSigned ? Intrinsic::x86_avx512_sitofp_round
13395 | 90 | : Intrinsic::x86_avx512_uitofp_round;
13396 | 90 | Function *F = CGF.CGM.getIntrinsic(IID, { Ty, Ops[0]->getType() }); |
13397 | 90 | Res = CGF.Builder.CreateCall(F, { Ops[0], Ops[3] }); |
13398 | 90 | } else { |
13399 | 48 | CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E); |
13400 | 48 | Res = IsSigned ? CGF.Builder.CreateSIToFP(Ops[0], Ty)
13401 | 48 | : CGF.Builder.CreateUIToFP(Ops[0], Ty);
13402 | 48 | } |
13403 | | |
13404 | 138 | return EmitX86Select(CGF, Ops[2], Res, Ops[1]); |
13405 | 138 | } |
13406 | | |
13407 | | // Lowers X86 FMA intrinsics to IR. |
13408 | | static Value *EmitX86FMAExpr(CodeGenFunction &CGF, const CallExpr *E, |
13409 | | ArrayRef<Value *> Ops, unsigned BuiltinID, |
13410 | 433 | bool IsAddSub) { |
13411 | | |
13412 | 433 | bool Subtract = false; |
13413 | 433 | Intrinsic::ID IID = Intrinsic::not_intrinsic; |
13414 | 433 | switch (BuiltinID) { |
13415 | 193 | default: break; |
13416 | 193 | case clang::X86::BI__builtin_ia32_vfmsubph512_mask3: |
13417 | 4 | Subtract = true; |
13418 | 4 | [[fallthrough]]; |
13419 | 20 | case clang::X86::BI__builtin_ia32_vfmaddph512_mask: |
13420 | 28 | case clang::X86::BI__builtin_ia32_vfmaddph512_maskz: |
13421 | 32 | case clang::X86::BI__builtin_ia32_vfmaddph512_mask3: |
13422 | 32 | IID = llvm::Intrinsic::x86_avx512fp16_vfmadd_ph_512; |
13423 | 32 | break; |
13424 | 2 | case clang::X86::BI__builtin_ia32_vfmsubaddph512_mask3: |
13425 | 2 | Subtract = true; |
13426 | 2 | [[fallthrough]]; |
13427 | 10 | case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask: |
13428 | 14 | case clang::X86::BI__builtin_ia32_vfmaddsubph512_maskz: |
13429 | 16 | case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask3: |
13430 | 16 | IID = llvm::Intrinsic::x86_avx512fp16_vfmaddsub_ph_512; |
13431 | 16 | break; |
13432 | 8 | case clang::X86::BI__builtin_ia32_vfmsubps512_mask3: |
13433 | 8 | Subtract = true; |
13434 | 8 | [[fallthrough]]; |
13435 | 40 | case clang::X86::BI__builtin_ia32_vfmaddps512_mask: |
13436 | 56 | case clang::X86::BI__builtin_ia32_vfmaddps512_maskz: |
13437 | 64 | case clang::X86::BI__builtin_ia32_vfmaddps512_mask3: |
13438 | 64 | IID = llvm::Intrinsic::x86_avx512_vfmadd_ps_512; break; |
13439 | 8 | case clang::X86::BI__builtin_ia32_vfmsubpd512_mask3: |
13440 | 8 | Subtract = true; |
13441 | 8 | [[fallthrough]]; |
13442 | 40 | case clang::X86::BI__builtin_ia32_vfmaddpd512_mask: |
13443 | 56 | case clang::X86::BI__builtin_ia32_vfmaddpd512_maskz: |
13444 | 64 | case clang::X86::BI__builtin_ia32_vfmaddpd512_mask3: |
13445 | 64 | IID = llvm::Intrinsic::x86_avx512_vfmadd_pd_512; break; |
13446 | 4 | case clang::X86::BI__builtin_ia32_vfmsubaddps512_mask3: |
13447 | 4 | Subtract = true; |
13448 | 4 | [[fallthrough]]; |
13449 | 20 | case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask: |
13450 | 28 | case clang::X86::BI__builtin_ia32_vfmaddsubps512_maskz: |
13451 | 32 | case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask3: |
13452 | 32 | IID = llvm::Intrinsic::x86_avx512_vfmaddsub_ps_512; |
13453 | 32 | break; |
13454 | 4 | case clang::X86::BI__builtin_ia32_vfmsubaddpd512_mask3: |
13455 | 4 | Subtract = true; |
13456 | 4 | [[fallthrough]]; |
13457 | 20 | case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask: |
13458 | 28 | case clang::X86::BI__builtin_ia32_vfmaddsubpd512_maskz: |
13459 | 32 | case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask3: |
13460 | 32 | IID = llvm::Intrinsic::x86_avx512_vfmaddsub_pd_512; |
13461 | 32 | break; |
13462 | 433 | } |
13463 | | |
13464 | 433 | Value *A = Ops[0]; |
13465 | 433 | Value *B = Ops[1]; |
13466 | 433 | Value *C = Ops[2]; |
13467 | | |
13468 | 433 | if (Subtract) |
13469 | 30 | C = CGF.Builder.CreateFNeg(C); |
13470 | | |
13471 | 433 | Value *Res; |
13472 | | |
13473 | | // Only handle in case of _MM_FROUND_CUR_DIRECTION/4 (no rounding). |
13474 | 433 | if (IID != Intrinsic::not_intrinsic && |
13475 | 433 | (cast<llvm::ConstantInt>(Ops.back())->getZExtValue() != (uint64_t)4 ||
13476 | 240 | IsAddSub)) {
13477 | 160 | Function *Intr = CGF.CGM.getIntrinsic(IID); |
13478 | 160 | Res = CGF.Builder.CreateCall(Intr, {A, B, C, Ops.back() }); |
13479 | 273 | } else { |
13480 | 273 | llvm::Type *Ty = A->getType(); |
13481 | 273 | Function *FMA; |
13482 | 273 | if (CGF.Builder.getIsFPConstrained()) { |
13483 | 32 | CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E); |
13484 | 32 | FMA = CGF.CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, Ty); |
13485 | 32 | Res = CGF.Builder.CreateConstrainedFPCall(FMA, {A, B, C}); |
13486 | 241 | } else { |
13487 | 241 | FMA = CGF.CGM.getIntrinsic(Intrinsic::fma, Ty); |
13488 | 241 | Res = CGF.Builder.CreateCall(FMA, {A, B, C}); |
13489 | 241 | } |
13490 | 273 | } |
13491 | | |
13492 | | // Handle any required masking. |
13493 | 433 | Value *MaskFalseVal = nullptr; |
13494 | 433 | switch (BuiltinID) { |
13495 | 16 | case clang::X86::BI__builtin_ia32_vfmaddph512_mask: |
13496 | 48 | case clang::X86::BI__builtin_ia32_vfmaddps512_mask: |
13497 | 80 | case clang::X86::BI__builtin_ia32_vfmaddpd512_mask: |
13498 | 88 | case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask: |
13499 | 104 | case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask: |
13500 | 120 | case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask: |
13501 | 120 | MaskFalseVal = Ops[0]; |
13502 | 120 | break; |
13503 | 8 | case clang::X86::BI__builtin_ia32_vfmaddph512_maskz: |
13504 | 24 | case clang::X86::BI__builtin_ia32_vfmaddps512_maskz: |
13505 | 40 | case clang::X86::BI__builtin_ia32_vfmaddpd512_maskz: |
13506 | 44 | case clang::X86::BI__builtin_ia32_vfmaddsubph512_maskz: |
13507 | 52 | case clang::X86::BI__builtin_ia32_vfmaddsubps512_maskz: |
13508 | 60 | case clang::X86::BI__builtin_ia32_vfmaddsubpd512_maskz: |
13509 | 60 | MaskFalseVal = Constant::getNullValue(Ops[0]->getType()); |
13510 | 60 | break; |
13511 | 4 | case clang::X86::BI__builtin_ia32_vfmsubph512_mask3: |
13512 | 8 | case clang::X86::BI__builtin_ia32_vfmaddph512_mask3: |
13513 | 16 | case clang::X86::BI__builtin_ia32_vfmsubps512_mask3: |
13514 | 24 | case clang::X86::BI__builtin_ia32_vfmaddps512_mask3: |
13515 | 32 | case clang::X86::BI__builtin_ia32_vfmsubpd512_mask3: |
13516 | 40 | case clang::X86::BI__builtin_ia32_vfmaddpd512_mask3: |
13517 | 42 | case clang::X86::BI__builtin_ia32_vfmsubaddph512_mask3: |
13518 | 44 | case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask3: |
13519 | 48 | case clang::X86::BI__builtin_ia32_vfmsubaddps512_mask3: |
13520 | 52 | case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask3: |
13521 | 56 | case clang::X86::BI__builtin_ia32_vfmsubaddpd512_mask3: |
13522 | 60 | case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask3: |
13523 | 60 | MaskFalseVal = Ops[2]; |
13524 | 60 | break; |
13525 | 433 | } |
13526 | | |
13527 | 433 | if (MaskFalseVal) |
13528 | 240 | return EmitX86Select(CGF, Ops[3], Res, MaskFalseVal); |
13529 | | |
13530 | 193 | return Res; |
13531 | 433 | } |
13532 | | |
13533 | | static Value *EmitScalarFMAExpr(CodeGenFunction &CGF, const CallExpr *E, |
13534 | | MutableArrayRef<Value *> Ops, Value *Upper, |
13535 | | bool ZeroMask = false, unsigned PTIdx = 0, |
13536 | 200 | bool NegAcc = false) { |
13537 | 200 | unsigned Rnd = 4; |
13538 | 200 | if (Ops.size() > 4) |
13539 | 144 | Rnd = cast<llvm::ConstantInt>(Ops[4])->getZExtValue(); |
13540 | | |
13541 | 200 | if (NegAcc) |
13542 | 20 | Ops[2] = CGF.Builder.CreateFNeg(Ops[2]); |
13543 | | |
13544 | 200 | Ops[0] = CGF.Builder.CreateExtractElement(Ops[0], (uint64_t)0); |
13545 | 200 | Ops[1] = CGF.Builder.CreateExtractElement(Ops[1], (uint64_t)0); |
13546 | 200 | Ops[2] = CGF.Builder.CreateExtractElement(Ops[2], (uint64_t)0); |
13547 | 200 | Value *Res; |
13548 | 200 | if (Rnd != 4) { |
13549 | 80 | Intrinsic::ID IID; |
13550 | | |
13551 | 80 | switch (Ops[0]->getType()->getPrimitiveSizeInBits()) { |
13552 | 16 | case 16: |
13553 | 16 | IID = Intrinsic::x86_avx512fp16_vfmadd_f16; |
13554 | 16 | break; |
13555 | 32 | case 32: |
13556 | 32 | IID = Intrinsic::x86_avx512_vfmadd_f32; |
13557 | 32 | break; |
13558 | 32 | case 64: |
13559 | 32 | IID = Intrinsic::x86_avx512_vfmadd_f64; |
13560 | 32 | break; |
13561 | 0 | default: |
13562 | 0 | llvm_unreachable("Unexpected size"); |
13563 | 80 | } |
13564 | 80 | Res = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IID), |
13565 | 80 | {Ops[0], Ops[1], Ops[2], Ops[4]}); |
13566 | 120 | } else if (CGF.Builder.getIsFPConstrained()) { |
13567 | 16 | CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E); |
13568 | 16 | Function *FMA = CGF.CGM.getIntrinsic( |
13569 | 16 | Intrinsic::experimental_constrained_fma, Ops[0]->getType()); |
13570 | 16 | Res = CGF.Builder.CreateConstrainedFPCall(FMA, Ops.slice(0, 3)); |
13571 | 104 | } else { |
13572 | 104 | Function *FMA = CGF.CGM.getIntrinsic(Intrinsic::fma, Ops[0]->getType()); |
13573 | 104 | Res = CGF.Builder.CreateCall(FMA, Ops.slice(0, 3)); |
13574 | 104 | } |
13575 | | // If we have more than 3 arguments, we need to do masking. |
13576 | 200 | if (Ops.size() > 3) { |
13577 | 144 | Value *PassThru = ZeroMask ? Constant::getNullValue(Res->getType())
13578 | 144 | : Ops[PTIdx];
13579 | | |
13580 | | // If we negated the accumulator and it's the PassThru value, we need to
13581 | | // bypass the negate. Conveniently, Upper should be the same thing in this
13582 | | // case.
13583 | 144 | if (NegAcc && PTIdx == 2)
13584 | 20 | PassThru = CGF.Builder.CreateExtractElement(Upper, (uint64_t)0); |
13585 | | |
13586 | 144 | Res = EmitX86ScalarSelect(CGF, Ops[3], Res, PassThru); |
13587 | 144 | } |
13588 | 200 | return CGF.Builder.CreateInsertElement(Upper, Res, (uint64_t)0); |
13589 | 200 | } |
13590 | | |
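A note on the scalar forms handled above: only element 0 is computed, the mask's low bit selects between the FMA result and the pass-through, and the upper elements are always re-inserted from Upper. A single-lane sketch of that selection (names hypothetical):

    #include <cmath>
    #include <cstdint>

    // Element-0 semantics of the masked scalar FMA builtins.
    double scalarFMA(double A, double B, double C, uint8_t Mask,
                     double PassThru) {
      return (Mask & 1) ? std::fma(A, B, C) : PassThru;
    }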
13591 | | static Value *EmitX86Muldq(CodeGenFunction &CGF, bool IsSigned, |
13592 | 27 | ArrayRef<Value *> Ops) { |
13593 | 27 | llvm::Type *Ty = Ops[0]->getType(); |
13594 | | // Arguments have a vXi32 type so cast to vXi64. |
13595 | 27 | Ty = llvm::FixedVectorType::get(CGF.Int64Ty, |
13596 | 27 | Ty->getPrimitiveSizeInBits() / 64); |
13597 | 27 | Value *LHS = CGF.Builder.CreateBitCast(Ops[0], Ty); |
13598 | 27 | Value *RHS = CGF.Builder.CreateBitCast(Ops[1], Ty); |
13599 | | |
13600 | 27 | if (IsSigned) { |
13601 | | // Shift left then arithmetic shift right. |
13602 | 12 | Constant *ShiftAmt = ConstantInt::get(Ty, 32); |
13603 | 12 | LHS = CGF.Builder.CreateShl(LHS, ShiftAmt); |
13604 | 12 | LHS = CGF.Builder.CreateAShr(LHS, ShiftAmt); |
13605 | 12 | RHS = CGF.Builder.CreateShl(RHS, ShiftAmt); |
13606 | 12 | RHS = CGF.Builder.CreateAShr(RHS, ShiftAmt); |
13607 | 15 | } else { |
13608 | | // Clear the upper bits. |
13609 | 15 | Constant *Mask = ConstantInt::get(Ty, 0xffffffff); |
13610 | 15 | LHS = CGF.Builder.CreateAnd(LHS, Mask); |
13611 | 15 | RHS = CGF.Builder.CreateAnd(RHS, Mask); |
13612 | 15 | } |
13613 | | |
13614 | 27 | return CGF.Builder.CreateMul(LHS, RHS); |
13615 | 27 | } |
13616 | | |
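pmuldq/pmuludq, which EmitX86Muldq lowers, multiply only the low 32 bits of each 64-bit lane after sign- or zero-extension; the shl/ashr pair above is the sign-extension and the 0xffffffff mask is the zero-extension. A one-lane reference:

    #include <cstdint>

    int64_t muldqLane(int64_t LHS, int64_t RHS, bool IsSigned) {
      if (IsSigned) // sign-extend the low 32 bits, then multiply
        return int64_t(int32_t(LHS)) * int64_t(int32_t(RHS));
      // zero-extend the low 32 bits, then multiply
      return int64_t(uint64_t(uint32_t(LHS)) * uint64_t(uint32_t(RHS)));
    }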
13617 | | // Emit a masked pternlog intrinsic. This only exists because the header has to |
13618 | | // use a macro and we aren't able to pass the input argument to a pternlog |
13619 | | // builtin and a select builtin without evaluating it twice. |
13620 | | static Value *EmitX86Ternlog(CodeGenFunction &CGF, bool ZeroMask, |
13621 | 24 | ArrayRef<Value *> Ops) { |
13622 | 24 | llvm::Type *Ty = Ops[0]->getType(); |
13623 | | |
13624 | 24 | unsigned VecWidth = Ty->getPrimitiveSizeInBits(); |
13625 | 24 | unsigned EltWidth = Ty->getScalarSizeInBits(); |
13626 | 24 | Intrinsic::ID IID; |
13627 | 24 | if (VecWidth == 128 && EltWidth == 32)
13628 | 3 | IID = Intrinsic::x86_avx512_pternlog_d_128;
13629 | 21 | else if (VecWidth == 256 && EltWidth == 32)
13630 | 3 | IID = Intrinsic::x86_avx512_pternlog_d_256;
13631 | 18 | else if (VecWidth == 512 && EltWidth == 32)
13632 | 6 | IID = Intrinsic::x86_avx512_pternlog_d_512;
13633 | 12 | else if (VecWidth == 128 && EltWidth == 64)
13634 | 3 | IID = Intrinsic::x86_avx512_pternlog_q_128;
13635 | 9 | else if (VecWidth == 256 && EltWidth == 64)
13636 | 3 | IID = Intrinsic::x86_avx512_pternlog_q_256; |
13637 | 6 | else if (VecWidth == 512 && EltWidth == 64) |
13638 | 6 | IID = Intrinsic::x86_avx512_pternlog_q_512; |
13639 | 0 | else |
13640 | 0 | llvm_unreachable("Unexpected intrinsic"); |
13641 | | |
13642 | 24 | Value *Ternlog = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IID), |
13643 | 24 | Ops.drop_back()); |
13644 | 24 | Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty) : Ops[0];
13645 | 24 | return EmitX86Select(CGF, Ops[4], Ternlog, PassThru); |
13646 | 24 | } |
13647 | | |
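The pternlog immediate is an 8-bit truth table: for every bit position, the three input bits form a 3-bit index into the immediate. A bitwise reference of that semantics:

    #include <cstdint>

    uint64_t ternlog(uint64_t A, uint64_t B, uint64_t C, uint8_t Imm) {
      uint64_t Out = 0;
      for (unsigned Bit = 0; Bit != 64; ++Bit) {
        // The bits of A, B, C at this position select one bit of Imm.
        unsigned Idx = (((A >> Bit) & 1) << 2) | (((B >> Bit) & 1) << 1) |
                       ((C >> Bit) & 1);
        Out |= uint64_t((Imm >> Idx) & 1) << Bit;
      }
      return Out;
    }

For example, Imm = 0xE8 yields the majority function of the three inputs, and Imm = 0x96 yields a three-way XOR.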
13648 | | static Value *EmitX86SExtMask(CodeGenFunction &CGF, Value *Op, |
13649 | 18 | llvm::Type *DstTy) { |
13650 | 18 | unsigned NumberOfElements = |
13651 | 18 | cast<llvm::FixedVectorType>(DstTy)->getNumElements(); |
13652 | 18 | Value *Mask = getMaskVecValue(CGF, Op, NumberOfElements); |
13653 | 18 | return CGF.Builder.CreateSExt(Mask, DstTy, "vpmovm2"); |
13654 | 18 | } |
13655 | | |
13656 | 65 | Value *CodeGenFunction::EmitX86CpuIs(const CallExpr *E) { |
13657 | 65 | const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts(); |
13658 | 65 | StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString(); |
13659 | 65 | return EmitX86CpuIs(CPUStr); |
13660 | 65 | } |
13661 | | |
13662 | | // Convert F16 halves to floats.
13663 | | static Value *EmitX86CvtF16ToFloatExpr(CodeGenFunction &CGF, |
13664 | | ArrayRef<Value *> Ops, |
13665 | 74 | llvm::Type *DstTy) { |
13666 | 74 | assert((Ops.size() == 1 || Ops.size() == 3 || Ops.size() == 4) && |
13667 | 74 | "Unknown cvtph2ps intrinsic"); |
13668 | | |
13669 | | // If the SAE intrinsic doesn't use default rounding then we can't upgrade. |
13670 | 74 | if (Ops.size() == 4 && cast<llvm::ConstantInt>(Ops[3])->getZExtValue() != 4) {
13671 | 24 | Function *F = |
13672 | 24 | CGF.CGM.getIntrinsic(Intrinsic::x86_avx512_mask_vcvtph2ps_512); |
13673 | 24 | return CGF.Builder.CreateCall(F, {Ops[0], Ops[1], Ops[2], Ops[3]}); |
13674 | 24 | } |
13675 | | |
13676 | 50 | unsigned NumDstElts = cast<llvm::FixedVectorType>(DstTy)->getNumElements(); |
13677 | 50 | Value *Src = Ops[0]; |
13678 | | |
13679 | | // Extract the subvector. |
13680 | 50 | if (NumDstElts != |
13681 | 50 | cast<llvm::FixedVectorType>(Src->getType())->getNumElements()) { |
13682 | 14 | assert(NumDstElts == 4 && "Unexpected vector size"); |
13683 | 14 | Src = CGF.Builder.CreateShuffleVector(Src, ArrayRef<int>{0, 1, 2, 3}); |
13684 | 14 | } |
13685 | | |
13686 | | // Bitcast from vXi16 to vXf16. |
13687 | 50 | auto *HalfTy = llvm::FixedVectorType::get( |
13688 | 50 | llvm::Type::getHalfTy(CGF.getLLVMContext()), NumDstElts); |
13689 | 50 | Src = CGF.Builder.CreateBitCast(Src, HalfTy); |
13690 | | |
13691 | | // Perform the fp-extension. |
13692 | 50 | Value *Res = CGF.Builder.CreateFPExt(Src, DstTy, "cvtph2ps"); |
13693 | | |
13694 | 50 | if (Ops.size() >= 3) |
13695 | 44 | Res = EmitX86Select(CGF, Ops[2], Res, Ops[1]); |
13696 | 50 | return Res; |
13697 | 50 | } |
13698 | | |
13699 | 201 | Value *CodeGenFunction::EmitX86CpuIs(StringRef CPUStr) { |
13700 | | |
13701 | 201 | llvm::Type *Int32Ty = Builder.getInt32Ty(); |
13702 | | |
13703 | | // Matching the struct layout from the compiler-rt/libgcc structure that is |
13704 | | // filled in: |
13705 | | // unsigned int __cpu_vendor; |
13706 | | // unsigned int __cpu_type; |
13707 | | // unsigned int __cpu_subtype; |
13708 | | // unsigned int __cpu_features[1]; |
13709 | 201 | llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty, |
13710 | 201 | llvm::ArrayType::get(Int32Ty, 1)); |
13711 | | |
13712 | | // Grab the global __cpu_model. |
13713 | 201 | llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model"); |
13714 | 201 | cast<llvm::GlobalValue>(CpuModel)->setDSOLocal(true); |
13715 | | |
13716 | | // Calculate the index needed to access the correct field based on the |
13717 | | // range. Also adjust the expected value. |
13718 | 201 | unsigned Index; |
13719 | 201 | unsigned Value; |
13720 | 201 | std::tie(Index, Value) = StringSwitch<std::pair<unsigned, unsigned>>(CPUStr) |
13721 | 201 | #define X86_VENDOR(ENUM, STRING) \ |
13722 | 402 | .Case(STRING, {0u, static_cast<unsigned>(llvm::X86::ENUM)}) |
13723 | 201 | #define X86_CPU_TYPE_ALIAS(ENUM, ALIAS) \ |
13724 | 804 | .Case(ALIAS, {1u, static_cast<unsigned>(llvm::X86::ENUM)}) |
13725 | 201 | #define X86_CPU_TYPE(ENUM, STR) \ |
13726 | 3.81k | .Case(STR, {1u, static_cast<unsigned>(llvm::X86::ENUM)}) |
13727 | 201 | #define X86_CPU_SUBTYPE_ALIAS(ENUM, ALIAS) \ |
13728 | 1.00k | .Case(ALIAS, {2u, static_cast<unsigned>(llvm::X86::ENUM)}) |
13729 | 201 | #define X86_CPU_SUBTYPE(ENUM, STR) \ |
13730 | 6.83k | .Case(STR, {2u, static_cast<unsigned>(llvm::X86::ENUM)}) |
13731 | 201 | #include "llvm/TargetParser/X86TargetParser.def" |
13732 | 201 | .Default({0, 0}); |
13733 | 201 | assert(Value != 0 && "Invalid CPUStr passed to CpuIs"); |
13734 | | |
13735 | | // Grab the appropriate field from __cpu_model. |
13736 | 201 | llvm::Value *Idxs[] = {ConstantInt::get(Int32Ty, 0), |
13737 | 201 | ConstantInt::get(Int32Ty, Index)}; |
13738 | 201 | llvm::Value *CpuValue = Builder.CreateGEP(STy, CpuModel, Idxs); |
13739 | 201 | CpuValue = Builder.CreateAlignedLoad(Int32Ty, CpuValue, |
13740 | 201 | CharUnits::fromQuantity(4)); |
13741 | | |
13742 | | // Check the value of the field against the requested value. |
13743 | 201 | return Builder.CreateICmpEQ(CpuValue, |
13744 | 201 | llvm::ConstantInt::get(Int32Ty, Value)); |
13745 | 201 | } |
13746 | | |
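The GEP/load/icmp sequence above reads a single field of the __cpu_model global that compiler-rt/libgcc initialize at startup. A sketch of the runtime-side view, mirroring the struct layout from the comment (the cpuIs helper is hypothetical and assumes Index is 0, 1, or 2):

    #include <cstdint>

    // Layout mirrored from the comment above; filled in by
    // __cpu_indicator_init in compiler-rt/libgcc.
    struct CpuModel {
      uint32_t cpu_vendor;
      uint32_t cpu_type;
      uint32_t cpu_subtype;
      uint32_t cpu_features[1];
    };

    // What the emitted GEP + load + icmp compute: read field `Index`
    // (0 = vendor, 1 = type, 2 = subtype) and compare it to `Value`.
    bool cpuIs(const CpuModel &M, unsigned Index, uint32_t Value) {
      const uint32_t Fields[3] = {M.cpu_vendor, M.cpu_type, M.cpu_subtype};
      return Fields[Index] == Value;
    }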
13747 | 46 | Value *CodeGenFunction::EmitX86CpuSupports(const CallExpr *E) { |
13748 | 46 | const Expr *FeatureExpr = E->getArg(0)->IgnoreParenCasts(); |
13749 | 46 | StringRef FeatureStr = cast<StringLiteral>(FeatureExpr)->getString(); |
13750 | 46 | return EmitX86CpuSupports(FeatureStr); |
13751 | 46 | } |
13752 | | |
13753 | 199 | Value *CodeGenFunction::EmitX86CpuSupports(ArrayRef<StringRef> FeatureStrs) { |
13754 | 199 | return EmitX86CpuSupports(llvm::X86::getCpuSupportsMask(FeatureStrs)); |
13755 | 199 | } |
13756 | | |
13757 | | llvm::Value * |
13758 | 199 | CodeGenFunction::EmitX86CpuSupports(std::array<uint32_t, 4> FeatureMask) { |
13759 | 199 | Value *Result = Builder.getTrue(); |
13760 | 199 | if (FeatureMask[0] != 0) { |
13761 | | // Matching the struct layout from the compiler-rt/libgcc structure that is |
13762 | | // filled in: |
13763 | | // unsigned int __cpu_vendor; |
13764 | | // unsigned int __cpu_type; |
13765 | | // unsigned int __cpu_subtype; |
13766 | | // unsigned int __cpu_features[1]; |
13767 | 180 | llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty, |
13768 | 180 | llvm::ArrayType::get(Int32Ty, 1)); |
13769 | | |
13770 | | // Grab the global __cpu_model. |
13771 | 180 | llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model"); |
13772 | 180 | cast<llvm::GlobalValue>(CpuModel)->setDSOLocal(true); |
13773 | | |
13774 | | // Grab the first (0th) element from the field __cpu_features off of the |
13775 | | // global in the struct STy. |
13776 | 180 | Value *Idxs[] = {Builder.getInt32(0), Builder.getInt32(3), |
13777 | 180 | Builder.getInt32(0)}; |
13778 | 180 | Value *CpuFeatures = Builder.CreateGEP(STy, CpuModel, Idxs); |
13779 | 180 | Value *Features = Builder.CreateAlignedLoad(Int32Ty, CpuFeatures, |
13780 | 180 | CharUnits::fromQuantity(4)); |
13781 | | |
13782 | | // Check the value of the bit corresponding to the feature requested. |
13783 | 180 | Value *Mask = Builder.getInt32(FeatureMask[0]); |
13784 | 180 | Value *Bitset = Builder.CreateAnd(Features, Mask); |
13785 | 180 | Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask); |
13786 | 180 | Result = Builder.CreateAnd(Result, Cmp); |
13787 | 180 | } |
13788 | | |
13789 | 199 | llvm::Type *ATy = llvm::ArrayType::get(Int32Ty, 3); |
13790 | 199 | llvm::Constant *CpuFeatures2 = |
13791 | 199 | CGM.CreateRuntimeVariable(ATy, "__cpu_features2"); |
13792 | 199 | cast<llvm::GlobalValue>(CpuFeatures2)->setDSOLocal(true); |
13793 | 796 | for (int i = 1; i != 4; ++i) {
13794 | 597 | const uint32_t M = FeatureMask[i]; |
13795 | 597 | if (!M) |
13796 | 578 | continue; |
13797 | 19 | Value *Idxs[] = {Builder.getInt32(0), Builder.getInt32(i - 1)}; |
13798 | 19 | Value *Features = Builder.CreateAlignedLoad( |
13799 | 19 | Int32Ty, Builder.CreateGEP(ATy, CpuFeatures2, Idxs), |
13800 | 19 | CharUnits::fromQuantity(4)); |
13801 | | // Check the value of the bit corresponding to the feature requested. |
13802 | 19 | Value *Mask = Builder.getInt32(M); |
13803 | 19 | Value *Bitset = Builder.CreateAnd(Features, Mask); |
13804 | 19 | Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask); |
13805 | 19 | Result = Builder.CreateAnd(Result, Cmp); |
13806 | 19 | } |
13807 | | |
13808 | 199 | return Result; |
13809 | 199 | } |
13810 | | |
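Both the __cpu_model path and the __cpu_features2 loop above use the same and-then-compare idiom: a feature request passes only if every requested bit is set, not merely some of them. In scalar form:

    #include <cstdint>

    // All requested feature bits must be present, hence comparing the
    // masked value back against the full mask rather than against zero.
    bool cpuSupports(uint32_t Features, uint32_t Mask) {
      return (Features & Mask) == Mask;
    }

The __cpu_features2 loop repeats this test for each additional 32-bit word of the feature mask and ANDs the results together.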
13811 | 18 | Value *CodeGenFunction::EmitAArch64CpuInit() { |
13812 | 18 | llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false); |
13813 | 18 | llvm::FunctionCallee Func = |
13814 | 18 | CGM.CreateRuntimeFunction(FTy, "__init_cpu_features_resolver"); |
13815 | 18 | cast<llvm::GlobalValue>(Func.getCallee())->setDSOLocal(true); |
13816 | 18 | cast<llvm::GlobalValue>(Func.getCallee()) |
13817 | 18 | ->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass); |
13818 | 18 | return Builder.CreateCall(Func); |
13819 | 18 | } |
13820 | | |
13821 | 130 | Value *CodeGenFunction::EmitX86CpuInit() { |
13822 | 130 | llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, |
13823 | 130 | /*Variadic*/ false); |
13824 | 130 | llvm::FunctionCallee Func = |
13825 | 130 | CGM.CreateRuntimeFunction(FTy, "__cpu_indicator_init"); |
13826 | 130 | cast<llvm::GlobalValue>(Func.getCallee())->setDSOLocal(true); |
13827 | 130 | cast<llvm::GlobalValue>(Func.getCallee()) |
13828 | 130 | ->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass); |
13829 | 130 | return Builder.CreateCall(Func); |
13830 | 130 | } |
13831 | | |
13832 | | llvm::Value * |
13833 | 47 | CodeGenFunction::EmitAArch64CpuSupports(ArrayRef<StringRef> FeaturesStrs) { |
13834 | 47 | uint64_t FeaturesMask = llvm::AArch64::getCpuSupportsMask(FeaturesStrs); |
13835 | 47 | Value *Result = Builder.getTrue(); |
13836 | 47 | if (FeaturesMask != 0) { |
13837 | | // Get features from structure in runtime library |
13838 | | // struct { |
13839 | | // unsigned long long features; |
13840 | | // } __aarch64_cpu_features; |
13841 | 47 | llvm::Type *STy = llvm::StructType::get(Int64Ty); |
13842 | 47 | llvm::Constant *AArch64CPUFeatures = |
13843 | 47 | CGM.CreateRuntimeVariable(STy, "__aarch64_cpu_features"); |
13844 | 47 | cast<llvm::GlobalValue>(AArch64CPUFeatures)->setDSOLocal(true); |
13845 | 47 | llvm::Value *CpuFeatures = Builder.CreateGEP( |
13846 | 47 | STy, AArch64CPUFeatures, |
13847 | 47 | {ConstantInt::get(Int32Ty, 0), ConstantInt::get(Int32Ty, 0)}); |
13848 | 47 | Value *Features = Builder.CreateAlignedLoad(Int64Ty, CpuFeatures, |
13849 | 47 | CharUnits::fromQuantity(8)); |
13850 | 47 | Value *Mask = Builder.getInt64(FeaturesMask); |
13851 | 47 | Value *Bitset = Builder.CreateAnd(Features, Mask); |
13852 | 47 | Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask); |
13853 | 47 | Result = Builder.CreateAnd(Result, Cmp); |
13854 | 47 | } |
13855 | 47 | return Result; |
13856 | 47 | } |
13857 | | |
13858 | | Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, |
13859 | 9.30k | const CallExpr *E) { |
13860 | 9.30k | if (BuiltinID == X86::BI__builtin_cpu_is) |
13861 | 65 | return EmitX86CpuIs(E); |
13862 | 9.24k | if (BuiltinID == X86::BI__builtin_cpu_supports) |
13863 | 46 | return EmitX86CpuSupports(E); |
13864 | 9.19k | if (BuiltinID == X86::BI__builtin_cpu_init) |
13865 | 1 | return EmitX86CpuInit(); |
13866 | | |
13867 | | // Handle MSVC intrinsics before argument evaluation to prevent double |
13868 | | // evaluation. |
13869 | 9.19k | if (std::optional<MSVCIntrin> MsvcIntId = translateX86ToMsvcIntrin(BuiltinID)) |
13870 | 39 | return EmitMSVCBuiltinExpr(*MsvcIntId, E); |
13871 | | |
13872 | 9.15k | SmallVector<Value*, 4> Ops; |
13873 | 9.15k | bool IsMaskFCmp = false; |
13874 | 9.15k | bool IsConjFMA = false; |
13875 | | |
13876 | | // Find out if any arguments are required to be integer constant expressions. |
13877 | 9.15k | unsigned ICEArguments = 0; |
13878 | 9.15k | ASTContext::GetBuiltinTypeError Error; |
13879 | 9.15k | getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments); |
13880 | 9.15k | assert(Error == ASTContext::GE_None && "Should not codegen an error"); |
13881 | | |
13882 | 38.8k | for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
13883 | 29.6k | Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E)); |
13884 | 29.6k | } |
13885 | | |
13886 | | // These exist so that the builtin that takes an immediate can be bounds |
13887 | | // checked by clang to avoid passing bad immediates to the backend. Since |
13888 | | // AVX has a larger immediate than SSE we would need separate builtins to |
13889 | | // AVX has a larger immediate than SSE, we would need separate builtins to
13890 | | // do the different bounds checking. Rather than create a clang-specific
13891 | | // SSE-only builtin, this implements eight separate builtins to match the
13892 | | // gcc implementation.
13893 | 100 | Ops.push_back(llvm::ConstantInt::get(Int8Ty, Imm)); |
13894 | 100 | llvm::Function *F = CGM.getIntrinsic(ID); |
13895 | 100 | return Builder.CreateCall(F, Ops); |
13896 | 100 | }; |
13897 | | |
13898 | | // For the vector forms of FP comparisons, translate the builtins directly to |
13899 | | // IR. |
13900 | | // TODO: The builtins could be removed if the SSE header files used vector |
13901 | | // extension comparisons directly (vector ordered/unordered may need |
13902 | | // additional support via __builtin_isnan()). |
13903 | 9.15k | auto getVectorFCmpIR = [this, &Ops, E](CmpInst::Predicate Pred, |
13904 | 9.15k | bool IsSignaling) { |
13905 | 1.04k | CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E); |
13906 | 1.04k | Value *Cmp; |
13907 | 1.04k | if (IsSignaling) |
13908 | 528 | Cmp = Builder.CreateFCmpS(Pred, Ops[0], Ops[1]); |
13909 | 513 | else |
13910 | 513 | Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]); |
13911 | 1.04k | llvm::VectorType *FPVecTy = cast<llvm::VectorType>(Ops[0]->getType()); |
13912 | 1.04k | llvm::VectorType *IntVecTy = llvm::VectorType::getInteger(FPVecTy); |
13913 | 1.04k | Value *Sext = Builder.CreateSExt(Cmp, IntVecTy); |
13914 | 1.04k | return Builder.CreateBitCast(Sext, FPVecTy); |
13915 | 1.04k | }; |
13916 | | |
13917 | 9.15k | switch (BuiltinID) { |
13918 | 0 | default: return nullptr; |
13919 | 0 | case X86::BI_mm_prefetch: { |
13920 | 0 | Value *Address = Ops[0]; |
13921 | 0 | ConstantInt *C = cast<ConstantInt>(Ops[1]); |
13922 | 0 | Value *RW = ConstantInt::get(Int32Ty, (C->getZExtValue() >> 2) & 0x1); |
13923 | 0 | Value *Locality = ConstantInt::get(Int32Ty, C->getZExtValue() & 0x3); |
13924 | 0 | Value *Data = ConstantInt::get(Int32Ty, 1); |
13925 | 0 | Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType()); |
13926 | 0 | return Builder.CreateCall(F, {Address, RW, Locality, Data}); |
13927 | 0 | } |
13928 | 6 | case X86::BI_mm_clflush: { |
13929 | 6 | return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_clflush), |
13930 | 6 | Ops[0]); |
13931 | 0 | } |
13932 | 6 | case X86::BI_mm_lfence: { |
13933 | 6 | return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_lfence)); |
13934 | 0 | } |
13935 | 6 | case X86::BI_mm_mfence: { |
13936 | 6 | return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_mfence)); |
13937 | 0 | } |
13938 | 3 | case X86::BI_mm_sfence: { |
13939 | 3 | return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_sfence)); |
13940 | 0 | } |
13941 | 8 | case X86::BI_mm_pause: { |
13942 | 8 | return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_pause)); |
13943 | 0 | } |
13944 | 3 | case X86::BI__rdtsc: { |
13945 | 3 | return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_rdtsc)); |
13946 | 0 | } |
13947 | 3 | case X86::BI__builtin_ia32_rdtscp: { |
13948 | 3 | Value *Call = Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_rdtscp)); |
13949 | 3 | Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 1), |
13950 | 3 | Ops[0]); |
13951 | 3 | return Builder.CreateExtractValue(Call, 0); |
13952 | 0 | } |
13953 | 1 | case X86::BI__builtin_ia32_lzcnt_u16: |
13954 | 3 | case X86::BI__builtin_ia32_lzcnt_u32: |
13955 | 5 | case X86::BI__builtin_ia32_lzcnt_u64: { |
13956 | 5 | Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType()); |
13957 | 5 | return Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)}); |
13958 | 3 | } |
13959 | 2 | case X86::BI__builtin_ia32_tzcnt_u16: |
13960 | 6 | case X86::BI__builtin_ia32_tzcnt_u32: |
13961 | 10 | case X86::BI__builtin_ia32_tzcnt_u64: { |
13962 | 10 | Function *F = CGM.getIntrinsic(Intrinsic::cttz, Ops[0]->getType()); |
13963 | 10 | return Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)}); |
13964 | 6 | } |
13965 | 48 | case X86::BI__builtin_ia32_undef128: |
13966 | 100 | case X86::BI__builtin_ia32_undef256: |
13967 | 118 | case X86::BI__builtin_ia32_undef512: |
13968 | | // The x86 definition of "undef" is not the same as the LLVM definition |
13969 | | // (PR32176). We leave optimizing away an unnecessary zero constant to the |
13970 | | // IR optimizer and backend. |
13971 | | // TODO: If we had a "freeze" IR instruction to generate a fixed undef |
13972 | | // value, we should use that here instead of a zero. |
13973 | 118 | return llvm::Constant::getNullValue(ConvertType(E->getType())); |
13974 | 2 | case X86::BI__builtin_ia32_vec_init_v8qi: |
13975 | 4 | case X86::BI__builtin_ia32_vec_init_v4hi: |
13976 | 10 | case X86::BI__builtin_ia32_vec_init_v2si: |
13977 | 10 | return Builder.CreateBitCast(BuildVector(Ops), |
13978 | 10 | llvm::Type::getX86_MMXTy(getLLVMContext())); |
13979 | 5 | case X86::BI__builtin_ia32_vec_ext_v2si: |
13980 | 10 | case X86::BI__builtin_ia32_vec_ext_v16qi: |
13981 | 16 | case X86::BI__builtin_ia32_vec_ext_v8hi: |
13982 | 20 | case X86::BI__builtin_ia32_vec_ext_v4si: |
13983 | 24 | case X86::BI__builtin_ia32_vec_ext_v4sf: |
13984 | 28 | case X86::BI__builtin_ia32_vec_ext_v2di: |
13985 | 33 | case X86::BI__builtin_ia32_vec_ext_v32qi: |
13986 | 38 | case X86::BI__builtin_ia32_vec_ext_v16hi: |
13987 | 46 | case X86::BI__builtin_ia32_vec_ext_v8si: |
13988 | 49 | case X86::BI__builtin_ia32_vec_ext_v4di: { |
13989 | 49 | unsigned NumElts = |
13990 | 49 | cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements(); |
13991 | 49 | uint64_t Index = cast<ConstantInt>(Ops[1])->getZExtValue(); |
13992 | 49 | Index &= NumElts - 1; |
13993 | | // These builtins exist so we can ensure the index is an ICE and in range. |
13994 | | // Otherwise we could just do this in the header file. |
13995 | 49 | return Builder.CreateExtractElement(Ops[0], Index); |
13996 | 46 | } |
13997 | 4 | case X86::BI__builtin_ia32_vec_set_v16qi: |
13998 | 9 | case X86::BI__builtin_ia32_vec_set_v8hi: |
13999 | 13 | case X86::BI__builtin_ia32_vec_set_v4si: |
14000 | 15 | case X86::BI__builtin_ia32_vec_set_v2di: |
14001 | 20 | case X86::BI__builtin_ia32_vec_set_v32qi: |
14002 | 25 | case X86::BI__builtin_ia32_vec_set_v16hi: |
14003 | 30 | case X86::BI__builtin_ia32_vec_set_v8si: |
14004 | 33 | case X86::BI__builtin_ia32_vec_set_v4di: { |
14005 | 33 | unsigned NumElts = |
14006 | 33 | cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements(); |
14007 | 33 | unsigned Index = cast<ConstantInt>(Ops[2])->getZExtValue(); |
14008 | 33 | Index &= NumElts - 1; |
14009 | | // These builtins exist so we can ensure the index is an ICE and in range. |
14010 | | // Otherwise we could just do this in the header file. |
14011 | 33 | return Builder.CreateInsertElement(Ops[0], Ops[1], Index); |
14012 | 30 | } |
14013 | 11 | case X86::BI_mm_setcsr: |
14014 | 13 | case X86::BI__builtin_ia32_ldmxcsr: { |
14015 | 13 | Address Tmp = CreateMemTemp(E->getArg(0)->getType()); |
14016 | 13 | Builder.CreateStore(Ops[0], Tmp); |
14017 | 13 | return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_ldmxcsr), |
14018 | 13 | Tmp.getPointer()); |
14019 | 11 | } |
14020 | 19 | case X86::BI_mm_getcsr: |
14021 | 21 | case X86::BI__builtin_ia32_stmxcsr: { |
14022 | 21 | Address Tmp = CreateMemTemp(E->getType()); |
14023 | 21 | Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_stmxcsr), |
14024 | 21 | Tmp.getPointer()); |
14025 | 21 | return Builder.CreateLoad(Tmp, "stmxcsr"); |
14026 | 19 | } |
14027 | 10 | case X86::BI__builtin_ia32_xsave: |
14028 | 16 | case X86::BI__builtin_ia32_xsave64: |
14029 | 26 | case X86::BI__builtin_ia32_xrstor: |
14030 | 32 | case X86::BI__builtin_ia32_xrstor64: |
14031 | 42 | case X86::BI__builtin_ia32_xsaveopt: |
14032 | 48 | case X86::BI__builtin_ia32_xsaveopt64: |
14033 | 58 | case X86::BI__builtin_ia32_xrstors: |
14034 | 64 | case X86::BI__builtin_ia32_xrstors64: |
14035 | 74 | case X86::BI__builtin_ia32_xsavec: |
14036 | 80 | case X86::BI__builtin_ia32_xsavec64: |
14037 | 90 | case X86::BI__builtin_ia32_xsaves: |
14038 | 96 | case X86::BI__builtin_ia32_xsaves64: |
14039 | 102 | case X86::BI__builtin_ia32_xsetbv: |
14040 | 102 | case X86::BI_xsetbv: { |
14041 | 102 | Intrinsic::ID ID; |
14042 | 102 | #define INTRINSIC_X86_XSAVE_ID(NAME) \ |
14043 | 102 | case X86::BI__builtin_ia32_##NAME: \ |
14044 | 102 | ID = Intrinsic::x86_##NAME; \ |
14045 | 102 | break |
14046 | 102 | switch (BuiltinID) { |
14047 | 0 | default: llvm_unreachable("Unsupported intrinsic!"); |
14048 | 10 | INTRINSIC_X86_XSAVE_ID(xsave);
14049 | 6 | INTRINSIC_X86_XSAVE_ID(xsave64);
14050 | 10 | INTRINSIC_X86_XSAVE_ID(xrstor);
14051 | 6 | INTRINSIC_X86_XSAVE_ID(xrstor64);
14052 | 10 | INTRINSIC_X86_XSAVE_ID(xsaveopt);
14053 | 6 | INTRINSIC_X86_XSAVE_ID(xsaveopt64);
14054 | 10 | INTRINSIC_X86_XSAVE_ID(xrstors);
14055 | 6 | INTRINSIC_X86_XSAVE_ID(xrstors64);
14056 | 10 | INTRINSIC_X86_XSAVE_ID(xsavec);
14057 | 6 | INTRINSIC_X86_XSAVE_ID(xsavec64);
14058 | 10 | INTRINSIC_X86_XSAVE_ID(xsaves);
14059 | 6 | INTRINSIC_X86_XSAVE_ID(xsaves64);
14060 | 6 | INTRINSIC_X86_XSAVE_ID(xsetbv);
14061 | 0 | case X86::BI_xsetbv: |
14062 | 0 | ID = Intrinsic::x86_xsetbv; |
14063 | 0 | break; |
14064 | 102 | } |
14065 | 102 | #undef INTRINSIC_X86_XSAVE_ID |
14066 | 102 | Value *Mhi = Builder.CreateTrunc( |
14067 | 102 | Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, 32)), Int32Ty); |
14068 | 102 | Value *Mlo = Builder.CreateTrunc(Ops[1], Int32Ty); |
14069 | 102 | Ops[1] = Mhi; |
14070 | 102 | Ops.push_back(Mlo); |
14071 | 102 | return Builder.CreateCall(CGM.getIntrinsic(ID), Ops); |
14072 | 102 | } |
14073 | 6 | case X86::BI__builtin_ia32_xgetbv: |
14074 | 8 | case X86::BI_xgetbv: |
14075 | 8 | return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_xgetbv), Ops); |
14076 | 1 | case X86::BI__builtin_ia32_storedqudi128_mask: |
14077 | 2 | case X86::BI__builtin_ia32_storedqusi128_mask: |
14078 | 4 | case X86::BI__builtin_ia32_storedquhi128_mask: |
14079 | 6 | case X86::BI__builtin_ia32_storedquqi128_mask: |
14080 | 7 | case X86::BI__builtin_ia32_storeupd128_mask: |
14081 | 8 | case X86::BI__builtin_ia32_storeups128_mask: |
14082 | 9 | case X86::BI__builtin_ia32_storedqudi256_mask: |
14083 | 10 | case X86::BI__builtin_ia32_storedqusi256_mask: |
14084 | 12 | case X86::BI__builtin_ia32_storedquhi256_mask: |
14085 | 14 | case X86::BI__builtin_ia32_storedquqi256_mask: |
14086 | 15 | case X86::BI__builtin_ia32_storeupd256_mask: |
14087 | 16 | case X86::BI__builtin_ia32_storeups256_mask: |
14088 | 18 | case X86::BI__builtin_ia32_storedqudi512_mask: |
14089 | 20 | case X86::BI__builtin_ia32_storedqusi512_mask: |
14090 | 22 | case X86::BI__builtin_ia32_storedquhi512_mask: |
14091 | 24 | case X86::BI__builtin_ia32_storedquqi512_mask: |
14092 | 24 | case X86::BI__builtin_ia32_storeupd512_mask: |
14093 | 24 | case X86::BI__builtin_ia32_storeups512_mask: |
14094 | 24 | return EmitX86MaskedStore(*this, Ops, Align(1)); |
14095 | | |
14096 | 1 | case X86::BI__builtin_ia32_storesh128_mask: |
14097 | 3 | case X86::BI__builtin_ia32_storess128_mask: |
14098 | 5 | case X86::BI__builtin_ia32_storesd128_mask: |
14099 | 5 | return EmitX86MaskedStore(*this, Ops, Align(1)); |
14100 | | |
14101 | 1 | case X86::BI__builtin_ia32_vpopcntb_128: |
14102 | 2 | case X86::BI__builtin_ia32_vpopcntd_128: |
14103 | 3 | case X86::BI__builtin_ia32_vpopcntq_128: |
14104 | 4 | case X86::BI__builtin_ia32_vpopcntw_128: |
14105 | 5 | case X86::BI__builtin_ia32_vpopcntb_256: |
14106 | 6 | case X86::BI__builtin_ia32_vpopcntd_256: |
14107 | 7 | case X86::BI__builtin_ia32_vpopcntq_256: |
14108 | 8 | case X86::BI__builtin_ia32_vpopcntw_256: |
14109 | 9 | case X86::BI__builtin_ia32_vpopcntb_512: |
14110 | 10 | case X86::BI__builtin_ia32_vpopcntd_512: |
14111 | 11 | case X86::BI__builtin_ia32_vpopcntq_512: |
14112 | 12 | case X86::BI__builtin_ia32_vpopcntw_512: { |
14113 | 12 | llvm::Type *ResultType = ConvertType(E->getType()); |
14114 | 12 | llvm::Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType); |
14115 | 12 | return Builder.CreateCall(F, Ops); |
14116 | 11 | } |
14117 | 2 | case X86::BI__builtin_ia32_cvtmask2b128: |
14118 | 4 | case X86::BI__builtin_ia32_cvtmask2b256: |
14119 | 6 | case X86::BI__builtin_ia32_cvtmask2b512: |
14120 | 8 | case X86::BI__builtin_ia32_cvtmask2w128: |
14121 | 10 | case X86::BI__builtin_ia32_cvtmask2w256: |
14122 | 12 | case X86::BI__builtin_ia32_cvtmask2w512: |
14123 | 13 | case X86::BI__builtin_ia32_cvtmask2d128: |
14124 | 14 | case X86::BI__builtin_ia32_cvtmask2d256: |
14125 | 15 | case X86::BI__builtin_ia32_cvtmask2d512: |
14126 | 16 | case X86::BI__builtin_ia32_cvtmask2q128: |
14127 | 17 | case X86::BI__builtin_ia32_cvtmask2q256: |
14128 | 18 | case X86::BI__builtin_ia32_cvtmask2q512: |
14129 | 18 | return EmitX86SExtMask(*this, Ops[0], ConvertType(E->getType())); |
14130 | | |
14131 | 2 | case X86::BI__builtin_ia32_cvtb2mask128: |
14132 | 4 | case X86::BI__builtin_ia32_cvtb2mask256: |
14133 | 6 | case X86::BI__builtin_ia32_cvtb2mask512: |
14134 | 8 | case X86::BI__builtin_ia32_cvtw2mask128: |
14135 | 10 | case X86::BI__builtin_ia32_cvtw2mask256: |
14136 | 12 | case X86::BI__builtin_ia32_cvtw2mask512: |
14137 | 13 | case X86::BI__builtin_ia32_cvtd2mask128: |
14138 | 14 | case X86::BI__builtin_ia32_cvtd2mask256: |
14139 | 15 | case X86::BI__builtin_ia32_cvtd2mask512: |
14140 | 16 | case X86::BI__builtin_ia32_cvtq2mask128: |
14141 | 17 | case X86::BI__builtin_ia32_cvtq2mask256: |
14142 | 18 | case X86::BI__builtin_ia32_cvtq2mask512: |
14143 | 18 | return EmitX86ConvertToMask(*this, Ops[0]); |
14144 | | |
14145 | 6 | case X86::BI__builtin_ia32_cvtdq2ps512_mask: |
14146 | 36 | case X86::BI__builtin_ia32_cvtqq2ps512_mask: |
14147 | 51 | case X86::BI__builtin_ia32_cvtqq2pd512_mask: |
14148 | 57 | case X86::BI__builtin_ia32_vcvtw2ph512_mask: |
14149 | 63 | case X86::BI__builtin_ia32_vcvtdq2ph512_mask: |
14150 | 69 | case X86::BI__builtin_ia32_vcvtqq2ph512_mask: |
14151 | 69 | return EmitX86ConvertIntToFp(*this, E, Ops, /*IsSigned*/ true); |
14152 | 6 | case X86::BI__builtin_ia32_cvtudq2ps512_mask: |
14153 | 36 | case X86::BI__builtin_ia32_cvtuqq2ps512_mask: |
14154 | 51 | case X86::BI__builtin_ia32_cvtuqq2pd512_mask: |
14155 | 57 | case X86::BI__builtin_ia32_vcvtuw2ph512_mask: |
14156 | 63 | case X86::BI__builtin_ia32_vcvtudq2ph512_mask: |
14157 | 69 | case X86::BI__builtin_ia32_vcvtuqq2ph512_mask: |
14158 | 69 | return EmitX86ConvertIntToFp(*this, E, Ops, /*IsSigned*/ false); |
14159 | | |
14160 | 20 | case X86::BI__builtin_ia32_vfmaddss3: |
14161 | 40 | case X86::BI__builtin_ia32_vfmaddsd3: |
14162 | 56 | case X86::BI__builtin_ia32_vfmaddsh3_mask: |
14163 | 80 | case X86::BI__builtin_ia32_vfmaddss3_mask: |
14164 | 104 | case X86::BI__builtin_ia32_vfmaddsd3_mask: |
14165 | 104 | return EmitScalarFMAExpr(*this, E, Ops, Ops[0]); |
14166 | 8 | case X86::BI__builtin_ia32_vfmaddss: |
14167 | 16 | case X86::BI__builtin_ia32_vfmaddsd: |
14168 | 16 | return EmitScalarFMAExpr(*this, E, Ops, |
14169 | 16 | Constant::getNullValue(Ops[0]->getType())); |
14170 | 8 | case X86::BI__builtin_ia32_vfmaddsh3_maskz: |
14171 | 24 | case X86::BI__builtin_ia32_vfmaddss3_maskz: |
14172 | 40 | case X86::BI__builtin_ia32_vfmaddsd3_maskz: |
14173 | 40 | return EmitScalarFMAExpr(*this, E, Ops, Ops[0], /*ZeroMask*/ true); |
14174 | 4 | case X86::BI__builtin_ia32_vfmaddsh3_mask3: |
14175 | 12 | case X86::BI__builtin_ia32_vfmaddss3_mask3: |
14176 | 20 | case X86::BI__builtin_ia32_vfmaddsd3_mask3: |
14177 | 20 | return EmitScalarFMAExpr(*this, E, Ops, Ops[2], /*ZeroMask*/ false, 2); |
14178 | 4 | case X86::BI__builtin_ia32_vfmsubsh3_mask3: |
14179 | 12 | case X86::BI__builtin_ia32_vfmsubss3_mask3: |
14180 | 20 | case X86::BI__builtin_ia32_vfmsubsd3_mask3: |
14181 | 20 | return EmitScalarFMAExpr(*this, E, Ops, Ops[2], /*ZeroMask*/ false, 2, |
14182 | 20 | /*NegAcc*/ true); |
14183 | 14 | case X86::BI__builtin_ia32_vfmaddph: |
14184 | 57 | case X86::BI__builtin_ia32_vfmaddps: |
14185 | 97 | case X86::BI__builtin_ia32_vfmaddpd: |
14186 | 113 | case X86::BI__builtin_ia32_vfmaddph256: |
14187 | 153 | case X86::BI__builtin_ia32_vfmaddps256: |
14188 | 193 | case X86::BI__builtin_ia32_vfmaddpd256: |
14189 | 209 | case X86::BI__builtin_ia32_vfmaddph512_mask: |
14190 | 217 | case X86::BI__builtin_ia32_vfmaddph512_maskz: |
14191 | 221 | case X86::BI__builtin_ia32_vfmaddph512_mask3: |
14192 | 253 | case X86::BI__builtin_ia32_vfmaddps512_mask: |
14193 | 269 | case X86::BI__builtin_ia32_vfmaddps512_maskz: |
14194 | 277 | case X86::BI__builtin_ia32_vfmaddps512_mask3: |
14195 | 285 | case X86::BI__builtin_ia32_vfmsubps512_mask3: |
14196 | 317 | case X86::BI__builtin_ia32_vfmaddpd512_mask: |
14197 | 333 | case X86::BI__builtin_ia32_vfmaddpd512_maskz: |
14198 | 341 | case X86::BI__builtin_ia32_vfmaddpd512_mask3: |
14199 | 349 | case X86::BI__builtin_ia32_vfmsubpd512_mask3: |
14200 | 353 | case X86::BI__builtin_ia32_vfmsubph512_mask3: |
14201 | 353 | return EmitX86FMAExpr(*this, E, Ops, BuiltinID, /*IsAddSub*/ false); |
14202 | 8 | case X86::BI__builtin_ia32_vfmaddsubph512_mask: |
14203 | 12 | case X86::BI__builtin_ia32_vfmaddsubph512_maskz: |
14204 | 14 | case X86::BI__builtin_ia32_vfmaddsubph512_mask3: |
14205 | 16 | case X86::BI__builtin_ia32_vfmsubaddph512_mask3: |
14206 | 32 | case X86::BI__builtin_ia32_vfmaddsubps512_mask: |
14207 | 40 | case X86::BI__builtin_ia32_vfmaddsubps512_maskz: |
14208 | 44 | case X86::BI__builtin_ia32_vfmaddsubps512_mask3: |
14209 | 48 | case X86::BI__builtin_ia32_vfmsubaddps512_mask3: |
14210 | 64 | case X86::BI__builtin_ia32_vfmaddsubpd512_mask: |
14211 | 72 | case X86::BI__builtin_ia32_vfmaddsubpd512_maskz: |
14212 | 76 | case X86::BI__builtin_ia32_vfmaddsubpd512_mask3: |
14213 | 80 | case X86::BI__builtin_ia32_vfmsubaddpd512_mask3: |
14214 | 80 | return EmitX86FMAExpr(*this, E, Ops, BuiltinID, /*IsAddSub*/ true); |
14215 | | |
14216 | 1 | case X86::BI__builtin_ia32_movdqa32store128_mask: |
14217 | 2 | case X86::BI__builtin_ia32_movdqa64store128_mask: |
14218 | 3 | case X86::BI__builtin_ia32_storeaps128_mask: |
14219 | 4 | case X86::BI__builtin_ia32_storeapd128_mask: |
14220 | 5 | case X86::BI__builtin_ia32_movdqa32store256_mask: |
14221 | 6 | case X86::BI__builtin_ia32_movdqa64store256_mask: |
14222 | 7 | case X86::BI__builtin_ia32_storeaps256_mask: |
14223 | 8 | case X86::BI__builtin_ia32_storeapd256_mask: |
14224 | 10 | case X86::BI__builtin_ia32_movdqa32store512_mask: |
14225 | 12 | case X86::BI__builtin_ia32_movdqa64store512_mask: |
14226 | 14 | case X86::BI__builtin_ia32_storeaps512_mask: |
14227 | 16 | case X86::BI__builtin_ia32_storeapd512_mask: |
14228 | 16 | return EmitX86MaskedStore( |
14229 | 16 | *this, Ops, |
14230 | 16 | getContext().getTypeAlignInChars(E->getArg(1)->getType()).getAsAlign()); |
14231 | | |
14232 | 2 | case X86::BI__builtin_ia32_loadups128_mask: |
14233 | 4 | case X86::BI__builtin_ia32_loadups256_mask: |
14234 | 6 | case X86::BI__builtin_ia32_loadups512_mask: |
14235 | 8 | case X86::BI__builtin_ia32_loadupd128_mask: |
14236 | 10 | case X86::BI__builtin_ia32_loadupd256_mask: |
14237 | 12 | case X86::BI__builtin_ia32_loadupd512_mask: |
14238 | 16 | case X86::BI__builtin_ia32_loaddquqi128_mask: |
14239 | 20 | case X86::BI__builtin_ia32_loaddquqi256_mask: |
14240 | 24 | case X86::BI__builtin_ia32_loaddquqi512_mask: |
14241 | 28 | case X86::BI__builtin_ia32_loaddquhi128_mask: |
14242 | 32 | case X86::BI__builtin_ia32_loaddquhi256_mask: |
14243 | 36 | case X86::BI__builtin_ia32_loaddquhi512_mask: |
14244 | 38 | case X86::BI__builtin_ia32_loaddqusi128_mask: |
14245 | 40 | case X86::BI__builtin_ia32_loaddqusi256_mask: |
14246 | 44 | case X86::BI__builtin_ia32_loaddqusi512_mask: |
14247 | 46 | case X86::BI__builtin_ia32_loaddqudi128_mask: |
14248 | 48 | case X86::BI__builtin_ia32_loaddqudi256_mask: |
14249 | 52 | case X86::BI__builtin_ia32_loaddqudi512_mask: |
14250 | 52 | return EmitX86MaskedLoad(*this, Ops, Align(1)); |
14251 | | |
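 | | // Scalar masked loads (mask_load_ss/sd/sh) share the unaligned path; the
 | | // builtins take full vector operands, so the same helper applies.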
14252 | 2 | case X86::BI__builtin_ia32_loadsh128_mask: |
14253 | 6 | case X86::BI__builtin_ia32_loadss128_mask: |
14254 | 10 | case X86::BI__builtin_ia32_loadsd128_mask: |
14255 | 10 | return EmitX86MaskedLoad(*this, Ops, Align(1)); |
14256 | | |
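 | | // Aligned masked loads: as with the aligned stores above, the alignment
 | | // comes from the ABI alignment of the vector operand's type.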
14257 | 2 | case X86::BI__builtin_ia32_loadaps128_mask: |
14258 | 4 | case X86::BI__builtin_ia32_loadaps256_mask: |
14259 | 8 | case X86::BI__builtin_ia32_loadaps512_mask: |
14260 | 10 | case X86::BI__builtin_ia32_loadapd128_mask: |
14261 | 12 | case X86::BI__builtin_ia32_loadapd256_mask: |
14262 | 16 | case X86::BI__builtin_ia32_loadapd512_mask: |
14263 | 18 | case X86::BI__builtin_ia32_movdqa32load128_mask: |
14264 | 20 | case X86::BI__builtin_ia32_movdqa32load256_mask: |
14265 | 24 | case X86::BI__builtin_ia32_movdqa32load512_mask: |
14266 | 26 | case X86::BI__builtin_ia32_movdqa64load128_mask: |
14267 | 28 | case X86::BI__builtin_ia32_movdqa64load256_mask: |
14268 | 32 | case X86::BI__builtin_ia32_movdqa64load512_mask: |
14269 | 32 | return EmitX86MaskedLoad( |
14270 | 32 | *this, Ops, |
14271 | 32 | getContext().getTypeAlignInChars(E->getArg(1)->getType()).getAsAlign()); |
14272 | | |
14273 | 2 | case X86::BI__builtin_ia32_expandloaddf128_mask: |
14274 | 4 | case X86::BI__builtin_ia32_expandloaddf256_mask: |
14275 | 8 | case X86::BI__builtin_ia32_expandloaddf512_mask: |
14276 | 10 | case X86::BI__builtin_ia32_expandloadsf128_mask: |
14277 | 12 | case X86::BI__builtin_ia32_expandloadsf256_mask: |
14278 | 16 | case X86::BI__builtin_ia32_expandloadsf512_mask: |
14279 | 18 | case X86::BI__builtin_ia32_expandloaddi128_mask: |
14280 | 20 | case X86::BI__builtin_ia32_expandloaddi256_mask: |
14281 | 24 | case X86::BI__builtin_ia32_expandloaddi512_mask: |
14282 | 26 | case X86::BI__builtin_ia32_expandloadsi128_mask: |
14283 | 28 | case X86::BI__builtin_ia32_expandloadsi256_mask: |
14284 | 32 | case X86::BI__builtin_ia32_expandloadsi512_mask: |
14285 | 34 | case X86::BI__builtin_ia32_expandloadhi128_mask: |
14286 | 36 | case X86::BI__builtin_ia32_expandloadhi256_mask: |
14287 | 38 | case X86::BI__builtin_ia32_expandloadhi512_mask: |
14288 | 40 | case X86::BI__builtin_ia32_expandloadqi128_mask: |
14289 | 42 | case X86::BI__builtin_ia32_expandloadqi256_mask: |
14290 | 44 | case X86::BI__builtin_ia32_expandloadqi512_mask: |
14291 | 44 | return EmitX86ExpandLoad(*this, Ops); |
14292 | | |
14293 | 1 | case X86::BI__builtin_ia32_compressstoredf128_mask: |
14294 | 2 | case X86::BI__builtin_ia32_compressstoredf256_mask: |
14295 | 4 | case X86::BI__builtin_ia32_compressstoredf512_mask: |
14296 | 5 | case X86::BI__builtin_ia32_compressstoresf128_mask: |
14297 | 6 | case X86::BI__builtin_ia32_compressstoresf256_mask: |
14298 | 8 | case X86::BI__builtin_ia32_compressstoresf512_mask: |
14299 | 9 | case X86::BI__builtin_ia32_compressstoredi128_mask: |
14300 | 10 | case X86::BI__builtin_ia32_compressstoredi256_mask: |
14301 | 12 | case X86::BI__builtin_ia32_compressstoredi512_mask: |
14302 | 13 | case X86::BI__builtin_ia32_compressstoresi128_mask: |
14303 | 14 | case X86::BI__builtin_ia32_compressstoresi256_mask: |
14304 | 16 | case X86::BI__builtin_ia32_compressstoresi512_mask: |
14305 | 17 | case X86::BI__builtin_ia32_compressstorehi128_mask: |
14306 | 18 | case X86::BI__builtin_ia32_compressstorehi256_mask: |
14307 | 19 | case X86::BI__builtin_ia32_compressstorehi512_mask: |
14308 | 20 | case X86::BI__builtin_ia32_compressstoreqi128_mask: |
14309 | 21 | case X86::BI__builtin_ia32_compressstoreqi256_mask: |
14310 | 22 | case X86::BI__builtin_ia32_compressstoreqi512_mask: |
14311 | 22 | return EmitX86CompressStore(*this, Ops); |
14312 | | |
14313 | 2 | case X86::BI__builtin_ia32_expanddf128_mask: |
14314 | 4 | case X86::BI__builtin_ia32_expanddf256_mask: |
14315 | 8 | case X86::BI__builtin_ia32_expanddf512_mask: |
14316 | 10 | case X86::BI__builtin_ia32_expandsf128_mask: |
14317 | 12 | case X86::BI__builtin_ia32_expandsf256_mask: |
14318 | 16 | case X86::BI__builtin_ia32_expandsf512_mask: |
14319 | 18 | case X86::BI__builtin_ia32_expanddi128_mask: |
14320 | 20 | case X86::BI__builtin_ia32_expanddi256_mask: |
14321 | 24 | case X86::BI__builtin_ia32_expanddi512_mask: |
14322 | 26 | case X86::BI__builtin_ia32_expandsi128_mask: |
14323 | 28 | case X86::BI__builtin_ia32_expandsi256_mask: |
14324 | 32 | case X86::BI__builtin_ia32_expandsi512_mask: |
14325 | 34 | case X86::BI__builtin_ia32_expandhi128_mask: |
14326 | 36 | case X86::BI__builtin_ia32_expandhi256_mask: |
14327 | 38 | case X86::BI__builtin_ia32_expandhi512_mask: |
14328 | 40 | case X86::BI__builtin_ia32_expandqi128_mask: |
14329 | 42 | case X86::BI__builtin_ia32_expandqi256_mask: |
14330 | 44 | case X86::BI__builtin_ia32_expandqi512_mask: |
14331 | 44 | return EmitX86CompressExpand(*this, Ops, /*IsCompress*/false); |
14332 | | |
14333 | 2 | case X86::BI__builtin_ia32_compressdf128_mask: |
14334 | 4 | case X86::BI__builtin_ia32_compressdf256_mask: |
14335 | 8 | case X86::BI__builtin_ia32_compressdf512_mask: |
14336 | 10 | case X86::BI__builtin_ia32_compresssf128_mask: |
14337 | 12 | case X86::BI__builtin_ia32_compresssf256_mask: |
14338 | 16 | case X86::BI__builtin_ia32_compresssf512_mask: |
14339 | 18 | case X86::BI__builtin_ia32_compressdi128_mask: |
14340 | 20 | case X86::BI__builtin_ia32_compressdi256_mask: |
14341 | 24 | case X86::BI__builtin_ia32_compressdi512_mask: |
14342 | 26 | case X86::BI__builtin_ia32_compresssi128_mask: |
14343 | 28 | case X86::BI__builtin_ia32_compresssi256_mask: |
14344 | 32 | case X86::BI__builtin_ia32_compresssi512_mask: |
14345 | 34 | case X86::BI__builtin_ia32_compresshi128_mask: |
14346 | 36 | case X86::BI__builtin_ia32_compresshi256_mask: |
14347 | 38 | case X86::BI__builtin_ia32_compresshi512_mask: |
14348 | 40 | case X86::BI__builtin_ia32_compressqi128_mask: |
14349 | 42 | case X86::BI__builtin_ia32_compressqi256_mask: |
14350 | 44 | case X86::BI__builtin_ia32_compressqi512_mask: |
14351 | 44 | return EmitX86CompressExpand(*this, Ops, /*IsCompress*/true); |
14352 | | |
14353 | 1 | case X86::BI__builtin_ia32_gather3div2df: |
14354 | 2 | case X86::BI__builtin_ia32_gather3div2di: |
14355 | 3 | case X86::BI__builtin_ia32_gather3div4df: |
14356 | 4 | case X86::BI__builtin_ia32_gather3div4di: |
14357 | 5 | case X86::BI__builtin_ia32_gather3div4sf: |
14358 | 6 | case X86::BI__builtin_ia32_gather3div4si: |
14359 | 7 | case X86::BI__builtin_ia32_gather3div8sf: |
14360 | 8 | case X86::BI__builtin_ia32_gather3div8si: |
14361 | 9 | case X86::BI__builtin_ia32_gather3siv2df: |
14362 | 10 | case X86::BI__builtin_ia32_gather3siv2di: |
14363 | 11 | case X86::BI__builtin_ia32_gather3siv4df: |
14364 | 12 | case X86::BI__builtin_ia32_gather3siv4di: |
14365 | 13 | case X86::BI__builtin_ia32_gather3siv4sf: |
14366 | 14 | case X86::BI__builtin_ia32_gather3siv4si: |
14367 | 15 | case X86::BI__builtin_ia32_gather3siv8sf: |
14368 | 16 | case X86::BI__builtin_ia32_gather3siv8si: |
14369 | 24 | case X86::BI__builtin_ia32_gathersiv8df: |
14370 | 28 | case X86::BI__builtin_ia32_gathersiv16sf: |
14371 | 32 | case X86::BI__builtin_ia32_gatherdiv8df: |
14372 | 36 | case X86::BI__builtin_ia32_gatherdiv16sf: |
14373 | 44 | case X86::BI__builtin_ia32_gathersiv8di: |
14374 | 48 | case X86::BI__builtin_ia32_gathersiv16si: |
14375 | 52 | case X86::BI__builtin_ia32_gatherdiv8di: |
14376 | 56 | case X86::BI__builtin_ia32_gatherdiv16si: { |
14377 | 56 | Intrinsic::ID IID; |
14378 | 56 | switch (BuiltinID) { |
14379 | 0 | default: llvm_unreachable("Unexpected builtin"); |
14380 | 1 | case X86::BI__builtin_ia32_gather3div2df: |
14381 | 1 | IID = Intrinsic::x86_avx512_mask_gather3div2_df; |
14382 | 1 | break; |
14383 | 1 | case X86::BI__builtin_ia32_gather3div2di: |
14384 | 1 | IID = Intrinsic::x86_avx512_mask_gather3div2_di; |
14385 | 1 | break; |
14386 | 1 | case X86::BI__builtin_ia32_gather3div4df: |
14387 | 1 | IID = Intrinsic::x86_avx512_mask_gather3div4_df; |
14388 | 1 | break; |
14389 | 1 | case X86::BI__builtin_ia32_gather3div4di: |
14390 | 1 | IID = Intrinsic::x86_avx512_mask_gather3div4_di; |
14391 | 1 | break; |
14392 | 1 | case X86::BI__builtin_ia32_gather3div4sf: |
14393 | 1 | IID = Intrinsic::x86_avx512_mask_gather3div4_sf; |
14394 | 1 | break; |
14395 | 1 | case X86::BI__builtin_ia32_gather3div4si: |
14396 | 1 | IID = Intrinsic::x86_avx512_mask_gather3div4_si; |
14397 | 1 | break; |
14398 | 1 | case X86::BI__builtin_ia32_gather3div8sf: |
14399 | 1 | IID = Intrinsic::x86_avx512_mask_gather3div8_sf; |
14400 | 1 | break; |
14401 | 1 | case X86::BI__builtin_ia32_gather3div8si: |
14402 | 1 | IID = Intrinsic::x86_avx512_mask_gather3div8_si; |
14403 | 1 | break; |
14404 | 1 | case X86::BI__builtin_ia32_gather3siv2df: |
14405 | 1 | IID = Intrinsic::x86_avx512_mask_gather3siv2_df; |
14406 | 1 | break; |
14407 | 1 | case X86::BI__builtin_ia32_gather3siv2di: |
14408 | 1 | IID = Intrinsic::x86_avx512_mask_gather3siv2_di; |
14409 | 1 | break; |
14410 | 1 | case X86::BI__builtin_ia32_gather3siv4df: |
14411 | 1 | IID = Intrinsic::x86_avx512_mask_gather3siv4_df; |
14412 | 1 | break; |
14413 | 1 | case X86::BI__builtin_ia32_gather3siv4di: |
14414 | 1 | IID = Intrinsic::x86_avx512_mask_gather3siv4_di; |
14415 | 1 | break; |
14416 | 1 | case X86::BI__builtin_ia32_gather3siv4sf: |
14417 | 1 | IID = Intrinsic::x86_avx512_mask_gather3siv4_sf; |
14418 | 1 | break; |
14419 | 1 | case X86::BI__builtin_ia32_gather3siv4si: |
14420 | 1 | IID = Intrinsic::x86_avx512_mask_gather3siv4_si; |
14421 | 1 | break; |
14422 | 1 | case X86::BI__builtin_ia32_gather3siv8sf: |
14423 | 1 | IID = Intrinsic::x86_avx512_mask_gather3siv8_sf; |
14424 | 1 | break; |
14425 | 1 | case X86::BI__builtin_ia32_gather3siv8si: |
14426 | 1 | IID = Intrinsic::x86_avx512_mask_gather3siv8_si; |
14427 | 1 | break; |
14428 | 8 | case X86::BI__builtin_ia32_gathersiv8df: |
14429 | 8 | IID = Intrinsic::x86_avx512_mask_gather_dpd_512; |
14430 | 8 | break; |
14431 | 4 | case X86::BI__builtin_ia32_gathersiv16sf: |
14432 | 4 | IID = Intrinsic::x86_avx512_mask_gather_dps_512; |
14433 | 4 | break; |
14434 | 4 | case X86::BI__builtin_ia32_gatherdiv8df: |
14435 | 4 | IID = Intrinsic::x86_avx512_mask_gather_qpd_512; |
14436 | 4 | break; |
14437 | 4 | case X86::BI__builtin_ia32_gatherdiv16sf: |
14438 | 4 | IID = Intrinsic::x86_avx512_mask_gather_qps_512; |
14439 | 4 | break; |
14440 | 8 | case X86::BI__builtin_ia32_gathersiv8di: |
14441 | 8 | IID = Intrinsic::x86_avx512_mask_gather_dpq_512; |
14442 | 8 | break; |
14443 | 4 | case X86::BI__builtin_ia32_gathersiv16si: |
14444 | 4 | IID = Intrinsic::x86_avx512_mask_gather_dpi_512; |
14445 | 4 | break; |
14446 | 4 | case X86::BI__builtin_ia32_gatherdiv8di: |
14447 | 4 | IID = Intrinsic::x86_avx512_mask_gather_qpq_512; |
14448 | 4 | break; |
14449 | 4 | case X86::BI__builtin_ia32_gatherdiv16si: |
14450 | 4 | IID = Intrinsic::x86_avx512_mask_gather_qpi_512; |
14451 | 4 | break; |
14452 | 56 | } |
14453 | | |
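 | | // The passthru vector (Ops[0]) and index vector (Ops[2]) may have
 | | // different element counts (e.g. v2f64 data gathered with v4i32
 | | // indices), so the mask is sized to the smaller of the two.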
14454 | 56 | unsigned MinElts = std::min( |
14455 | 56 | cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements(), |
14456 | 56 | cast<llvm::FixedVectorType>(Ops[2]->getType())->getNumElements()); |
14457 | 56 | Ops[3] = getMaskVecValue(*this, Ops[3], MinElts); |
14458 | 56 | Function *Intr = CGM.getIntrinsic(IID); |
14459 | 56 | return Builder.CreateCall(Intr, Ops); |
14460 | 56 | } |
14461 | | |
14462 | 8 | case X86::BI__builtin_ia32_scattersiv8df: |
14463 | 12 | case X86::BI__builtin_ia32_scattersiv16sf: |
14464 | 16 | case X86::BI__builtin_ia32_scatterdiv8df: |
14465 | 20 | case X86::BI__builtin_ia32_scatterdiv16sf: |
14466 | 28 | case X86::BI__builtin_ia32_scattersiv8di: |
14467 | 32 | case X86::BI__builtin_ia32_scattersiv16si: |
14468 | 36 | case X86::BI__builtin_ia32_scatterdiv8di: |
14469 | 40 | case X86::BI__builtin_ia32_scatterdiv16si: |
14470 | 42 | case X86::BI__builtin_ia32_scatterdiv2df: |
14471 | 44 | case X86::BI__builtin_ia32_scatterdiv2di: |
14472 | 46 | case X86::BI__builtin_ia32_scatterdiv4df: |
14473 | 48 | case X86::BI__builtin_ia32_scatterdiv4di: |
14474 | 50 | case X86::BI__builtin_ia32_scatterdiv4sf: |
14475 | 52 | case X86::BI__builtin_ia32_scatterdiv4si: |
14476 | 54 | case X86::BI__builtin_ia32_scatterdiv8sf: |
14477 | 56 | case X86::BI__builtin_ia32_scatterdiv8si: |
14478 | 58 | case X86::BI__builtin_ia32_scattersiv2df: |
14479 | 60 | case X86::BI__builtin_ia32_scattersiv2di: |
14480 | 62 | case X86::BI__builtin_ia32_scattersiv4df: |
14481 | 64 | case X86::BI__builtin_ia32_scattersiv4di: |
14482 | 66 | case X86::BI__builtin_ia32_scattersiv4sf: |
14483 | 68 | case X86::BI__builtin_ia32_scattersiv4si: |
14484 | 70 | case X86::BI__builtin_ia32_scattersiv8sf: |
14485 | 72 | case X86::BI__builtin_ia32_scattersiv8si: { |
14486 | 72 | Intrinsic::ID IID; |
14487 | 72 | switch (BuiltinID) { |
14488 | 0 | default: llvm_unreachable("Unexpected builtin"); |
14489 | 8 | case X86::BI__builtin_ia32_scattersiv8df: |
14490 | 8 | IID = Intrinsic::x86_avx512_mask_scatter_dpd_512; |
14491 | 8 | break; |
14492 | 4 | case X86::BI__builtin_ia32_scattersiv16sf: |
14493 | 4 | IID = Intrinsic::x86_avx512_mask_scatter_dps_512; |
14494 | 4 | break; |
14495 | 4 | case X86::BI__builtin_ia32_scatterdiv8df: |
14496 | 4 | IID = Intrinsic::x86_avx512_mask_scatter_qpd_512; |
14497 | 4 | break; |
14498 | 4 | case X86::BI__builtin_ia32_scatterdiv16sf: |
14499 | 4 | IID = Intrinsic::x86_avx512_mask_scatter_qps_512; |
14500 | 4 | break; |
14501 | 8 | case X86::BI__builtin_ia32_scattersiv8di: |
14502 | 8 | IID = Intrinsic::x86_avx512_mask_scatter_dpq_512; |
14503 | 8 | break; |
14504 | 4 | case X86::BI__builtin_ia32_scattersiv16si: |
14505 | 4 | IID = Intrinsic::x86_avx512_mask_scatter_dpi_512; |
14506 | 4 | break; |
14507 | 4 | case X86::BI__builtin_ia32_scatterdiv8di: |
14508 | 4 | IID = Intrinsic::x86_avx512_mask_scatter_qpq_512; |
14509 | 4 | break; |
14510 | 4 | case X86::BI__builtin_ia32_scatterdiv16si: |
14511 | 4 | IID = Intrinsic::x86_avx512_mask_scatter_qpi_512; |
14512 | 4 | break; |
14513 | 2 | case X86::BI__builtin_ia32_scatterdiv2df: |
14514 | 2 | IID = Intrinsic::x86_avx512_mask_scatterdiv2_df; |
14515 | 2 | break; |
14516 | 2 | case X86::BI__builtin_ia32_scatterdiv2di: |
14517 | 2 | IID = Intrinsic::x86_avx512_mask_scatterdiv2_di; |
14518 | 2 | break; |
14519 | 2 | case X86::BI__builtin_ia32_scatterdiv4df: |
14520 | 2 | IID = Intrinsic::x86_avx512_mask_scatterdiv4_df; |
14521 | 2 | break; |
14522 | 2 | case X86::BI__builtin_ia32_scatterdiv4di: |
14523 | 2 | IID = Intrinsic::x86_avx512_mask_scatterdiv4_di; |
14524 | 2 | break; |
14525 | 2 | case X86::BI__builtin_ia32_scatterdiv4sf: |
14526 | 2 | IID = Intrinsic::x86_avx512_mask_scatterdiv4_sf; |
14527 | 2 | break; |
14528 | 2 | case X86::BI__builtin_ia32_scatterdiv4si: |
14529 | 2 | IID = Intrinsic::x86_avx512_mask_scatterdiv4_si; |
14530 | 2 | break; |
14531 | 2 | case X86::BI__builtin_ia32_scatterdiv8sf: |
14532 | 2 | IID = Intrinsic::x86_avx512_mask_scatterdiv8_sf; |
14533 | 2 | break; |
14534 | 2 | case X86::BI__builtin_ia32_scatterdiv8si: |
14535 | 2 | IID = Intrinsic::x86_avx512_mask_scatterdiv8_si; |
14536 | 2 | break; |
14537 | 2 | case X86::BI__builtin_ia32_scattersiv2df: |
14538 | 2 | IID = Intrinsic::x86_avx512_mask_scattersiv2_df; |
14539 | 2 | break; |
14540 | 2 | case X86::BI__builtin_ia32_scattersiv2di: |
14541 | 2 | IID = Intrinsic::x86_avx512_mask_scattersiv2_di; |
14542 | 2 | break; |
14543 | 2 | case X86::BI__builtin_ia32_scattersiv4df: |
14544 | 2 | IID = Intrinsic::x86_avx512_mask_scattersiv4_df; |
14545 | 2 | break; |
14546 | 2 | case X86::BI__builtin_ia32_scattersiv4di: |
14547 | 2 | IID = Intrinsic::x86_avx512_mask_scattersiv4_di; |
14548 | 2 | break; |
14549 | 2 | case X86::BI__builtin_ia32_scattersiv4sf: |
14550 | 2 | IID = Intrinsic::x86_avx512_mask_scattersiv4_sf; |
14551 | 2 | break; |
14552 | 2 | case X86::BI__builtin_ia32_scattersiv4si: |
14553 | 2 | IID = Intrinsic::x86_avx512_mask_scattersiv4_si; |
14554 | 2 | break; |
14555 | 2 | case X86::BI__builtin_ia32_scattersiv8sf: |
14556 | 2 | IID = Intrinsic::x86_avx512_mask_scattersiv8_sf; |
14557 | 2 | break; |
14558 | 2 | case X86::BI__builtin_ia32_scattersiv8si: |
14559 | 2 | IID = Intrinsic::x86_avx512_mask_scattersiv8_si; |
14560 | 2 | break; |
14561 | 72 | } |
14562 | | |
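 | | // Same sizing rule as the gathers above: the mask (Ops[1]) covers the
 | | // smaller of the index (Ops[2]) and data (Ops[3]) element counts.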
14563 | 72 | unsigned MinElts = std::min( |
14564 | 72 | cast<llvm::FixedVectorType>(Ops[2]->getType())->getNumElements(), |
14565 | 72 | cast<llvm::FixedVectorType>(Ops[3]->getType())->getNumElements()); |
14566 | 72 | Ops[1] = getMaskVecValue(*this, Ops[1], MinElts); |
14567 | 72 | Function *Intr = CGM.getIntrinsic(IID); |
14568 | 72 | return Builder.CreateCall(Intr, Ops); |
14569 | 72 | } |
14570 | | |
14571 | 14 | case X86::BI__builtin_ia32_vextractf128_pd256: |
14572 | 28 | case X86::BI__builtin_ia32_vextractf128_ps256: |
14573 | 42 | case X86::BI__builtin_ia32_vextractf128_si256: |
14574 | 54 | case X86::BI__builtin_ia32_extract128i256: |
14575 | 60 | case X86::BI__builtin_ia32_extractf64x4_mask: |
14576 | 66 | case X86::BI__builtin_ia32_extractf32x4_mask: |
14577 | 72 | case X86::BI__builtin_ia32_extracti64x4_mask: |
14578 | 78 | case X86::BI__builtin_ia32_extracti32x4_mask: |
14579 | 81 | case X86::BI__builtin_ia32_extractf32x8_mask: |
14580 | 84 | case X86::BI__builtin_ia32_extracti32x8_mask: |
14581 | 87 | case X86::BI__builtin_ia32_extractf32x4_256_mask: |
14582 | 90 | case X86::BI__builtin_ia32_extracti32x4_256_mask: |
14583 | 93 | case X86::BI__builtin_ia32_extractf64x2_256_mask: |
14584 | 96 | case X86::BI__builtin_ia32_extracti64x2_256_mask: |
14585 | 99 | case X86::BI__builtin_ia32_extractf64x2_512_mask: |
14586 | 102 | case X86::BI__builtin_ia32_extracti64x2_512_mask: { |
14587 | 102 | auto *DstTy = cast<llvm::FixedVectorType>(ConvertType(E->getType())); |
14588 | 102 | unsigned NumElts = DstTy->getNumElements(); |
14589 | 102 | unsigned SrcNumElts = |
14590 | 102 | cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements(); |
14591 | 102 | unsigned SubVectors = SrcNumElts / NumElts; |
14592 | 102 | unsigned Index = cast<ConstantInt>(Ops[1])->getZExtValue(); |
14593 | 102 | assert(llvm::isPowerOf2_32(SubVectors) && "Expected power of 2 subvectors"); |
14594 | 102 | Index &= SubVectors - 1; // Remove any extra bits. |
14595 | 102 | Index *= NumElts; |
14596 | | |
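 | | // Lower the extract to a shuffle pulling NumElts consecutive lanes;
 | | // e.g. extracting subvector 1 of a v8f64 as a v4f64 uses {4,5,6,7}.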
14597 | 102 | int Indices[16]; |
14598 | 458 | for (unsigned i = 0; i != NumElts; ++i)
14599 | 356 | Indices[i] = i + Index; |
14600 | | |
14601 | 102 | Value *Res = Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts), |
14602 | 102 | "extract"); |
14603 | | |
14604 | 102 | if (Ops.size() == 4) |
14605 | 48 | Res = EmitX86Select(*this, Ops[3], Res, Ops[2]); |
14606 | | |
14607 | 102 | return Res; |
14608 | 102 | } |
14609 | 9 | case X86::BI__builtin_ia32_vinsertf128_pd256: |
14610 | 18 | case X86::BI__builtin_ia32_vinsertf128_ps256: |
14611 | 27 | case X86::BI__builtin_ia32_vinsertf128_si256: |
14612 | 39 | case X86::BI__builtin_ia32_insert128i256: |
14613 | 45 | case X86::BI__builtin_ia32_insertf64x4: |
14614 | 51 | case X86::BI__builtin_ia32_insertf32x4: |
14615 | 57 | case X86::BI__builtin_ia32_inserti64x4: |
14616 | 63 | case X86::BI__builtin_ia32_inserti32x4: |
14617 | 66 | case X86::BI__builtin_ia32_insertf32x8: |
14618 | 69 | case X86::BI__builtin_ia32_inserti32x8: |
14619 | 72 | case X86::BI__builtin_ia32_insertf32x4_256: |
14620 | 75 | case X86::BI__builtin_ia32_inserti32x4_256: |
14621 | 78 | case X86::BI__builtin_ia32_insertf64x2_256: |
14622 | 81 | case X86::BI__builtin_ia32_inserti64x2_256: |
14623 | 84 | case X86::BI__builtin_ia32_insertf64x2_512: |
14624 | 87 | case X86::BI__builtin_ia32_inserti64x2_512: { |
14625 | 87 | unsigned DstNumElts = |
14626 | 87 | cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements(); |
14627 | 87 | unsigned SrcNumElts = |
14628 | 87 | cast<llvm::FixedVectorType>(Ops[1]->getType())->getNumElements(); |
14629 | 87 | unsigned SubVectors = DstNumElts / SrcNumElts; |
14630 | 87 | unsigned Index = cast<ConstantInt>(Ops[2])->getZExtValue(); |
14631 | 87 | assert(llvm::isPowerOf2_32(SubVectors) && "Expected power of 2 subvectors"); |
14632 | 87 | Index &= SubVectors - 1; // Remove any extra bits. |
14633 | 87 | Index *= SrcNumElts; |
14634 | | |
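 | | // Two shuffles: first widen the subvector to the destination width, then
 | | // blend it in; e.g. inserting a v4f32 into the upper half of a v8f32
 | | // ends with the final mask {0,1,2,3,8,9,10,11}.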
14635 | 87 | int Indices[16]; |
14636 | 819 | for (unsigned i = 0; i != DstNumElts; ++i)
14637 | 732 | Indices[i] = (i >= SrcNumElts) ? SrcNumElts + (i % SrcNumElts) : i;
14638 | | |
14639 | 87 | Value *Op1 = Builder.CreateShuffleVector( |
14640 | 87 | Ops[1], ArrayRef(Indices, DstNumElts), "widen"); |
14641 | | |
14642 | 819 | for (unsigned i = 0; i != DstNumElts; ++i) {
14643 | 732 | if (i >= Index && i < (Index + SrcNumElts))
14644 | 306 | Indices[i] = (i - Index) + DstNumElts; |
14645 | 426 | else |
14646 | 426 | Indices[i] = i; |
14647 | 732 | } |
14648 | | |
14649 | 87 | return Builder.CreateShuffleVector(Ops[0], Op1, |
14650 | 87 | ArrayRef(Indices, DstNumElts), "insert"); |
14651 | 87 | } |
14652 | 6 | case X86::BI__builtin_ia32_pmovqd512_mask: |
14653 | 12 | case X86::BI__builtin_ia32_pmovwb512_mask: { |
14654 | 12 | Value *Res = Builder.CreateTrunc(Ops[0], Ops[1]->getType()); |
14655 | 12 | return EmitX86Select(*this, Ops[2], Res, Ops[1]); |
14656 | 6 | } |
14657 | 6 | case X86::BI__builtin_ia32_pmovdb512_mask: |
14658 | 12 | case X86::BI__builtin_ia32_pmovdw512_mask: |
14659 | 18 | case X86::BI__builtin_ia32_pmovqw512_mask: { |
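 | | // An all-ones mask keeps every lane, so the narrowing move is just a
 | | // plain trunc; otherwise fall back to the target intrinsic so the
 | | // masked-merge semantics survive.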
14660 | 18 | if (const auto *C = dyn_cast<Constant>(Ops[2])) |
14661 | 6 | if (C->isAllOnesValue()) |
14662 | 6 | return Builder.CreateTrunc(Ops[0], Ops[1]->getType()); |
14663 | | |
14664 | 12 | Intrinsic::ID IID; |
14665 | 12 | switch (BuiltinID) { |
14666 | 0 | default: llvm_unreachable("Unsupported intrinsic!"); |
14667 | 4 | case X86::BI__builtin_ia32_pmovdb512_mask: |
14668 | 4 | IID = Intrinsic::x86_avx512_mask_pmov_db_512; |
14669 | 4 | break; |
14670 | 4 | case X86::BI__builtin_ia32_pmovdw512_mask: |
14671 | 4 | IID = Intrinsic::x86_avx512_mask_pmov_dw_512; |
14672 | 4 | break; |
14673 | 4 | case X86::BI__builtin_ia32_pmovqw512_mask: |
14674 | 4 | IID = Intrinsic::x86_avx512_mask_pmov_qw_512; |
14675 | 4 | break; |
14676 | 12 | } |
14677 | | |
14678 | 12 | Function *Intr = CGM.getIntrinsic(IID); |
14679 | 12 | return Builder.CreateCall(Intr, Ops); |
14680 | 12 | } |
14681 | 4 | case X86::BI__builtin_ia32_pblendw128: |
14682 | 8 | case X86::BI__builtin_ia32_blendpd: |
14683 | 12 | case X86::BI__builtin_ia32_blendps: |
14684 | 17 | case X86::BI__builtin_ia32_blendpd256: |
14685 | 22 | case X86::BI__builtin_ia32_blendps256: |
14686 | 26 | case X86::BI__builtin_ia32_pblendw256: |
14687 | 30 | case X86::BI__builtin_ia32_pblendd128: |
14688 | 34 | case X86::BI__builtin_ia32_pblendd256: { |
14689 | 34 | unsigned NumElts = |
14690 | 34 | cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements(); |
14691 | 34 | unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue(); |
14692 | | |
14693 | 34 | int Indices[16]; |
14694 | | // If there are more than 8 elements, the immediate is used twice so make |
14695 | | // sure we handle that. |
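 | | // e.g. pblendd128 with Imm=0b0101 picks {Ops[1][0], Ops[0][1],
 | | // Ops[1][2], Ops[0][3]}, i.e. shuffle mask {4,1,6,3}.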
14696 | 262 | for (unsigned i = 0; i != NumElts; ++i)
14697 | 228 | Indices[i] = ((Imm >> (i % 8)) & 0x1) ? NumElts + i : i;
14698 | | |
14699 | 34 | return Builder.CreateShuffleVector(Ops[0], Ops[1], |
14700 | 34 | ArrayRef(Indices, NumElts), "blend"); |
14701 | 30 | } |
14702 | 9 | case X86::BI__builtin_ia32_pshuflw: |
14703 | 17 | case X86::BI__builtin_ia32_pshuflw256: |
14704 | 23 | case X86::BI__builtin_ia32_pshuflw512: { |
14705 | 23 | uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue(); |
14706 | 23 | auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType()); |
14707 | 23 | unsigned NumElts = Ty->getNumElements(); |
14708 | | |
14709 | | // Splat the 8-bits of immediate 4 times to help the loop wrap around. |
14710 | 23 | Imm = (Imm & 0xff) * 0x01010101; |
14711 | | |
14712 | 23 | int Indices[32]; |
14713 | 72 | for (unsigned l = 0; l != NumElts; l += 8) {
14714 | 245 | for (unsigned i = 0; i != 4; ++i) {
14715 | 196 | Indices[l + i] = l + (Imm & 3); |
14716 | 196 | Imm >>= 2; |
14717 | 196 | } |
14718 | 245 | for (unsigned i = 4; i != 8; ++i)
14719 | 196 | Indices[l + i] = l + i; |
14720 | 49 | } |
14721 | | |
14722 | 23 | return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts), |
14723 | 23 | "pshuflw"); |
14724 | 17 | } |
14725 | 9 | case X86::BI__builtin_ia32_pshufhw: |
14726 | 17 | case X86::BI__builtin_ia32_pshufhw256: |
14727 | 23 | case X86::BI__builtin_ia32_pshufhw512: { |
14728 | 23 | uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue(); |
14729 | 23 | auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType()); |
14730 | 23 | unsigned NumElts = Ty->getNumElements(); |
14731 | | |
14732 | | // Splat the 8-bits of immediate 4 times to help the loop wrap around. |
14733 | 23 | Imm = (Imm & 0xff) * 0x01010101; |
14734 | | |
14735 | 23 | int Indices[32]; |
14736 | 72 | for (unsigned l = 0; l != NumElts; l += 8) {
14737 | 245 | for (unsigned i = 0; i != 4; ++i)
14738 | 196 | Indices[l + i] = l + i; |
14739 | 245 | for (unsigned i = 4; i != 8; ++i) {
14740 | 196 | Indices[l + i] = l + 4 + (Imm & 3); |
14741 | 196 | Imm >>= 2; |
14742 | 196 | } |
14743 | 49 | } |
14744 | | |
14745 | 23 | return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts), |
14746 | 23 | "pshufhw"); |
14747 | 17 | } |
14748 | 7 | case X86::BI__builtin_ia32_pshufd: |
14749 | 13 | case X86::BI__builtin_ia32_pshufd256: |
14750 | 19 | case X86::BI__builtin_ia32_pshufd512: |
14751 | 28 | case X86::BI__builtin_ia32_vpermilpd: |
14752 | 44 | case X86::BI__builtin_ia32_vpermilps: |
14753 | 53 | case X86::BI__builtin_ia32_vpermilpd256: |
14754 | 62 | case X86::BI__builtin_ia32_vpermilps256: |
14755 | 68 | case X86::BI__builtin_ia32_vpermilpd512: |
14756 | 74 | case X86::BI__builtin_ia32_vpermilps512: { |
14757 | 74 | uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue(); |
14758 | 74 | auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType()); |
14759 | 74 | unsigned NumElts = Ty->getNumElements(); |
14760 | 74 | unsigned NumLanes = Ty->getPrimitiveSizeInBits() / 128; |
14761 | 74 | unsigned NumLaneElts = NumElts / NumLanes; |
14762 | | |
14763 | | // Splat the 8-bits of immediate 4 times to help the loop wrap around. |
14764 | 74 | Imm = (Imm & 0xff) * 0x01010101; |
14765 | | |
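 | | // e.g. pshufd with Imm=0x1B reverses each 128-bit lane: the per-lane
 | | // indices come out as {3,2,1,0}.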
14766 | 74 | int Indices[16]; |
14767 | 226 | for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
14768 | 658 | for (unsigned i = 0; i != NumLaneElts; ++i) {
14769 | 506 | Indices[i + l] = (Imm % NumLaneElts) + l; |
14770 | 506 | Imm /= NumLaneElts; |
14771 | 506 | } |
14772 | 152 | } |
14773 | | |
14774 | 74 | return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts), |
14775 | 74 | "permil"); |
14776 | 68 | } |
14777 | 7 | case X86::BI__builtin_ia32_shufpd: |
14778 | 14 | case X86::BI__builtin_ia32_shufpd256: |
14779 | 20 | case X86::BI__builtin_ia32_shufpd512: |
14780 | 24 | case X86::BI__builtin_ia32_shufps: |
14781 | 33 | case X86::BI__builtin_ia32_shufps256: |
14782 | 39 | case X86::BI__builtin_ia32_shufps512: { |
14783 | 39 | uint32_t Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue(); |
14784 | 39 | auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType()); |
14785 | 39 | unsigned NumElts = Ty->getNumElements(); |
14786 | 39 | unsigned NumLanes = Ty->getPrimitiveSizeInBits() / 128; |
14787 | 39 | unsigned NumLaneElts = NumElts / NumLanes; |
14788 | | |
14789 | | // Splat the 8-bits of immediate 4 times to help the loop wrap around. |
14790 | 39 | Imm = (Imm & 0xff) * 0x01010101; |
14791 | | |
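 | | // The low half of each lane selects from Ops[0] and the high half from
 | | // Ops[1]; e.g. 128-bit shufps with Imm=0xE4 yields indices {0,1,6,7}.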
14792 | 39 | int Indices[16]; |
14793 | 130 | for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
14794 | 365 | for (unsigned i = 0; i != NumLaneElts; ++i) {
14795 | 274 | unsigned Index = Imm % NumLaneElts; |
14796 | 274 | Imm /= NumLaneElts; |
14797 | 274 | if (i >= (NumLaneElts / 2)) |
14798 | 137 | Index += NumElts; |
14799 | 274 | Indices[l + i] = l + Index; |
14800 | 274 | } |
14801 | 91 | } |
14802 | | |
14803 | 39 | return Builder.CreateShuffleVector(Ops[0], Ops[1], |
14804 | 39 | ArrayRef(Indices, NumElts), "shufp"); |
14805 | 33 | } |
14806 | 7 | case X86::BI__builtin_ia32_permdi256: |
14807 | 14 | case X86::BI__builtin_ia32_permdf256: |
14808 | 20 | case X86::BI__builtin_ia32_permdi512: |
14809 | 26 | case X86::BI__builtin_ia32_permdf512: { |
14810 | 26 | unsigned Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue(); |
14811 | 26 | auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType()); |
14812 | 26 | unsigned NumElts = Ty->getNumElements(); |
14813 | | |
14814 | | // These intrinsics operate on 256-bit lanes of four 64-bit elements. |
14815 | 26 | int Indices[8]; |
14816 | 64 | for (unsigned l = 0; l != NumElts; l += 4)
14817 | 190 | for (unsigned i = 0; i != 4; ++i)
14818 | 152 | Indices[l + i] = l + ((Imm >> (2 * i)) & 0x3); |
14819 | | |
14820 | 26 | return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts), |
14821 | 26 | "perm"); |
14822 | 20 | } |
14823 | 14 | case X86::BI__builtin_ia32_palignr128: |
14824 | 26 | case X86::BI__builtin_ia32_palignr256: |
14825 | 32 | case X86::BI__builtin_ia32_palignr512: { |
14826 | 32 | unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0xff; |
14827 | | |
14828 | 32 | unsigned NumElts = |
14829 | 32 | cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements(); |
14830 | 32 | assert(NumElts % 16 == 0); |
14831 | | |
14832 | | // If palignr is shifting the pair of vectors more than the size of two |
14833 | | // lanes, emit zero. |
14834 | 32 | if (ShiftVal >= 32) |
14835 | 3 | return llvm::Constant::getNullValue(ConvertType(E->getType())); |
14836 | | |
14837 | | // If palignr is shifting the pair of input vectors more than one lane, |
14838 | | // but less than two lanes, convert to shifting in zeroes. |
14839 | 29 | if (ShiftVal > 16) { |
14840 | 7 | ShiftVal -= 16; |
14841 | 7 | Ops[1] = Ops[0]; |
14842 | 7 | Ops[0] = llvm::Constant::getNullValue(Ops[0]->getType()); |
14843 | 7 | } |
14844 | | |
14845 | 29 | int Indices[64]; |
14846 | | // 256-bit palignr operates on 128-bit lanes so we need to handle that |
14847 | 88 | for (unsigned l = 0; l != NumElts; l += 16) {
14848 | 1.00k | for (unsigned i = 0; i != 16; ++i) {
14849 | 944 | unsigned Idx = ShiftVal + i; |
14850 | 944 | if (Idx >= 16) |
14851 | 134 | Idx += NumElts - 16; // End of lane, switch operand. |
14852 | 944 | Indices[l + i] = Idx + l; |
14853 | 944 | } |
14854 | 59 | } |
14855 | | |
14856 | 29 | return Builder.CreateShuffleVector(Ops[1], Ops[0], |
14857 | 29 | ArrayRef(Indices, NumElts), "palignr"); |
14858 | 32 | } |
14859 | 3 | case X86::BI__builtin_ia32_alignd128: |
14860 | 6 | case X86::BI__builtin_ia32_alignd256: |
14861 | 12 | case X86::BI__builtin_ia32_alignd512: |
14862 | 15 | case X86::BI__builtin_ia32_alignq128: |
14863 | 18 | case X86::BI__builtin_ia32_alignq256: |
14864 | 24 | case X86::BI__builtin_ia32_alignq512: { |
14865 | 24 | unsigned NumElts = |
14866 | 24 | cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements(); |
14867 | 24 | unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0xff; |
14868 | | |
14869 | | // Mask the shift amount to width of a vector. |
14870 | 24 | ShiftVal &= NumElts - 1; |
14871 | | |
14872 | 24 | int Indices[16]; |
14873 | 222 | for (unsigned i = 0; i != NumElts; ++i)
14874 | 198 | Indices[i] = i + ShiftVal; |
14875 | | |
14876 | 24 | return Builder.CreateShuffleVector(Ops[1], Ops[0], |
14877 | 24 | ArrayRef(Indices, NumElts), "valign"); |
14878 | 18 | } |
14879 | 3 | case X86::BI__builtin_ia32_shuf_f32x4_256: |
14880 | 6 | case X86::BI__builtin_ia32_shuf_f64x2_256: |
14881 | 9 | case X86::BI__builtin_ia32_shuf_i32x4_256: |
14882 | 12 | case X86::BI__builtin_ia32_shuf_i64x2_256: |
14883 | 18 | case X86::BI__builtin_ia32_shuf_f32x4: |
14884 | 24 | case X86::BI__builtin_ia32_shuf_f64x2: |
14885 | 30 | case X86::BI__builtin_ia32_shuf_i32x4: |
14886 | 36 | case X86::BI__builtin_ia32_shuf_i64x2: { |
14887 | 36 | unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue(); |
14888 | 36 | auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType()); |
14889 | 36 | unsigned NumElts = Ty->getNumElements(); |
14890 | 36 | unsigned NumLanes = Ty->getPrimitiveSizeInBits() == 512 ? 4 : 2;
14891 | 36 | unsigned NumLaneElts = NumElts / NumLanes; |
14892 | | |
14893 | 36 | int Indices[16]; |
14894 | 156 | for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
14895 | 120 | unsigned Index = (Imm % NumLanes) * NumLaneElts; |
14896 | 120 | Imm /= NumLanes; // Discard the bits we just used. |
14897 | 120 | if (l >= (NumElts / 2)) |
14898 | 60 | Index += NumElts; // Switch to other source. |
14899 | 480 | for (unsigned i = 0; i != NumLaneElts; ++i) {
14900 | 360 | Indices[l + i] = Index + i; |
14901 | 360 | } |
14902 | 120 | } |
14903 | | |
14904 | 36 | return Builder.CreateShuffleVector(Ops[0], Ops[1], |
14905 | 36 | ArrayRef(Indices, NumElts), "shuf"); |
14906 | 30 | } |
14907 | | |
14908 | 9 | case X86::BI__builtin_ia32_vperm2f128_pd256: |
14909 | 18 | case X86::BI__builtin_ia32_vperm2f128_ps256: |
14910 | 27 | case X86::BI__builtin_ia32_vperm2f128_si256: |
14911 | 31 | case X86::BI__builtin_ia32_permti256: { |
14912 | 31 | unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue(); |
14913 | 31 | unsigned NumElts = |
14914 | 31 | cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements(); |
14915 | | |
14916 | | // This takes a very simple approach since there are two lanes and a |
14917 | | // shuffle can have 2 inputs. So we reserve the first input for the first |
14918 | | // lane and the second input for the second lane. This may result in |
14919 | | // duplicate sources, but this can be dealt with in the backend. |
14920 | | |
14921 | 31 | Value *OutOps[2]; |
14922 | 31 | int Indices[8]; |
14923 | 93 | for (unsigned l = 0; l != 2; ++l) {
14924 | | // Determine the source for this lane. |
14925 | 62 | if (Imm & (1 << ((l * 4) + 3))) |
14926 | 4 | OutOps[l] = llvm::ConstantAggregateZero::get(Ops[0]->getType()); |
14927 | 58 | else if (Imm & (1 << ((l * 4) + 1))) |
14928 | 31 | OutOps[l] = Ops[1]; |
14929 | 27 | else |
14930 | 27 | OutOps[l] = Ops[0]; |
14931 | | |
14932 | 258 | for (unsigned i = 0; i != NumElts/2; ++i) {
14933 | | // Start with ith element of the source for this lane. |
14934 | 196 | unsigned Idx = (l * NumElts) + i; |
14935 | | // If bit 0 of the immediate half is set, switch to the high half of |
14936 | | // the source. |
14937 | 196 | if (Imm & (1 << (l * 4))) |
14938 | 112 | Idx += NumElts/2; |
14939 | 196 | Indices[(l * (NumElts/2)) + i] = Idx; |
14940 | 196 | } |
14941 | 62 | } |
14942 | | |
14943 | 31 | return Builder.CreateShuffleVector(OutOps[0], OutOps[1], |
14944 | 31 | ArrayRef(Indices, NumElts), "vperm"); |
14945 | 27 | } |
14946 | | |
14947 | 18 | case X86::BI__builtin_ia32_pslldqi128_byteshift: |
14948 | 26 | case X86::BI__builtin_ia32_pslldqi256_byteshift: |
14949 | 28 | case X86::BI__builtin_ia32_pslldqi512_byteshift: { |
14950 | 28 | unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff; |
14951 | 28 | auto *ResultType = cast<llvm::FixedVectorType>(Ops[0]->getType()); |
14952 | | // Builtin type is vXi64 so multiply by 8 to get bytes. |
14953 | 28 | unsigned NumElts = ResultType->getNumElements() * 8; |
14954 | | |
14955 | | // If pslldq is shifting the vector more than 15 bytes, emit zero. |
14956 | 28 | if (ShiftVal >= 16) |
14957 | 6 | return llvm::Constant::getNullValue(ResultType); |
14958 | | |
14959 | 22 | int Indices[64]; |
14960 | | // 256/512-bit pslldq operates on 128-bit lanes so we need to handle that |
14961 | 58 | for (unsigned l = 0; l != NumElts; l += 16) {
14962 | 612 | for (unsigned i = 0; i != 16; ++i) {
14963 | 576 | unsigned Idx = NumElts + i - ShiftVal; |
14964 | 576 | if (Idx < NumElts) Idx -= NumElts - 16; // end of lane, switch operand.
14965 | 576 | Indices[l + i] = Idx + l; |
14966 | 576 | } |
14967 | 36 | } |
14968 | | |
14969 | 22 | auto *VecTy = llvm::FixedVectorType::get(Int8Ty, NumElts); |
14970 | 22 | Value *Cast = Builder.CreateBitCast(Ops[0], VecTy, "cast"); |
14971 | 22 | Value *Zero = llvm::Constant::getNullValue(VecTy); |
14972 | 22 | Value *SV = Builder.CreateShuffleVector( |
14973 | 22 | Zero, Cast, ArrayRef(Indices, NumElts), "pslldq"); |
14974 | 22 | return Builder.CreateBitCast(SV, Ops[0]->getType(), "cast"); |
14975 | 28 | } |
14976 | 18 | case X86::BI__builtin_ia32_psrldqi128_byteshift: |
14977 | 26 | case X86::BI__builtin_ia32_psrldqi256_byteshift: |
14978 | 28 | case X86::BI__builtin_ia32_psrldqi512_byteshift: { |
14979 | 28 | unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff; |
14980 | 28 | auto *ResultType = cast<llvm::FixedVectorType>(Ops[0]->getType()); |
14981 | | // Builtin type is vXi64 so multiply by 8 to get bytes. |
14982 | 28 | unsigned NumElts = ResultType->getNumElements() * 8; |
14983 | | |
14984 | | // If psrldq is shifting the vector more than 15 bytes, emit zero. |
14985 | 28 | if (ShiftVal >= 16) |
14986 | 6 | return llvm::Constant::getNullValue(ResultType); |
14987 | | |
14988 | 22 | int Indices[64]; |
14989 | | // 256/512-bit psrldq operates on 128-bit lanes so we need to handle that |
14990 | 58 | for (unsigned l = 0; l != NumElts; l += 16) {
14991 | 612 | for (unsigned i = 0; i != 16; ++i) {
14992 | 576 | unsigned Idx = i + ShiftVal; |
14993 | 576 | if (Idx >= 16) Idx += NumElts - 16; // end of lane, switch operand.
14994 | 576 | Indices[l + i] = Idx + l; |
14995 | 576 | } |
14996 | 36 | } |
14997 | | |
14998 | 22 | auto *VecTy = llvm::FixedVectorType::get(Int8Ty, NumElts); |
14999 | 22 | Value *Cast = Builder.CreateBitCast(Ops[0], VecTy, "cast"); |
15000 | 22 | Value *Zero = llvm::Constant::getNullValue(VecTy); |
15001 | 22 | Value *SV = Builder.CreateShuffleVector( |
15002 | 22 | Cast, Zero, ArrayRef(Indices, NumElts), "psrldq"); |
15003 | 22 | return Builder.CreateBitCast(SV, ResultType, "cast"); |
15004 | 28 | } |
15005 | 1 | case X86::BI__builtin_ia32_kshiftliqi: |
15006 | 3 | case X86::BI__builtin_ia32_kshiftlihi: |
15007 | 5 | case X86::BI__builtin_ia32_kshiftlisi: |
15008 | 7 | case X86::BI__builtin_ia32_kshiftlidi: { |
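 | | // Model kshiftl as a shuffle against a zero mask vector; e.g. a
 | | // shift-by-2 of an 8-bit mask uses indices {6,7,8,...,13}, so the low
 | | // two result lanes read zeros and the rest read In[0..5].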
15009 | 7 | unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff; |
15010 | 7 | unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth(); |
15011 | | |
15012 | 7 | if (ShiftVal >= NumElts) |
15013 | 0 | return llvm::Constant::getNullValue(Ops[0]->getType()); |
15014 | | |
15015 | 7 | Value *In = getMaskVecValue(*this, Ops[0], NumElts); |
15016 | | |
15017 | 7 | int Indices[64]; |
15018 | 239 | for (unsigned i = 0; i != NumElts; ++i)
15019 | 232 | Indices[i] = NumElts + i - ShiftVal; |
15020 | | |
15021 | 7 | Value *Zero = llvm::Constant::getNullValue(In->getType()); |
15022 | 7 | Value *SV = Builder.CreateShuffleVector( |
15023 | 7 | Zero, In, ArrayRef(Indices, NumElts), "kshiftl"); |
15024 | 7 | return Builder.CreateBitCast(SV, Ops[0]->getType()); |
15025 | 7 | } |
15026 | 1 | case X86::BI__builtin_ia32_kshiftriqi: |
15027 | 3 | case X86::BI__builtin_ia32_kshiftrihi: |
15028 | 5 | case X86::BI__builtin_ia32_kshiftrisi: |
15029 | 7 | case X86::BI__builtin_ia32_kshiftridi: { |
15030 | 7 | unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff; |
15031 | 7 | unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth(); |
15032 | | |
15033 | 7 | if (ShiftVal >= NumElts) |
15034 | 0 | return llvm::Constant::getNullValue(Ops[0]->getType()); |
15035 | | |
15036 | 7 | Value *In = getMaskVecValue(*this, Ops[0], NumElts); |
15037 | | |
15038 | 7 | int Indices[64]; |
15039 | 239 | for (unsigned i = 0; i != NumElts; ++i)
15040 | 232 | Indices[i] = i + ShiftVal; |
15041 | | |
15042 | 7 | Value *Zero = llvm::Constant::getNullValue(In->getType()); |
15043 | 7 | Value *SV = Builder.CreateShuffleVector( |
15044 | 7 | In, Zero, ArrayRef(Indices, NumElts), "kshiftr"); |
15045 | 7 | return Builder.CreateBitCast(SV, Ops[0]->getType()); |
15046 | 7 | } |
15047 | 7 | case X86::BI__builtin_ia32_movnti: |
15048 | 12 | case X86::BI__builtin_ia32_movnti64: |
15049 | 14 | case X86::BI__builtin_ia32_movntsd: |
15050 | 16 | case X86::BI__builtin_ia32_movntss: { |
15051 | 16 | llvm::MDNode *Node = llvm::MDNode::get( |
15052 | 16 | getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1))); |
15053 | | |
15054 | 16 | Value *Ptr = Ops[0]; |
15055 | 16 | Value *Src = Ops[1]; |
15056 | | |
15057 | | // Extract the 0'th element of the source vector. |
15058 | 16 | if (BuiltinID == X86::BI__builtin_ia32_movntsd || |
15059 | 16 | BuiltinID == X86::BI__builtin_ia32_movntss)
15060 | 4 | Src = Builder.CreateExtractElement(Src, (uint64_t)0, "extract"); |
15061 | | |
15062 | | // Unaligned nontemporal store of the scalar value. |
15063 | 16 | StoreInst *SI = Builder.CreateDefaultAlignedStore(Src, Ptr); |
15064 | 16 | SI->setMetadata(llvm::LLVMContext::MD_nontemporal, Node); |
15065 | 16 | SI->setAlignment(llvm::Align(1)); |
15066 | 16 | return SI; |
15067 | 14 | } |
15068 | | // Rotate is a special case of funnel shift - 1st 2 args are the same. |
15069 | 4 | case X86::BI__builtin_ia32_vprotb: |
15070 | 8 | case X86::BI__builtin_ia32_vprotw: |
15071 | 12 | case X86::BI__builtin_ia32_vprotd: |
15072 | 16 | case X86::BI__builtin_ia32_vprotq: |
15073 | 20 | case X86::BI__builtin_ia32_vprotbi: |
15074 | 24 | case X86::BI__builtin_ia32_vprotwi: |
15075 | 28 | case X86::BI__builtin_ia32_vprotdi: |
15076 | 32 | case X86::BI__builtin_ia32_vprotqi: |
15077 | 35 | case X86::BI__builtin_ia32_prold128: |
15078 | 38 | case X86::BI__builtin_ia32_prold256: |
15079 | 44 | case X86::BI__builtin_ia32_prold512: |
15080 | 47 | case X86::BI__builtin_ia32_prolq128: |
15081 | 50 | case X86::BI__builtin_ia32_prolq256: |
15082 | 56 | case X86::BI__builtin_ia32_prolq512: |
15083 | 57 | case X86::BI__builtin_ia32_prolvd128: |
15084 | 58 | case X86::BI__builtin_ia32_prolvd256: |
15085 | 60 | case X86::BI__builtin_ia32_prolvd512: |
15086 | 61 | case X86::BI__builtin_ia32_prolvq128: |
15087 | 62 | case X86::BI__builtin_ia32_prolvq256: |
15088 | 64 | case X86::BI__builtin_ia32_prolvq512: |
15089 | 64 | return EmitX86FunnelShift(*this, Ops[0], Ops[0], Ops[1], false); |
15090 | 3 | case X86::BI__builtin_ia32_prord128: |
15091 | 6 | case X86::BI__builtin_ia32_prord256: |
15092 | 12 | case X86::BI__builtin_ia32_prord512: |
15093 | 15 | case X86::BI__builtin_ia32_prorq128: |
15094 | 18 | case X86::BI__builtin_ia32_prorq256: |
15095 | 24 | case X86::BI__builtin_ia32_prorq512: |
15096 | 25 | case X86::BI__builtin_ia32_prorvd128: |
15097 | 26 | case X86::BI__builtin_ia32_prorvd256: |
15098 | 28 | case X86::BI__builtin_ia32_prorvd512: |
15099 | 29 | case X86::BI__builtin_ia32_prorvq128: |
15100 | 30 | case X86::BI__builtin_ia32_prorvq256: |
15101 | 32 | case X86::BI__builtin_ia32_prorvq512: |
15102 | 32 | return EmitX86FunnelShift(*this, Ops[0], Ops[0], Ops[1], true); |
15103 | 107 | case X86::BI__builtin_ia32_selectb_128: |
15104 | 210 | case X86::BI__builtin_ia32_selectb_256: |
15105 | 309 | case X86::BI__builtin_ia32_selectb_512: |
15106 | 486 | case X86::BI__builtin_ia32_selectw_128: |
15107 | 663 | case X86::BI__builtin_ia32_selectw_256: |
15108 | 836 | case X86::BI__builtin_ia32_selectw_512: |
15109 | 947 | case X86::BI__builtin_ia32_selectd_128: |
15110 | 1.06k | case X86::BI__builtin_ia32_selectd_256: |
15111 | 1.25k | case X86::BI__builtin_ia32_selectd_512: |
15112 | 1.35k | case X86::BI__builtin_ia32_selectq_128: |
15113 | 1.45k | case X86::BI__builtin_ia32_selectq_256: |
15114 | 1.65k | case X86::BI__builtin_ia32_selectq_512: |
15115 | 1.69k | case X86::BI__builtin_ia32_selectph_128: |
15116 | 1.73k | case X86::BI__builtin_ia32_selectph_256: |
15117 | 1.76k | case X86::BI__builtin_ia32_selectph_512: |
15118 | 1.76k | case X86::BI__builtin_ia32_selectpbf_128: |
15119 | 1.76k | case X86::BI__builtin_ia32_selectpbf_256: |
15120 | 1.76k | case X86::BI__builtin_ia32_selectpbf_512: |
15121 | 1.84k | case X86::BI__builtin_ia32_selectps_128: |
15122 | 1.92k | case X86::BI__builtin_ia32_selectps_256: |
15123 | 2.08k | case X86::BI__builtin_ia32_selectps_512: |
15124 | 2.15k | case X86::BI__builtin_ia32_selectpd_128: |
15125 | 2.23k | case X86::BI__builtin_ia32_selectpd_256: |
15126 | 2.40k | case X86::BI__builtin_ia32_selectpd_512: |
15127 | 2.40k | return EmitX86Select(*this, Ops[0], Ops[1], Ops[2]); |
15128 | 10 | case X86::BI__builtin_ia32_selectsh_128: |
15129 | 10 | case X86::BI__builtin_ia32_selectsbf_128: |
15130 | 30 | case X86::BI__builtin_ia32_selectss_128: |
15131 | 50 | case X86::BI__builtin_ia32_selectsd_128: { |
15132 | 50 | Value *A = Builder.CreateExtractElement(Ops[1], (uint64_t)0); |
15133 | 50 | Value *B = Builder.CreateExtractElement(Ops[2], (uint64_t)0); |
15134 | 50 | A = EmitX86ScalarSelect(*this, Ops[0], A, B); |
15135 | 50 | return Builder.CreateInsertElement(Ops[1], A, (uint64_t)0); |
15136 | 30 | } |
15137 | 36 | case X86::BI__builtin_ia32_cmpb128_mask: |
15138 | 72 | case X86::BI__builtin_ia32_cmpb256_mask: |
15139 | 108 | case X86::BI__builtin_ia32_cmpb512_mask: |
15140 | 144 | case X86::BI__builtin_ia32_cmpw128_mask: |
15141 | 180 | case X86::BI__builtin_ia32_cmpw256_mask: |
15142 | 216 | case X86::BI__builtin_ia32_cmpw512_mask: |
15143 | 235 | case X86::BI__builtin_ia32_cmpd128_mask: |
15144 | 253 | case X86::BI__builtin_ia32_cmpd256_mask: |
15145 | 290 | case X86::BI__builtin_ia32_cmpd512_mask: |
15146 | 308 | case X86::BI__builtin_ia32_cmpq128_mask: |
15147 | 327 | case X86::BI__builtin_ia32_cmpq256_mask: |
15148 | 361 | case X86::BI__builtin_ia32_cmpq512_mask: { |
15149 | 361 | unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7; |
15150 | 361 | return EmitX86MaskedCompare(*this, CC, true, Ops); |
15151 | 327 | } |
15152 | 28 | case X86::BI__builtin_ia32_ucmpb128_mask: |
15153 | 56 | case X86::BI__builtin_ia32_ucmpb256_mask: |
15154 | 166 | case X86::BI__builtin_ia32_ucmpb512_mask: |
15155 | 194 | case X86::BI__builtin_ia32_ucmpw128_mask: |
15156 | 222 | case X86::BI__builtin_ia32_ucmpw256_mask: |
15157 | 332 | case X86::BI__builtin_ia32_ucmpw512_mask: |
15158 | 346 | case X86::BI__builtin_ia32_ucmpd128_mask: |
15159 | 358 | case X86::BI__builtin_ia32_ucmpd256_mask: |
15160 | 497 | case X86::BI__builtin_ia32_ucmpd512_mask: |
15161 | 511 | case X86::BI__builtin_ia32_ucmpq128_mask: |
15162 | 523 | case X86::BI__builtin_ia32_ucmpq256_mask: |
15163 | 590 | case X86::BI__builtin_ia32_ucmpq512_mask: { |
15164 | 590 | unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7; |
15165 | 590 | return EmitX86MaskedCompare(*this, CC, false, Ops); |
15166 | 523 | } |
15167 | 36 | case X86::BI__builtin_ia32_vpcomb: |
15168 | 72 | case X86::BI__builtin_ia32_vpcomw: |
15169 | 108 | case X86::BI__builtin_ia32_vpcomd: |
15170 | 144 | case X86::BI__builtin_ia32_vpcomq: |
15171 | 144 | return EmitX86vpcom(*this, Ops, true); |
15172 | 36 | case X86::BI__builtin_ia32_vpcomub: |
15173 | 72 | case X86::BI__builtin_ia32_vpcomuw: |
15174 | 108 | case X86::BI__builtin_ia32_vpcomud: |
15175 | 144 | case X86::BI__builtin_ia32_vpcomuq: |
15176 | 144 | return EmitX86vpcom(*this, Ops, false); |
15177 | | |
15178 | 2 | case X86::BI__builtin_ia32_kortestcqi: |
15179 | 8 | case X86::BI__builtin_ia32_kortestchi: |
15180 | 12 | case X86::BI__builtin_ia32_kortestcsi: |
15181 | 16 | case X86::BI__builtin_ia32_kortestcdi: { |
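 | | // kortestc: OR the two masks and compare for all-ones; the i1 compare
 | | // result is zero-extended to the builtin's integer result type.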
15182 | 16 | Value *Or = EmitX86MaskLogic(*this, Instruction::Or, Ops); |
15183 | 16 | Value *C = llvm::Constant::getAllOnesValue(Ops[0]->getType()); |
15184 | 16 | Value *Cmp = Builder.CreateICmpEQ(Or, C); |
15185 | 16 | return Builder.CreateZExt(Cmp, ConvertType(E->getType())); |
15186 | 12 | } |
15187 | 2 | case X86::BI__builtin_ia32_kortestzqi: |
15188 | 8 | case X86::BI__builtin_ia32_kortestzhi: |
15189 | 12 | case X86::BI__builtin_ia32_kortestzsi: |
15190 | 16 | case X86::BI__builtin_ia32_kortestzdi: { |
15191 | 16 | Value *Or = EmitX86MaskLogic(*this, Instruction::Or, Ops); |
15192 | 16 | Value *C = llvm::Constant::getNullValue(Ops[0]->getType()); |
15193 | 16 | Value *Cmp = Builder.CreateICmpEQ(Or, C); |
15194 | 16 | return Builder.CreateZExt(Cmp, ConvertType(E->getType())); |
15195 | 12 | } |
15196 | | |
15197 | 2 | case X86::BI__builtin_ia32_ktestcqi: |
15198 | 4 | case X86::BI__builtin_ia32_ktestzqi: |
15199 | 6 | case X86::BI__builtin_ia32_ktestchi: |
15200 | 8 | case X86::BI__builtin_ia32_ktestzhi: |
15201 | 12 | case X86::BI__builtin_ia32_ktestcsi: |
15202 | 16 | case X86::BI__builtin_ia32_ktestzsi: |
15203 | 20 | case X86::BI__builtin_ia32_ktestcdi: |
15204 | 24 | case X86::BI__builtin_ia32_ktestzdi: { |
15205 | 24 | Intrinsic::ID IID; |
15206 | 24 | switch (BuiltinID) { |
15207 | 0 | default: llvm_unreachable("Unsupported intrinsic!"); |
15208 | 2 | case X86::BI__builtin_ia32_ktestcqi: |
15209 | 2 | IID = Intrinsic::x86_avx512_ktestc_b; |
15210 | 2 | break; |
15211 | 2 | case X86::BI__builtin_ia32_ktestzqi: |
15212 | 2 | IID = Intrinsic::x86_avx512_ktestz_b; |
15213 | 2 | break; |
15214 | 2 | case X86::BI__builtin_ia32_ktestchi: |
15215 | 2 | IID = Intrinsic::x86_avx512_ktestc_w; |
15216 | 2 | break; |
15217 | 2 | case X86::BI__builtin_ia32_ktestzhi: |
15218 | 2 | IID = Intrinsic::x86_avx512_ktestz_w; |
15219 | 2 | break; |
15220 | 4 | case X86::BI__builtin_ia32_ktestcsi: |
15221 | 4 | IID = Intrinsic::x86_avx512_ktestc_d; |
15222 | 4 | break; |
15223 | 4 | case X86::BI__builtin_ia32_ktestzsi: |
15224 | 4 | IID = Intrinsic::x86_avx512_ktestz_d; |
15225 | 4 | break; |
15226 | 4 | case X86::BI__builtin_ia32_ktestcdi: |
15227 | 4 | IID = Intrinsic::x86_avx512_ktestc_q; |
15228 | 4 | break; |
15229 | 4 | case X86::BI__builtin_ia32_ktestzdi: |
15230 | 4 | IID = Intrinsic::x86_avx512_ktestz_q; |
15231 | 4 | break; |
15232 | 24 | } |
15233 | | |
15234 | 24 | unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth(); |
15235 | 24 | Value *LHS = getMaskVecValue(*this, Ops[0], NumElts); |
15236 | 24 | Value *RHS = getMaskVecValue(*this, Ops[1], NumElts); |
15237 | 24 | Function *Intr = CGM.getIntrinsic(IID); |
15238 | 24 | return Builder.CreateCall(Intr, {LHS, RHS}); |
15239 | 24 | } |
15240 | | |
15241 | 1 | case X86::BI__builtin_ia32_kaddqi: |
15242 | 2 | case X86::BI__builtin_ia32_kaddhi: |
15243 | 4 | case X86::BI__builtin_ia32_kaddsi: |
15244 | 6 | case X86::BI__builtin_ia32_kadddi: { |
15245 | 6 | Intrinsic::ID IID; |
15246 | 6 | switch (BuiltinID) { |
15247 | 0 | default: llvm_unreachable("Unsupported intrinsic!"); |
15248 | 1 | case X86::BI__builtin_ia32_kaddqi: |
15249 | 1 | IID = Intrinsic::x86_avx512_kadd_b; |
15250 | 1 | break; |
15251 | 1 | case X86::BI__builtin_ia32_kaddhi: |
15252 | 1 | IID = Intrinsic::x86_avx512_kadd_w; |
15253 | 1 | break; |
15254 | 2 | case X86::BI__builtin_ia32_kaddsi: |
15255 | 2 | IID = Intrinsic::x86_avx512_kadd_d; |
15256 | 2 | break; |
15257 | 2 | case X86::BI__builtin_ia32_kadddi: |
15258 | 2 | IID = Intrinsic::x86_avx512_kadd_q; |
15259 | 2 | break; |
15260 | 6 | } |
15261 | | |
15262 | 6 | unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth(); |
15263 | 6 | Value *LHS = getMaskVecValue(*this, Ops[0], NumElts); |
15264 | 6 | Value *RHS = getMaskVecValue(*this, Ops[1], NumElts); |
15265 | 6 | Function *Intr = CGM.getIntrinsic(IID); |
15266 | 6 | Value *Res = Builder.CreateCall(Intr, {LHS, RHS}); |
15267 | 6 | return Builder.CreateBitCast(Res, Ops[0]->getType()); |
15268 | 6 | } |
15269 | 1 | case X86::BI__builtin_ia32_kandqi: |
15270 | 3 | case X86::BI__builtin_ia32_kandhi: |
15271 | 5 | case X86::BI__builtin_ia32_kandsi: |
15272 | 7 | case X86::BI__builtin_ia32_kanddi: |
15273 | 7 | return EmitX86MaskLogic(*this, Instruction::And, Ops); |
15274 | 1 | case X86::BI__builtin_ia32_kandnqi: |
15275 | 3 | case X86::BI__builtin_ia32_kandnhi: |
15276 | 5 | case X86::BI__builtin_ia32_kandnsi: |
15277 | 7 | case X86::BI__builtin_ia32_kandndi: |
15278 | 7 | return EmitX86MaskLogic(*this, Instruction::And, Ops, true); |
15279 | 1 | case X86::BI__builtin_ia32_korqi: |
15280 | 4 | case X86::BI__builtin_ia32_korhi: |
15281 | 6 | case X86::BI__builtin_ia32_korsi: |
15282 | 8 | case X86::BI__builtin_ia32_kordi: |
15283 | 8 | return EmitX86MaskLogic(*this, Instruction::Or, Ops); |
15284 | 1 | case X86::BI__builtin_ia32_kxnorqi: |
15285 | 3 | case X86::BI__builtin_ia32_kxnorhi: |
15286 | 5 | case X86::BI__builtin_ia32_kxnorsi: |
15287 | 7 | case X86::BI__builtin_ia32_kxnordi: |
15288 | 7 | return EmitX86MaskLogic(*this, Instruction::Xor, Ops, true); |
15289 | 1 | case X86::BI__builtin_ia32_kxorqi: |
15290 | 3 | case X86::BI__builtin_ia32_kxorhi: |
15291 | 5 | case X86::BI__builtin_ia32_kxorsi: |
15292 | 7 | case X86::BI__builtin_ia32_kxordi: |
15293 | 7 | return EmitX86MaskLogic(*this, Instruction::Xor, Ops); |
15294 | 1 | case X86::BI__builtin_ia32_knotqi: |
15295 | 3 | case X86::BI__builtin_ia32_knothi: |
15296 | 5 | case X86::BI__builtin_ia32_knotsi: |
15297 | 9 | case X86::BI__builtin_ia32_knotdi: { |
15298 | 9 | unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth(); |
15299 | 9 | Value *Res = getMaskVecValue(*this, Ops[0], NumElts); |
15300 | 9 | return Builder.CreateBitCast(Builder.CreateNot(Res), |
15301 | 9 | Ops[0]->getType()); |
15302 | 5 | } |
15303 | 4 | case X86::BI__builtin_ia32_kmovb: |
15304 | 12 | case X86::BI__builtin_ia32_kmovw: |
15305 | 20 | case X86::BI__builtin_ia32_kmovd: |
15306 | 28 | case X86::BI__builtin_ia32_kmovq: { |
15307 | | // Bitcast to vXi1 type and then back to integer. This gets the mask |
15308 | | // register type into the IR, but might be optimized out depending on |
15309 | | // what's around it. |
15310 | 28 | unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth(); |
15311 | 28 | Value *Res = getMaskVecValue(*this, Ops[0], NumElts); |
15312 | 28 | return Builder.CreateBitCast(Res, Ops[0]->getType()); |
15313 | 20 | } |
15314 | | |
15315 | 2 | case X86::BI__builtin_ia32_kunpckdi: |
15316 | 4 | case X86::BI__builtin_ia32_kunpcksi: |
15317 | 6 | case X86::BI__builtin_ia32_kunpckhi: { |
15318 | 6 | unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth(); |
15319 | 6 | Value *LHS = getMaskVecValue(*this, Ops[0], NumElts); |
15320 | 6 | Value *RHS = getMaskVecValue(*this, Ops[1], NumElts); |
15321 | 6 | int Indices[64]; |
15322 | 230 | for (unsigned i = 0; i != NumElts; ++i)
15323 | 224 | Indices[i] = i; |
15324 | | |
15325 | | // First extract half of each vector. This gives better codegen than |
15326 | | // doing it in a single shuffle. |
15327 | 6 | LHS = Builder.CreateShuffleVector(LHS, LHS, ArrayRef(Indices, NumElts / 2)); |
15328 | 6 | RHS = Builder.CreateShuffleVector(RHS, RHS, ArrayRef(Indices, NumElts / 2)); |
15329 | | // Concat the vectors. |
15330 | | // NOTE: Operands are swapped to match the intrinsic definition. |
15331 | 6 | Value *Res = |
15332 | 6 | Builder.CreateShuffleVector(RHS, LHS, ArrayRef(Indices, NumElts)); |
15333 | 6 | return Builder.CreateBitCast(Res, Ops[0]->getType()); |
15334 | 4 | } |
15335 | | |
15336 | 1 | case X86::BI__builtin_ia32_vplzcntd_128: |
15337 | 2 | case X86::BI__builtin_ia32_vplzcntd_256: |
15338 | 3 | case X86::BI__builtin_ia32_vplzcntd_512: |
15339 | 4 | case X86::BI__builtin_ia32_vplzcntq_128: |
15340 | 5 | case X86::BI__builtin_ia32_vplzcntq_256: |
15341 | 6 | case X86::BI__builtin_ia32_vplzcntq_512: { |
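 | | // llvm.ctlz with the 'zero is poison' flag set to false, so an input of
 | | // 0 is well-defined and returns the element bit width.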
15342 | 6 | Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType()); |
15343 | 6 | return Builder.CreateCall(F, {Ops[0],Builder.getInt1(false)}); |
15344 | 5 | } |
15345 | 8 | case X86::BI__builtin_ia32_sqrtss: |
15346 | 15 | case X86::BI__builtin_ia32_sqrtsd: { |
15347 | 15 | Value *A = Builder.CreateExtractElement(Ops[0], (uint64_t)0); |
15348 | 15 | Function *F; |
15349 | 15 | if (Builder.getIsFPConstrained()) { |
15350 | 2 | CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E); |
15351 | 2 | F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt, |
15352 | 2 | A->getType()); |
15353 | 2 | A = Builder.CreateConstrainedFPCall(F, {A}); |
15354 | 13 | } else { |
15355 | 13 | F = CGM.getIntrinsic(Intrinsic::sqrt, A->getType()); |
15356 | 13 | A = Builder.CreateCall(F, {A}); |
15357 | 13 | } |
15358 | 15 | return Builder.CreateInsertElement(Ops[0], A, (uint64_t)0); |
15359 | 8 | } |
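
As a scalar model of this lowering (a sketch of the non-constrained path; sqrtssModel is an invented name): only lane 0 is replaced by its square root, and the upper lanes pass through from Ops[0] unchanged.

    #include <cmath>
    #include <cstdio>

    struct V4f { float E[4]; };

    // Scalar model of the sqrtss lowering above: replace element 0 only.
    static V4f sqrtssModel(V4f X) {
      X.E[0] = std::sqrt(X.E[0]);
      return X;
    }

    int main() {
      V4f V = {{4.0f, 2.0f, 3.0f, 5.0f}};
      V = sqrtssModel(V);
      std::printf("%g %g %g %g\n", V.E[0], V.E[1], V.E[2], V.E[3]); // 2 2 3 5
      return 0;
    }
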
15360 | 6 | case X86::BI__builtin_ia32_sqrtsh_round_mask: |
15361 | 28 | case X86::BI__builtin_ia32_sqrtsd_round_mask: |
15362 | 50 | case X86::BI__builtin_ia32_sqrtss_round_mask: { |
15363 | 50 | unsigned CC = cast<llvm::ConstantInt>(Ops[4])->getZExtValue(); |
15364 | | // Lower to a plain sqrt only when the rounding mode is 4
15365 | | // (AKA CUR_DIRECTION); otherwise keep the target intrinsic.
15366 | 50 | if (CC != 4) { |
15367 | 15 | Intrinsic::ID IID; |
15368 | | |
15369 | 15 | switch (BuiltinID) { |
15370 | 0 | default: |
15371 | 0 | llvm_unreachable("Unsupported intrinsic!"); |
15372 | 3 | case X86::BI__builtin_ia32_sqrtsh_round_mask: |
15373 | 3 | IID = Intrinsic::x86_avx512fp16_mask_sqrt_sh; |
15374 | 3 | break; |
15375 | 6 | case X86::BI__builtin_ia32_sqrtsd_round_mask: |
15376 | 6 | IID = Intrinsic::x86_avx512_mask_sqrt_sd; |
15377 | 6 | break; |
15378 | 6 | case X86::BI__builtin_ia32_sqrtss_round_mask: |
15379 | 6 | IID = Intrinsic::x86_avx512_mask_sqrt_ss; |
15380 | 6 | break; |
15381 | 15 | } |
15382 | 15 | return Builder.CreateCall(CGM.getIntrinsic(IID), Ops); |
15383 | 15 | } |
15384 | 35 | Value *A = Builder.CreateExtractElement(Ops[1], (uint64_t)0); |
15385 | 35 | Function *F; |
15386 | 35 | if (Builder.getIsFPConstrained()) { |
15387 | 12 | CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E); |
15388 | 12 | F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt, |
15389 | 12 | A->getType()); |
15390 | 12 | A = Builder.CreateConstrainedFPCall(F, A); |
15391 | 23 | } else { |
15392 | 23 | F = CGM.getIntrinsic(Intrinsic::sqrt, A->getType()); |
15393 | 23 | A = Builder.CreateCall(F, A); |
15394 | 23 | } |
15395 | 35 | Value *Src = Builder.CreateExtractElement(Ops[2], (uint64_t)0); |
15396 | 35 | A = EmitX86ScalarSelect(*this, Ops[3], A, Src); |
15397 | 35 | return Builder.CreateInsertElement(Ops[0], A, (uint64_t)0); |
15398 | 50 | } |
15399 | 8 | case X86::BI__builtin_ia32_sqrtpd256: |
15400 | 16 | case X86::BI__builtin_ia32_sqrtpd: |
15401 | 24 | case X86::BI__builtin_ia32_sqrtps256: |
15402 | 33 | case X86::BI__builtin_ia32_sqrtps: |
15403 | 34 | case X86::BI__builtin_ia32_sqrtph256: |
15404 | 35 | case X86::BI__builtin_ia32_sqrtph: |
15405 | 41 | case X86::BI__builtin_ia32_sqrtph512: |
15406 | 55 | case X86::BI__builtin_ia32_sqrtps512: |
15407 | 77 | case X86::BI__builtin_ia32_sqrtpd512: { |
15408 | 77 | if (Ops.size() == 2) { |
15409 | 42 | unsigned CC = cast<llvm::ConstantInt>(Ops[1])->getZExtValue(); |
15410 | | // Lower to a plain sqrt only when the rounding mode is 4
15411 | | // (AKA CUR_DIRECTION); otherwise keep the target intrinsic.
15412 | 42 | if (CC != 4) { |
15413 | 15 | Intrinsic::ID IID; |
15414 | | |
15415 | 15 | switch (BuiltinID) { |
15416 | 0 | default: |
15417 | 0 | llvm_unreachable("Unsupported intrinsic!"); |
15418 | 3 | case X86::BI__builtin_ia32_sqrtph512: |
15419 | 3 | IID = Intrinsic::x86_avx512fp16_sqrt_ph_512; |
15420 | 3 | break; |
15421 | 6 | case X86::BI__builtin_ia32_sqrtps512: |
15422 | 6 | IID = Intrinsic::x86_avx512_sqrt_ps_512; |
15423 | 6 | break; |
15424 | 6 | case X86::BI__builtin_ia32_sqrtpd512: |
15425 | 6 | IID = Intrinsic::x86_avx512_sqrt_pd_512; |
15426 | 6 | break; |
15427 | 15 | } |
15428 | 15 | return Builder.CreateCall(CGM.getIntrinsic(IID), Ops); |
15429 | 15 | } |
15430 | 42 | } |
15431 | 62 | if (Builder.getIsFPConstrained()) { |
15432 | 8 | CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E); |
15433 | 8 | Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt, |
15434 | 8 | Ops[0]->getType()); |
15435 | 8 | return Builder.CreateConstrainedFPCall(F, Ops[0]); |
15436 | 54 | } else { |
15437 | 54 | Function *F = CGM.getIntrinsic(Intrinsic::sqrt, Ops[0]->getType()); |
15438 | 54 | return Builder.CreateCall(F, Ops[0]); |
15439 | 54 | } |
15440 | 62 | } |
15441 | | |
15442 | 8 | case X86::BI__builtin_ia32_pmuludq128: |
15443 | 13 | case X86::BI__builtin_ia32_pmuludq256: |
15444 | 15 | case X86::BI__builtin_ia32_pmuludq512: |
15445 | 15 | return EmitX86Muldq(*this, /*IsSigned*/false, Ops); |
15446 | | |
15447 | 5 | case X86::BI__builtin_ia32_pmuldq128: |
15448 | 10 | case X86::BI__builtin_ia32_pmuldq256: |
15449 | 12 | case X86::BI__builtin_ia32_pmuldq512: |
15450 | 12 | return EmitX86Muldq(*this, /*IsSigned*/true, Ops); |
15451 | | |
15452 | 4 | case X86::BI__builtin_ia32_pternlogd512_mask: |
15453 | 8 | case X86::BI__builtin_ia32_pternlogq512_mask: |
15454 | 10 | case X86::BI__builtin_ia32_pternlogd128_mask: |
15455 | 12 | case X86::BI__builtin_ia32_pternlogd256_mask: |
15456 | 14 | case X86::BI__builtin_ia32_pternlogq128_mask: |
15457 | 16 | case X86::BI__builtin_ia32_pternlogq256_mask: |
15458 | 16 | return EmitX86Ternlog(*this, /*ZeroMask*/false, Ops); |
15459 | | |
15460 | 2 | case X86::BI__builtin_ia32_pternlogd512_maskz: |
15461 | 4 | case X86::BI__builtin_ia32_pternlogq512_maskz: |
15462 | 5 | case X86::BI__builtin_ia32_pternlogd128_maskz: |
15463 | 6 | case X86::BI__builtin_ia32_pternlogd256_maskz: |
15464 | 7 | case X86::BI__builtin_ia32_pternlogq128_maskz: |
15465 | 8 | case X86::BI__builtin_ia32_pternlogq256_maskz: |
15466 | 8 | return EmitX86Ternlog(*this, /*ZeroMask*/true, Ops); |
15467 | | |
15468 | 3 | case X86::BI__builtin_ia32_vpshldd128: |
15469 | 6 | case X86::BI__builtin_ia32_vpshldd256: |
15470 | 9 | case X86::BI__builtin_ia32_vpshldd512: |
15471 | 12 | case X86::BI__builtin_ia32_vpshldq128: |
15472 | 15 | case X86::BI__builtin_ia32_vpshldq256: |
15473 | 18 | case X86::BI__builtin_ia32_vpshldq512: |
15474 | 21 | case X86::BI__builtin_ia32_vpshldw128: |
15475 | 24 | case X86::BI__builtin_ia32_vpshldw256: |
15476 | 27 | case X86::BI__builtin_ia32_vpshldw512: |
15477 | 27 | return EmitX86FunnelShift(*this, Ops[0], Ops[1], Ops[2], false); |
15478 | | |
15479 | 3 | case X86::BI__builtin_ia32_vpshrdd128: |
15480 | 6 | case X86::BI__builtin_ia32_vpshrdd256: |
15481 | 9 | case X86::BI__builtin_ia32_vpshrdd512: |
15482 | 12 | case X86::BI__builtin_ia32_vpshrdq128: |
15483 | 15 | case X86::BI__builtin_ia32_vpshrdq256: |
15484 | 18 | case X86::BI__builtin_ia32_vpshrdq512: |
15485 | 21 | case X86::BI__builtin_ia32_vpshrdw128: |
15486 | 24 | case X86::BI__builtin_ia32_vpshrdw256: |
15487 | 27 | case X86::BI__builtin_ia32_vpshrdw512: |
15488 | | // Ops 0 and 1 are swapped. |
15489 | 27 | return EmitX86FunnelShift(*this, Ops[1], Ops[0], Ops[2], true); |
15490 | | |
15491 | 1 | case X86::BI__builtin_ia32_vpshldvd128: |
15492 | 2 | case X86::BI__builtin_ia32_vpshldvd256: |
15493 | 3 | case X86::BI__builtin_ia32_vpshldvd512: |
15494 | 4 | case X86::BI__builtin_ia32_vpshldvq128: |
15495 | 5 | case X86::BI__builtin_ia32_vpshldvq256: |
15496 | 6 | case X86::BI__builtin_ia32_vpshldvq512: |
15497 | 7 | case X86::BI__builtin_ia32_vpshldvw128: |
15498 | 8 | case X86::BI__builtin_ia32_vpshldvw256: |
15499 | 9 | case X86::BI__builtin_ia32_vpshldvw512: |
15500 | 9 | return EmitX86FunnelShift(*this, Ops[0], Ops[1], Ops[2], false); |
15501 | | |
15502 | 1 | case X86::BI__builtin_ia32_vpshrdvd128: |
15503 | 2 | case X86::BI__builtin_ia32_vpshrdvd256: |
15504 | 3 | case X86::BI__builtin_ia32_vpshrdvd512: |
15505 | 4 | case X86::BI__builtin_ia32_vpshrdvq128: |
15506 | 5 | case X86::BI__builtin_ia32_vpshrdvq256: |
15507 | 6 | case X86::BI__builtin_ia32_vpshrdvq512: |
15508 | 7 | case X86::BI__builtin_ia32_vpshrdvw128: |
15509 | 8 | case X86::BI__builtin_ia32_vpshrdvw256: |
15510 | 9 | case X86::BI__builtin_ia32_vpshrdvw512: |
15511 | | // Ops 0 and 1 are swapped. |
15512 | 9 | return EmitX86FunnelShift(*this, Ops[1], Ops[0], Ops[2], true); |
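
Per element, llvm.fshl/llvm.fshr shift the double-width concatenation of the two inputs and keep one half; the shift amount is taken modulo the bit width, as the intrinsics define. A minimal scalar model of one 32-bit fshl lane (a sketch; fshlModel is an invented name):

    #include <cassert>
    #include <cstdint>

    // Scalar model of llvm.fshl on one 32-bit lane: shift the 64-bit value
    // A:B left by Amt % 32 and keep the high 32 bits.
    static uint32_t fshlModel(uint32_t A, uint32_t B, uint32_t Amt) {
      Amt %= 32;
      if (Amt == 0)
        return A; // Avoids the undefined full-width shift below.
      return (A << Amt) | (B >> (32 - Amt));
    }

    int main() {
      assert(fshlModel(0x12345678u, 0x9ABCDEF0u, 8) == 0x3456789Au);
      return 0;
    }
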
15513 | | |
15514 | | // Reductions |
15515 | 4 | case X86::BI__builtin_ia32_reduce_fadd_pd512: |
15516 | 6 | case X86::BI__builtin_ia32_reduce_fadd_ps512: |
15517 | 7 | case X86::BI__builtin_ia32_reduce_fadd_ph512: |
15518 | 8 | case X86::BI__builtin_ia32_reduce_fadd_ph256: |
15519 | 9 | case X86::BI__builtin_ia32_reduce_fadd_ph128: { |
15520 | 9 | Function *F = |
15521 | 9 | CGM.getIntrinsic(Intrinsic::vector_reduce_fadd, Ops[1]->getType()); |
15522 | 9 | IRBuilder<>::FastMathFlagGuard FMFGuard(Builder); |
15523 | 9 | Builder.getFastMathFlags().setAllowReassoc(); |
15524 | 9 | return Builder.CreateCall(F, {Ops[0], Ops[1]}); |
15525 | 8 | } |
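
Semantically, vector.reduce.fadd folds a start value (Ops[0]) into every lane of Ops[1]; the reassoc flag set above only licenses the backend to re-associate the sum into a tree. A scalar sketch (reduceFAddModel is an invented name):

    #include <cstdio>

    // Scalar model of vector.reduce.fadd: a sequential sum of the start
    // value and all lanes; reassoc permits reordering for speed.
    static float reduceFAddModel(float Start, const float *Lanes, unsigned N) {
      float Acc = Start;
      for (unsigned I = 0; I != N; ++I)
        Acc += Lanes[I];
      return Acc;
    }

    int main() {
      float Lanes[4] = {1.0f, 2.0f, 3.0f, 4.0f};
      std::printf("%g\n", reduceFAddModel(0.0f, Lanes, 4)); // 10
      return 0;
    }
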
15526 | 2 | case X86::BI__builtin_ia32_reduce_fmul_pd512: |
15527 | 5 | case X86::BI__builtin_ia32_reduce_fmul_ps512: |
15528 | 6 | case X86::BI__builtin_ia32_reduce_fmul_ph512: |
15529 | 7 | case X86::BI__builtin_ia32_reduce_fmul_ph256: |
15530 | 8 | case X86::BI__builtin_ia32_reduce_fmul_ph128: { |
15531 | 8 | Function *F = |
15532 | 8 | CGM.getIntrinsic(Intrinsic::vector_reduce_fmul, Ops[1]->getType()); |
15533 | 8 | IRBuilder<>::FastMathFlagGuard FMFGuard(Builder); |
15534 | 8 | Builder.getFastMathFlags().setAllowReassoc(); |
15535 | 8 | return Builder.CreateCall(F, {Ops[0], Ops[1]}); |
15536 | 7 | } |
15537 | 2 | case X86::BI__builtin_ia32_reduce_fmax_pd512: |
15538 | 4 | case X86::BI__builtin_ia32_reduce_fmax_ps512: |
15539 | 5 | case X86::BI__builtin_ia32_reduce_fmax_ph512: |
15540 | 6 | case X86::BI__builtin_ia32_reduce_fmax_ph256: |
15541 | 8 | case X86::BI__builtin_ia32_reduce_fmax_ph128: { |
15542 | 8 | Function *F = |
15543 | 8 | CGM.getIntrinsic(Intrinsic::vector_reduce_fmax, Ops[0]->getType()); |
15544 | 8 | IRBuilder<>::FastMathFlagGuard FMFGuard(Builder); |
15545 | 8 | Builder.getFastMathFlags().setNoNaNs(); |
15546 | 8 | return Builder.CreateCall(F, {Ops[0]}); |
15547 | 6 | } |
15548 | 2 | case X86::BI__builtin_ia32_reduce_fmin_pd512: |
15549 | 4 | case X86::BI__builtin_ia32_reduce_fmin_ps512: |
15550 | 5 | case X86::BI__builtin_ia32_reduce_fmin_ph512: |
15551 | 7 | case X86::BI__builtin_ia32_reduce_fmin_ph256: |
15552 | 8 | case X86::BI__builtin_ia32_reduce_fmin_ph128: { |
15553 | 8 | Function *F = |
15554 | 8 | CGM.getIntrinsic(Intrinsic::vector_reduce_fmin, Ops[0]->getType()); |
15555 | 8 | IRBuilder<>::FastMathFlagGuard FMFGuard(Builder); |
15556 | 8 | Builder.getFastMathFlags().setNoNaNs(); |
15557 | 8 | return Builder.CreateCall(F, {Ops[0]}); |
15558 | 7 | } |
15559 | | |
15560 | | // 3DNow! |
15561 | 3 | case X86::BI__builtin_ia32_pswapdsf: |
15562 | 6 | case X86::BI__builtin_ia32_pswapdsi: { |
15563 | 6 | llvm::Type *MMXTy = llvm::Type::getX86_MMXTy(getLLVMContext()); |
15564 | 6 | Ops[0] = Builder.CreateBitCast(Ops[0], MMXTy, "cast"); |
15565 | 6 | llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_3dnowa_pswapd); |
15566 | 6 | return Builder.CreateCall(F, Ops, "pswapd"); |
15567 | 3 | } |
15568 | 2 | case X86::BI__builtin_ia32_rdrand16_step: |
15569 | 6 | case X86::BI__builtin_ia32_rdrand32_step: |
15570 | 7 | case X86::BI__builtin_ia32_rdrand64_step: |
15571 | 9 | case X86::BI__builtin_ia32_rdseed16_step: |
15572 | 11 | case X86::BI__builtin_ia32_rdseed32_step: |
15573 | 12 | case X86::BI__builtin_ia32_rdseed64_step: { |
15574 | 12 | Intrinsic::ID ID; |
15575 | 12 | switch (BuiltinID) { |
15576 | 0 | default: llvm_unreachable("Unsupported intrinsic!"); |
15577 | 2 | case X86::BI__builtin_ia32_rdrand16_step: |
15578 | 2 | ID = Intrinsic::x86_rdrand_16; |
15579 | 2 | break; |
15580 | 4 | case X86::BI__builtin_ia32_rdrand32_step: |
15581 | 4 | ID = Intrinsic::x86_rdrand_32; |
15582 | 4 | break; |
15583 | 1 | case X86::BI__builtin_ia32_rdrand64_step: |
15584 | 1 | ID = Intrinsic::x86_rdrand_64; |
15585 | 1 | break; |
15586 | 2 | case X86::BI__builtin_ia32_rdseed16_step: |
15587 | 2 | ID = Intrinsic::x86_rdseed_16; |
15588 | 2 | break; |
15589 | 2 | case X86::BI__builtin_ia32_rdseed32_step: |
15590 | 2 | ID = Intrinsic::x86_rdseed_32; |
15591 | 2 | break; |
15592 | 1 | case X86::BI__builtin_ia32_rdseed64_step: |
15593 | 1 | ID = Intrinsic::x86_rdseed_64; |
15594 | 1 | break; |
15595 | 12 | } |
15596 | | |
15597 | 12 | Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID)); |
15598 | 12 | Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 0), |
15599 | 12 | Ops[0]); |
15600 | 12 | return Builder.CreateExtractValue(Call, 1); |
15601 | 12 | } |
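
A hypothetical use of these step builtins (assuming an x86 target compiled with -mrdrnd): the flag extracted as field 1 above reports whether the hardware had entropy available, so callers are expected to retry on failure.

    #include <cstdio>

    int main() {
      unsigned int Val;
      for (int Tries = 0; Tries != 10; ++Tries) {
        if (__builtin_ia32_rdrand32_step(&Val)) { // Flag is extract-value 1.
          std::printf("random: %u\n", Val);
          return 0;
        }
      }
      return 1; // Hardware reported no entropy ten times in a row.
    }
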
15602 | 2 | case X86::BI__builtin_ia32_addcarryx_u32: |
15603 | 4 | case X86::BI__builtin_ia32_addcarryx_u64: |
15604 | 5 | case X86::BI__builtin_ia32_subborrow_u32: |
15605 | 6 | case X86::BI__builtin_ia32_subborrow_u64: { |
15606 | 6 | Intrinsic::ID IID; |
15607 | 6 | switch (BuiltinID) { |
15608 | 0 | default: llvm_unreachable("Unsupported intrinsic!"); |
15609 | 2 | case X86::BI__builtin_ia32_addcarryx_u32: |
15610 | 2 | IID = Intrinsic::x86_addcarry_32; |
15611 | 2 | break; |
15612 | 2 | case X86::BI__builtin_ia32_addcarryx_u64: |
15613 | 2 | IID = Intrinsic::x86_addcarry_64; |
15614 | 2 | break; |
15615 | 1 | case X86::BI__builtin_ia32_subborrow_u32: |
15616 | 1 | IID = Intrinsic::x86_subborrow_32; |
15617 | 1 | break; |
15618 | 1 | case X86::BI__builtin_ia32_subborrow_u64: |
15619 | 1 | IID = Intrinsic::x86_subborrow_64; |
15620 | 1 | break; |
15621 | 6 | } |
15622 | | |
15623 | 6 | Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), |
15624 | 6 | { Ops[0], Ops[1], Ops[2] }); |
15625 | 6 | Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 1), |
15626 | 6 | Ops[3]); |
15627 | 6 | return Builder.CreateExtractValue(Call, 0); |
15628 | 6 | } |
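
A hypothetical multi-word addition using the builtin lowered above (assuming an x86-64 target): the carry-out extracted as field 1 and stored through Ops[3] becomes the carry-in of the next limb, which is how wide additions are chained.

    #include <cassert>

    int main() {
      unsigned long long A[2] = {~0ULL, 1}, B[2] = {1, 2}, Sum[2];
      unsigned char C = __builtin_ia32_addcarryx_u64(0, A[0], B[0], &Sum[0]);
      C = __builtin_ia32_addcarryx_u64(C, A[1], B[1], &Sum[1]);
      // (2^64 - 1) + 1 carries into the second limb: the result is 2^64 + 4.
      assert(Sum[0] == 0 && Sum[1] == 4 && C == 0);
      return 0;
    }
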
15629 | | |
15630 | 2 | case X86::BI__builtin_ia32_fpclassps128_mask: |
15631 | 4 | case X86::BI__builtin_ia32_fpclassps256_mask: |
15632 | 6 | case X86::BI__builtin_ia32_fpclassps512_mask: |
15633 | 8 | case X86::BI__builtin_ia32_fpclassph128_mask: |
15634 | 10 | case X86::BI__builtin_ia32_fpclassph256_mask: |
15635 | 12 | case X86::BI__builtin_ia32_fpclassph512_mask: |
15636 | 14 | case X86::BI__builtin_ia32_fpclasspd128_mask: |
15637 | 16 | case X86::BI__builtin_ia32_fpclasspd256_mask: |
15638 | 18 | case X86::BI__builtin_ia32_fpclasspd512_mask: { |
15639 | 18 | unsigned NumElts = |
15640 | 18 | cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements(); |
15641 | 18 | Value *MaskIn = Ops[2]; |
15642 | 18 | Ops.erase(&Ops[2]); |
15643 | | |
15644 | 18 | Intrinsic::ID ID; |
15645 | 18 | switch (BuiltinID) { |
15646 | 0 | default: llvm_unreachable("Unsupported intrinsic!"); |
15647 | 2 | case X86::BI__builtin_ia32_fpclassph128_mask: |
15648 | 2 | ID = Intrinsic::x86_avx512fp16_fpclass_ph_128; |
15649 | 2 | break; |
15650 | 2 | case X86::BI__builtin_ia32_fpclassph256_mask: |
15651 | 2 | ID = Intrinsic::x86_avx512fp16_fpclass_ph_256; |
15652 | 2 | break; |
15653 | 2 | case X86::BI__builtin_ia32_fpclassph512_mask: |
15654 | 2 | ID = Intrinsic::x86_avx512fp16_fpclass_ph_512; |
15655 | 2 | break; |
15656 | 2 | case X86::BI__builtin_ia32_fpclassps128_mask: |
15657 | 2 | ID = Intrinsic::x86_avx512_fpclass_ps_128; |
15658 | 2 | break; |
15659 | 2 | case X86::BI__builtin_ia32_fpclassps256_mask: |
15660 | 2 | ID = Intrinsic::x86_avx512_fpclass_ps_256; |
15661 | 2 | break; |
15662 | 2 | case X86::BI__builtin_ia32_fpclassps512_mask: |
15663 | 2 | ID = Intrinsic::x86_avx512_fpclass_ps_512; |
15664 | 2 | break; |
15665 | 2 | case X86::BI__builtin_ia32_fpclasspd128_mask: |
15666 | 2 | ID = Intrinsic::x86_avx512_fpclass_pd_128; |
15667 | 2 | break; |
15668 | 2 | case X86::BI__builtin_ia32_fpclasspd256_mask: |
15669 | 2 | ID = Intrinsic::x86_avx512_fpclass_pd_256; |
15670 | 2 | break; |
15671 | 2 | case X86::BI__builtin_ia32_fpclasspd512_mask: |
15672 | 2 | ID = Intrinsic::x86_avx512_fpclass_pd_512; |
15673 | 2 | break; |
15674 | 18 | } |
15675 | | |
15676 | 18 | Value *Fpclass = Builder.CreateCall(CGM.getIntrinsic(ID), Ops); |
15677 | 18 | return EmitX86MaskedCompareResult(*this, Fpclass, NumElts, MaskIn); |
15678 | 18 | } |
15679 | | |
15680 | 2 | case X86::BI__builtin_ia32_vp2intersect_q_512: |
15681 | 4 | case X86::BI__builtin_ia32_vp2intersect_q_256: |
15682 | 6 | case X86::BI__builtin_ia32_vp2intersect_q_128: |
15683 | 8 | case X86::BI__builtin_ia32_vp2intersect_d_512: |
15684 | 10 | case X86::BI__builtin_ia32_vp2intersect_d_256: |
15685 | 12 | case X86::BI__builtin_ia32_vp2intersect_d_128: { |
15686 | 12 | unsigned NumElts = |
15687 | 12 | cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements(); |
15688 | 12 | Intrinsic::ID ID; |
15689 | | |
15690 | 12 | switch (BuiltinID) { |
15691 | 0 | default: llvm_unreachable("Unsupported intrinsic!"); |
15692 | 2 | case X86::BI__builtin_ia32_vp2intersect_q_512: |
15693 | 2 | ID = Intrinsic::x86_avx512_vp2intersect_q_512; |
15694 | 2 | break; |
15695 | 2 | case X86::BI__builtin_ia32_vp2intersect_q_256: |
15696 | 2 | ID = Intrinsic::x86_avx512_vp2intersect_q_256; |
15697 | 2 | break; |
15698 | 2 | case X86::BI__builtin_ia32_vp2intersect_q_128: |
15699 | 2 | ID = Intrinsic::x86_avx512_vp2intersect_q_128; |
15700 | 2 | break; |
15701 | 2 | case X86::BI__builtin_ia32_vp2intersect_d_512: |
15702 | 2 | ID = Intrinsic::x86_avx512_vp2intersect_d_512; |
15703 | 2 | break; |
15704 | 2 | case X86::BI__builtin_ia32_vp2intersect_d_256: |
15705 | 2 | ID = Intrinsic::x86_avx512_vp2intersect_d_256; |
15706 | 2 | break; |
15707 | 2 | case X86::BI__builtin_ia32_vp2intersect_d_128: |
15708 | 2 | ID = Intrinsic::x86_avx512_vp2intersect_d_128; |
15709 | 2 | break; |
15710 | 12 | } |
15711 | | |
15712 | 12 | Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID), {Ops[0], Ops[1]}); |
15713 | 12 | Value *Result = Builder.CreateExtractValue(Call, 0); |
15714 | 12 | Result = EmitX86MaskedCompareResult(*this, Result, NumElts, nullptr); |
15715 | 12 | Builder.CreateDefaultAlignedStore(Result, Ops[2]); |
15716 | | |
15717 | 12 | Result = Builder.CreateExtractValue(Call, 1); |
15718 | 12 | Result = EmitX86MaskedCompareResult(*this, Result, NumElts, nullptr); |
15719 | 12 | return Builder.CreateDefaultAlignedStore(Result, Ops[3]); |
15720 | 12 | } |
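
Semantically, vp2intersect produces two masks marking, per lane, whether that lane's value occurs anywhere in the other vector. A scalar sketch on four lanes (vp2intersectModel is an invented name, not code from this file):

    #include <cassert>
    #include <cstdint>

    // Scalar model of vp2intersect: MaskA bit i is set when A[i] appears
    // somewhere in B; MaskB bit j is set when B[j] appears somewhere in A.
    static void vp2intersectModel(const uint32_t A[4], const uint32_t B[4],
                                  uint8_t &MaskA, uint8_t &MaskB) {
      MaskA = MaskB = 0;
      for (int I = 0; I != 4; ++I)
        for (int J = 0; J != 4; ++J)
          if (A[I] == B[J]) {
            MaskA |= 1 << I;
            MaskB |= 1 << J;
          }
    }

    int main() {
      uint32_t A[4] = {1, 2, 3, 4}, B[4] = {4, 9, 9, 2};
      uint8_t MA, MB;
      vp2intersectModel(A, B, MA, MB);
      assert(MA == 0b1010 && MB == 0b1001); // Lanes {2,4} match lanes {4,2}.
      return 0;
    }
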
15721 | | |
15722 | 0 | case X86::BI__builtin_ia32_vpmultishiftqb128: |
15723 | 0 | case X86::BI__builtin_ia32_vpmultishiftqb256: |
15724 | 0 | case X86::BI__builtin_ia32_vpmultishiftqb512: { |
15725 | 0 | Intrinsic::ID ID; |
15726 | 0 | switch (BuiltinID) { |
15727 | 0 | default: llvm_unreachable("Unsupported intrinsic!"); |
15728 | 0 | case X86::BI__builtin_ia32_vpmultishiftqb128: |
15729 | 0 | ID = Intrinsic::x86_avx512_pmultishift_qb_128; |
15730 | 0 | break; |
15731 | 0 | case X86::BI__builtin_ia32_vpmultishiftqb256: |
15732 | 0 | ID = Intrinsic::x86_avx512_pmultishift_qb_256; |
15733 | 0 | break; |
15734 | 0 | case X86::BI__builtin_ia32_vpmultishiftqb512: |
15735 | 0 | ID = Intrinsic::x86_avx512_pmultishift_qb_512; |
15736 | 0 | break; |
15737 | 0 | } |
15738 | | |
15739 | 0 | return Builder.CreateCall(CGM.getIntrinsic(ID), Ops); |
15740 | 0 | } |
15741 | | |
15742 | 1 | case X86::BI__builtin_ia32_vpshufbitqmb128_mask: |
15743 | 2 | case X86::BI__builtin_ia32_vpshufbitqmb256_mask: |
15744 | 3 | case X86::BI__builtin_ia32_vpshufbitqmb512_mask: { |
15745 | 3 | unsigned NumElts = |
15746 | 3 | cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements(); |
15747 | 3 | Value *MaskIn = Ops[2]; |
15748 | 3 | Ops.erase(&Ops[2]); |
15749 | | |
15750 | 3 | Intrinsic::ID ID; |
15751 | 3 | switch (BuiltinID) { |
15752 | 0 | default: llvm_unreachable("Unsupported intrinsic!"); |
15753 | 1 | case X86::BI__builtin_ia32_vpshufbitqmb128_mask: |
15754 | 1 | ID = Intrinsic::x86_avx512_vpshufbitqmb_128; |
15755 | 1 | break; |
15756 | 1 | case X86::BI__builtin_ia32_vpshufbitqmb256_mask: |
15757 | 1 | ID = Intrinsic::x86_avx512_vpshufbitqmb_256; |
15758 | 1 | break; |
15759 | 1 | case X86::BI__builtin_ia32_vpshufbitqmb512_mask: |
15760 | 1 | ID = Intrinsic::x86_avx512_vpshufbitqmb_512; |
15761 | 1 | break; |
15762 | 3 | } |
15763 | | |
15764 | 3 | Value *Shufbit = Builder.CreateCall(CGM.getIntrinsic(ID), Ops); |
15765 | 3 | return EmitX86MaskedCompareResult(*this, Shufbit, NumElts, MaskIn); |
15766 | 3 | } |
15767 | | |
15768 | | // packed comparison intrinsics |
15769 | 7 | case X86::BI__builtin_ia32_cmpeqps: |
15770 | 18 | case X86::BI__builtin_ia32_cmpeqpd: |
15771 | 18 | return getVectorFCmpIR(CmpInst::FCMP_OEQ, /*IsSignaling*/false); |
15772 | 6 | case X86::BI__builtin_ia32_cmpltps: |
15773 | 20 | case X86::BI__builtin_ia32_cmpltpd: |
15774 | 20 | return getVectorFCmpIR(CmpInst::FCMP_OLT, /*IsSignaling*/true); |
15775 | 6 | case X86::BI__builtin_ia32_cmpleps: |
15776 | 20 | case X86::BI__builtin_ia32_cmplepd: |
15777 | 20 | return getVectorFCmpIR(CmpInst::FCMP_OLE, /*IsSignaling*/true); |
15778 | 3 | case X86::BI__builtin_ia32_cmpunordps: |
15779 | 10 | case X86::BI__builtin_ia32_cmpunordpd: |
15780 | 10 | return getVectorFCmpIR(CmpInst::FCMP_UNO, /*IsSignaling*/false); |
15781 | 3 | case X86::BI__builtin_ia32_cmpneqps: |
15782 | 10 | case X86::BI__builtin_ia32_cmpneqpd: |
15783 | 10 | return getVectorFCmpIR(CmpInst::FCMP_UNE, /*IsSignaling*/false); |
15784 | 6 | case X86::BI__builtin_ia32_cmpnltps: |
15785 | 20 | case X86::BI__builtin_ia32_cmpnltpd: |
15786 | 20 | return getVectorFCmpIR(CmpInst::FCMP_UGE, /*IsSignaling*/true); |
15787 | 6 | case X86::BI__builtin_ia32_cmpnleps: |
15788 | 20 | case X86::BI__builtin_ia32_cmpnlepd: |
15789 | 20 | return getVectorFCmpIR(CmpInst::FCMP_UGT, /*IsSignaling*/true); |
15790 | 3 | case X86::BI__builtin_ia32_cmpordps: |
15791 | 10 | case X86::BI__builtin_ia32_cmpordpd: |
15792 | 10 | return getVectorFCmpIR(CmpInst::FCMP_ORD, /*IsSignaling*/false); |
15793 | 81 | case X86::BI__builtin_ia32_cmpph128_mask: |
15794 | 164 | case X86::BI__builtin_ia32_cmpph256_mask: |
15795 | 260 | case X86::BI__builtin_ia32_cmpph512_mask: |
15796 | 388 | case X86::BI__builtin_ia32_cmpps128_mask: |
15797 | 516 | case X86::BI__builtin_ia32_cmpps256_mask: |
15798 | 746 | case X86::BI__builtin_ia32_cmpps512_mask: |
15799 | 874 | case X86::BI__builtin_ia32_cmppd128_mask: |
15800 | 1.00k | case X86::BI__builtin_ia32_cmppd256_mask: |
15801 | 1.23k | case X86::BI__builtin_ia32_cmppd512_mask: |
15802 | 1.23k | IsMaskFCmp = true; |
15803 | 1.23k | [[fallthrough]]; |
15804 | 1.47k | case X86::BI__builtin_ia32_cmpps: |
15805 | 1.70k | case X86::BI__builtin_ia32_cmpps256: |
15806 | 1.94k | case X86::BI__builtin_ia32_cmppd: |
15807 | 2.17k | case X86::BI__builtin_ia32_cmppd256: { |
15808 | | // Lower the vector comparisons to fcmp instructions, while ignoring
15809 | | // both the requested signalling behaviour and the requested
15810 | | // rounding mode.
15811 | | // This is only possible if fp-model is not strict and FENV_ACCESS is off. |
15812 | | |
15813 | | // The third argument is the comparison condition, an integer in the
15814 | | // range [0, 31] |
15815 | 2.17k | unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x1f; |
15816 | | |
15817 | | // Lowering to IR fcmp instruction. |
15818 | | // Ignoring requested signaling behaviour, |
15819 | | // e.g. both _CMP_GT_OS & _CMP_GT_OQ are translated to FCMP_OGT. |
15820 | 2.17k | FCmpInst::Predicate Pred; |
15821 | 2.17k | bool IsSignaling; |
15822 | | // Predicates for 16-31 repeat the 0-15 predicates. Only the signalling |
15823 | | // behavior is inverted. We'll handle that after the switch. |
15824 | 2.17k | switch (CC & 0xf) { |
15825 | 175 | case 0x00: Pred = FCmpInst::FCMP_OEQ; IsSignaling = false; break; |
15826 | 140 | case 0x01: Pred = FCmpInst::FCMP_OLT; IsSignaling = true; break; |
15827 | 140 | case 0x02: Pred = FCmpInst::FCMP_OLE; IsSignaling = true; break; |
15828 | 140 | case 0x03: Pred = FCmpInst::FCMP_UNO; IsSignaling = false; break; |
15829 | 140 | case 0x04: Pred = FCmpInst::FCMP_UNE; IsSignaling = false; break; |
15830 | 140 | case 0x05: Pred = FCmpInst::FCMP_UGE; IsSignaling = true; break; |
15831 | 140 | case 0x06: Pred = FCmpInst::FCMP_UGT; IsSignaling = true; break; |
15832 | 140 | case 0x07: Pred = FCmpInst::FCMP_ORD; IsSignaling = false; break; |
15833 | 128 | case 0x08: Pred = FCmpInst::FCMP_UEQ; IsSignaling = false; break; |
15834 | 128 | case 0x09: Pred = FCmpInst::FCMP_ULT; IsSignaling = true; break; |
15835 | 128 | case 0x0a: Pred = FCmpInst::FCMP_ULE; IsSignaling = true; break; |
15836 | 128 | case 0x0b: Pred = FCmpInst::FCMP_FALSE; IsSignaling = false; break; |
15837 | 128 | case 0x0c: Pred = FCmpInst::FCMP_ONE; IsSignaling = false; break; |
15838 | 128 | case 0x0d: Pred = FCmpInst::FCMP_OGE; IsSignaling = true; break; |
15839 | 128 | case 0x0e: Pred = FCmpInst::FCMP_OGT; IsSignaling = true; break; |
15840 | 128 | case 0x0f: Pred = FCmpInst::FCMP_TRUE; IsSignaling = false; break; |
15841 | 0 | default: llvm_unreachable("Unhandled CC"); |
15842 | 2.17k | } |
15843 | | |
15844 | | // Invert the signalling behavior for 16-31. |
15845 | 2.17k | if (CC & 0x10) |
15846 | 1.02k | IsSignaling = !IsSignaling; |
15847 | | |
15848 | | // If the predicate is true or false and we're using constrained intrinsics, |
15849 | | // we don't have a compare intrinsic we can use. Just use the legacy X86 |
15850 | | // specific intrinsic. |
15851 | | // If the intrinsic is mask enabled and we're using constrained intrinsics, |
15852 | | // use the legacy X86 specific intrinsic. |
15853 | 2.17k | if (Builder.getIsFPConstrained() && |
15854 | 2.17k | (Pred == FCmpInst::FCMP_TRUE || Pred == FCmpInst::FCMP_FALSE ||
15855 | 710 | IsMaskFCmp)) {
15856 | | |
15857 | 486 | Intrinsic::ID IID; |
15858 | 486 | switch (BuiltinID) { |
15859 | 0 | default: llvm_unreachable("Unexpected builtin"); |
15860 | 8 | case X86::BI__builtin_ia32_cmpps: |
15861 | 8 | IID = Intrinsic::x86_sse_cmp_ps; |
15862 | 8 | break; |
15863 | 8 | case X86::BI__builtin_ia32_cmpps256: |
15864 | 8 | IID = Intrinsic::x86_avx_cmp_ps_256; |
15865 | 8 | break; |
15866 | 8 | case X86::BI__builtin_ia32_cmppd: |
15867 | 8 | IID = Intrinsic::x86_sse2_cmp_pd; |
15868 | 8 | break; |
15869 | 8 | case X86::BI__builtin_ia32_cmppd256: |
15870 | 8 | IID = Intrinsic::x86_avx_cmp_pd_256; |
15871 | 8 | break; |
15872 | 17 | case X86::BI__builtin_ia32_cmpph128_mask: |
15873 | 17 | IID = Intrinsic::x86_avx512fp16_mask_cmp_ph_128; |
15874 | 17 | break; |
15875 | 19 | case X86::BI__builtin_ia32_cmpph256_mask: |
15876 | 19 | IID = Intrinsic::x86_avx512fp16_mask_cmp_ph_256; |
15877 | 19 | break; |
15878 | 30 | case X86::BI__builtin_ia32_cmpph512_mask: |
15879 | 30 | IID = Intrinsic::x86_avx512fp16_mask_cmp_ph_512; |
15880 | 30 | break; |
15881 | 66 | case X86::BI__builtin_ia32_cmpps512_mask: |
15882 | 66 | IID = Intrinsic::x86_avx512_mask_cmp_ps_512; |
15883 | 66 | break; |
15884 | 66 | case X86::BI__builtin_ia32_cmppd512_mask: |
15885 | 66 | IID = Intrinsic::x86_avx512_mask_cmp_pd_512; |
15886 | 66 | break; |
15887 | 64 | case X86::BI__builtin_ia32_cmpps128_mask: |
15888 | 64 | IID = Intrinsic::x86_avx512_mask_cmp_ps_128; |
15889 | 64 | break; |
15890 | 64 | case X86::BI__builtin_ia32_cmpps256_mask: |
15891 | 64 | IID = Intrinsic::x86_avx512_mask_cmp_ps_256; |
15892 | 64 | break; |
15893 | 64 | case X86::BI__builtin_ia32_cmppd128_mask: |
15894 | 64 | IID = Intrinsic::x86_avx512_mask_cmp_pd_128; |
15895 | 64 | break; |
15896 | 64 | case X86::BI__builtin_ia32_cmppd256_mask: |
15897 | 64 | IID = Intrinsic::x86_avx512_mask_cmp_pd_256; |
15898 | 64 | break; |
15899 | 486 | } |
15900 | | |
15901 | 486 | Function *Intr = CGM.getIntrinsic(IID); |
15902 | 486 | if (IsMaskFCmp) { |
15903 | 454 | unsigned NumElts = |
15904 | 454 | cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements(); |
15905 | 454 | Ops[3] = getMaskVecValue(*this, Ops[3], NumElts); |
15906 | 454 | Value *Cmp = Builder.CreateCall(Intr, Ops); |
15907 | 454 | return EmitX86MaskedCompareResult(*this, Cmp, NumElts, nullptr); |
15908 | 454 | } |
15909 | | |
15910 | 32 | return Builder.CreateCall(Intr, Ops); |
15911 | 486 | } |
15912 | | |
15913 | | // Builtins without the _mask suffix return a vector of integers |
15914 | | // of the same width as the input vectors.
15915 | 1.69k | if (IsMaskFCmp) { |
15916 | | // We ignore SAE if strict FP is disabled. We only keep precise |
15917 | | // exception behavior under strict FP. |
15918 | | // NOTE: If strict FP does ever go through here a CGFPOptionsRAII |
15919 | | // object will be required. |
15920 | 780 | unsigned NumElts = |
15921 | 780 | cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements(); |
15922 | 780 | Value *Cmp; |
15923 | 780 | if (IsSignaling) |
15924 | 384 | Cmp = Builder.CreateFCmpS(Pred, Ops[0], Ops[1]); |
15925 | 396 | else |
15926 | 396 | Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]); |
15927 | 780 | return EmitX86MaskedCompareResult(*this, Cmp, NumElts, Ops[3]); |
15928 | 780 | } |
15929 | | |
15930 | 913 | return getVectorFCmpIR(Pred, IsSignaling); |
15931 | 1.69k | } |
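
The immediate decoding above means predicates sixteen apart compare identically and differ only in whether they signal on quiet NaNs. A small model of the signalling bit, mirroring the switch above (a sketch; isSignalingCC is an invented name):

    #include <cassert>

    // Bits 0-3 pick the predicate; bit 4 only flips the signalling
    // behaviour (e.g. _CMP_LT_OS = 0x01 signals, _CMP_LT_OQ = 0x11 is quiet).
    static bool isSignalingCC(unsigned CC) {
      static const bool Base[16] = {false, true, true, false,
                                    false, true, true, false,
                                    false, true, true, false,
                                    false, true, true, false};
      bool S = Base[CC & 0xf];
      return (CC & 0x10) ? !S : S;
    }

    int main() {
      assert(isSignalingCC(0x01) && !isSignalingCC(0x11)); // LT_OS vs LT_OQ.
      return 0;
    }
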
15932 | | |
15933 | | // SSE scalar comparison intrinsics |
15934 | 2 | case X86::BI__builtin_ia32_cmpeqss: |
15935 | 2 | return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 0); |
15936 | 6 | case X86::BI__builtin_ia32_cmpltss: |
15937 | 6 | return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 1); |
15938 | 6 | case X86::BI__builtin_ia32_cmpless: |
15939 | 6 | return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 2); |
15940 | 2 | case X86::BI__builtin_ia32_cmpunordss: |
15941 | 2 | return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 3); |
15942 | 2 | case X86::BI__builtin_ia32_cmpneqss: |
15943 | 2 | return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 4); |
15944 | 6 | case X86::BI__builtin_ia32_cmpnltss: |
15945 | 6 | return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 5); |
15946 | 6 | case X86::BI__builtin_ia32_cmpnless: |
15947 | 6 | return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 6); |
15948 | 2 | case X86::BI__builtin_ia32_cmpordss: |
15949 | 2 | return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 7); |
15950 | 5 | case X86::BI__builtin_ia32_cmpeqsd: |
15951 | 5 | return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 0); |
15952 | 12 | case X86::BI__builtin_ia32_cmpltsd: |
15953 | 12 | return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 1); |
15954 | 12 | case X86::BI__builtin_ia32_cmplesd: |
15955 | 12 | return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 2); |
15956 | 5 | case X86::BI__builtin_ia32_cmpunordsd: |
15957 | 5 | return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 3); |
15958 | 5 | case X86::BI__builtin_ia32_cmpneqsd: |
15959 | 5 | return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 4); |
15960 | 12 | case X86::BI__builtin_ia32_cmpnltsd: |
15961 | 12 | return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 5); |
15962 | 12 | case X86::BI__builtin_ia32_cmpnlesd: |
15963 | 12 | return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 6); |
15964 | 5 | case X86::BI__builtin_ia32_cmpordsd: |
15965 | 5 | return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 7); |
15966 | | |
15967 | | // f16c half2float intrinsics |
15968 | 4 | case X86::BI__builtin_ia32_vcvtph2ps: |
15969 | 6 | case X86::BI__builtin_ia32_vcvtph2ps256: |
15970 | 16 | case X86::BI__builtin_ia32_vcvtph2ps_mask: |
15971 | 26 | case X86::BI__builtin_ia32_vcvtph2ps256_mask: |
15972 | 74 | case X86::BI__builtin_ia32_vcvtph2ps512_mask: { |
15973 | 74 | CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E); |
15974 | 74 | return EmitX86CvtF16ToFloatExpr(*this, Ops, ConvertType(E->getType())); |
15975 | 26 | } |
15976 | | |
15977 | | // AVX512 bf16 intrinsics |
15978 | 3 | case X86::BI__builtin_ia32_cvtneps2bf16_128_mask: { |
15979 | 3 | Ops[2] = getMaskVecValue( |
15980 | 3 | *this, Ops[2], |
15981 | 3 | cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements()); |
15982 | 3 | Intrinsic::ID IID = Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128; |
15983 | 3 | return Builder.CreateCall(CGM.getIntrinsic(IID), Ops); |
15984 | 26 | } |
15985 | 1 | case X86::BI__builtin_ia32_cvtsbf162ss_32: |
15986 | 1 | return Builder.CreateFPExt(Ops[0], Builder.getFloatTy()); |
15987 | | |
15988 | 2 | case X86::BI__builtin_ia32_cvtneps2bf16_256_mask: |
15989 | 5 | case X86::BI__builtin_ia32_cvtneps2bf16_512_mask: { |
15990 | 5 | Intrinsic::ID IID; |
15991 | 5 | switch (BuiltinID) { |
15992 | 0 | default: llvm_unreachable("Unsupported intrinsic!"); |
15993 | 2 | case X86::BI__builtin_ia32_cvtneps2bf16_256_mask: |
15994 | 2 | IID = Intrinsic::x86_avx512bf16_cvtneps2bf16_256; |
15995 | 2 | break; |
15996 | 3 | case X86::BI__builtin_ia32_cvtneps2bf16_512_mask: |
15997 | 3 | IID = Intrinsic::x86_avx512bf16_cvtneps2bf16_512; |
15998 | 3 | break; |
15999 | 5 | } |
16000 | 5 | Value *Res = Builder.CreateCall(CGM.getIntrinsic(IID), Ops[0]); |
16001 | 5 | return EmitX86Select(*this, Ops[2], Res, Ops[1]); |
16002 | 5 | } |
16003 | | |
16004 | 8 | case X86::BI__cpuid: |
16005 | 14 | case X86::BI__cpuidex: { |
16006 | 14 | Value *FuncId = EmitScalarExpr(E->getArg(1)); |
16007 | 14 | Value *SubFuncId = BuiltinID == X86::BI__cpuidex |
16008 | 14 | ? EmitScalarExpr(E->getArg(2))
16009 | 14 | : llvm::ConstantInt::get(Int32Ty, 0);
16010 | | |
16011 | 14 | llvm::StructType *CpuidRetTy = |
16012 | 14 | llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty, Int32Ty); |
16013 | 14 | llvm::FunctionType *FTy = |
16014 | 14 | llvm::FunctionType::get(CpuidRetTy, {Int32Ty, Int32Ty}, false); |
16015 | | |
16016 | 14 | StringRef Asm, Constraints; |
16017 | 14 | if (getTarget().getTriple().getArch() == llvm::Triple::x86) { |
16018 | 6 | Asm = "cpuid"; |
16019 | 6 | Constraints = "={ax},={bx},={cx},={dx},{ax},{cx}"; |
16020 | 8 | } else { |
16021 | | // x86-64 uses %rbx as the base register, so preserve it. |
16022 | 8 | Asm = "xchgq %rbx, ${1:q}\n" |
16023 | 8 | "cpuid\n" |
16024 | 8 | "xchgq %rbx, ${1:q}"; |
16025 | 8 | Constraints = "={ax},=r,={cx},={dx},0,2"; |
16026 | 8 | } |
16027 | | |
16028 | 14 | llvm::InlineAsm *IA = llvm::InlineAsm::get(FTy, Asm, Constraints, |
16029 | 14 | /*hasSideEffects=*/false); |
16030 | 14 | Value *IACall = Builder.CreateCall(IA, {FuncId, SubFuncId}); |
16031 | 14 | Value *BasePtr = EmitScalarExpr(E->getArg(0)); |
16032 | 14 | Value *Store = nullptr; |
16033 | 70 | for (unsigned i = 0; i < 4; i++) {
16034 | 56 | Value *Extracted = Builder.CreateExtractValue(IACall, i); |
16035 | 56 | Value *StorePtr = Builder.CreateConstInBoundsGEP1_32(Int32Ty, BasePtr, i); |
16036 | 56 | Store = Builder.CreateAlignedStore(Extracted, StorePtr, getIntAlign()); |
16037 | 56 | } |
16038 | | |
16039 | | // Return the last store instruction to signal that we have emitted the |
16040 | | // intrinsic.
16041 | 14 | return Store; |
16042 | 8 | } |
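
A hypothetical use of the builtin lowered above (assuming MS extensions or <intrin.h> on an x86 target): leaf 0 returns the maximum supported leaf in EAX and the vendor string spread across EBX, EDX, ECX.

    #include <cstdio>
    #include <cstring>

    int main() {
      int Regs[4]; // EAX, EBX, ECX, EDX, stored in that order by the lowering.
      __cpuid(Regs, 0);
      char Vendor[13] = {};
      std::memcpy(Vendor + 0, &Regs[1], 4); // EBX
      std::memcpy(Vendor + 4, &Regs[3], 4); // EDX
      std::memcpy(Vendor + 8, &Regs[2], 4); // ECX
      std::printf("max leaf %d, vendor %s\n", Regs[0], Vendor);
      return 0;
    }
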
16043 | | |
16044 | 2 | case X86::BI__emul: |
16045 | 4 | case X86::BI__emulu: { |
16046 | 4 | llvm::Type *Int64Ty = llvm::IntegerType::get(getLLVMContext(), 64); |
16047 | 4 | bool isSigned = (BuiltinID == X86::BI__emul); |
16048 | 4 | Value *LHS = Builder.CreateIntCast(Ops[0], Int64Ty, isSigned); |
16049 | 4 | Value *RHS = Builder.CreateIntCast(Ops[1], Int64Ty, isSigned); |
16050 | 4 | return Builder.CreateMul(LHS, RHS, "", !isSigned, isSigned); |
16051 | 2 | } |
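
Equivalent C++ for the widening multiply above (a sketch; emulModel is an invented name): both operands are extended to 64 bits before multiplying, so the full 32x32->64 product is kept with no 32-bit overflow.

    #include <cassert>
    #include <cstdint>

    static int64_t emulModel(int32_t A, int32_t B) {
      return static_cast<int64_t>(A) * static_cast<int64_t>(B);
    }

    int main() {
      assert(emulModel(0x10000, 0x10000) == 0x100000000LL);
      return 0;
    }
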
16052 | 1 | case X86::BI__mulh: |
16053 | 2 | case X86::BI__umulh: |
16054 | 3 | case X86::BI_mul128: |
16055 | 4 | case X86::BI_umul128: { |
16056 | 4 | llvm::Type *ResType = ConvertType(E->getType()); |
16057 | 4 | llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128); |
16058 | | |
16059 | 4 | bool IsSigned = (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI_mul128);
16060 | 4 | Value *LHS = Builder.CreateIntCast(Ops[0], Int128Ty, IsSigned); |
16061 | 4 | Value *RHS = Builder.CreateIntCast(Ops[1], Int128Ty, IsSigned); |
16062 | | |
16063 | 4 | Value *MulResult, *HigherBits; |
16064 | 4 | if (IsSigned) { |
16065 | 2 | MulResult = Builder.CreateNSWMul(LHS, RHS); |
16066 | 2 | HigherBits = Builder.CreateAShr(MulResult, 64); |
16067 | 2 | } else { |
16068 | 2 | MulResult = Builder.CreateNUWMul(LHS, RHS); |
16069 | 2 | HigherBits = Builder.CreateLShr(MulResult, 64); |
16070 | 2 | } |
16071 | 4 | HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned); |
16072 | | |
16073 | 4 | if (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI__umulh)
16074 | 2 | return HigherBits; |
16075 | | |
16076 | 2 | Address HighBitsAddress = EmitPointerWithAlignment(E->getArg(2)); |
16077 | 2 | Builder.CreateStore(HigherBits, HighBitsAddress); |
16078 | 2 | return Builder.CreateIntCast(MulResult, ResType, IsSigned); |
16079 | 4 | } |
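
Equivalent C++ for the signed 128-bit product above (a sketch, assuming compiler __int128 support; mul128Model is an invented name): form the full product, then split it into high and low 64-bit halves as _mul128 does.

    #include <cassert>
    #include <cstdint>

    static int64_t mul128Model(int64_t A, int64_t B, int64_t *Hi) {
      __int128 P = static_cast<__int128>(A) * B;
      *Hi = static_cast<int64_t>(P >> 64);
      return static_cast<int64_t>(P); // Low 64 bits, as _mul128 returns.
    }

    int main() {
      int64_t Hi;
      int64_t Lo = mul128Model(INT64_MAX, 2, &Hi);
      assert(Hi == 0 && Lo == -2); // 2 * (2^63 - 1) = 2^64 - 2.
      return 0;
    }
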
16080 | | |
16081 | 1 | case X86::BI__faststorefence: { |
16082 | 1 | return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, |
16083 | 1 | llvm::SyncScope::System); |
16084 | 4 | } |
16085 | 1 | case X86::BI__shiftleft128: |
16086 | 2 | case X86::BI__shiftright128: { |
16087 | 2 | llvm::Function *F = CGM.getIntrinsic( |
16088 | 2 | BuiltinID == X86::BI__shiftleft128 ? Intrinsic::fshl : Intrinsic::fshr,
16089 | 2 | Int64Ty); |
16090 | | // Flip low/high ops and zero-extend amount to matching type. |
16091 | | // shiftleft128(Low, High, Amt) -> fshl(High, Low, Amt) |
16092 | | // shiftright128(Low, High, Amt) -> fshr(High, Low, Amt) |
16093 | 2 | std::swap(Ops[0], Ops[1]); |
16094 | 2 | Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty); |
16095 | 2 | return Builder.CreateCall(F, Ops); |
16096 | 1 | } |
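
Equivalent C++ for __shiftleft128 (a sketch; shiftLeft128Model is an invented name): shift the 128-bit value High:Low left and keep the high 64 bits; like llvm.fshl, the amount is taken modulo 64.

    #include <cassert>
    #include <cstdint>

    static uint64_t shiftLeft128Model(uint64_t Low, uint64_t High,
                                      unsigned char Amt) {
      Amt &= 63;
      if (Amt == 0)
        return High; // Avoids the undefined shift by 64 below.
      return (High << Amt) | (Low >> (64 - Amt));
    }

    int main() {
      // The top bit of Low shifts into the bottom bit of the result.
      assert(shiftLeft128Model(0x8000000000000000ULL, 0, 1) == 1);
      return 0;
    }
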
16097 | 2 | case X86::BI_ReadWriteBarrier: |
16098 | 4 | case X86::BI_ReadBarrier: |
16099 | 6 | case X86::BI_WriteBarrier: { |
16100 | 6 | return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, |
16101 | 6 | llvm::SyncScope::SingleThread); |
16102 | 4 | } |
16103 | | |
16104 | 2 | case X86::BI_AddressOfReturnAddress: { |
16105 | 2 | Function *F = |
16106 | 2 | CGM.getIntrinsic(Intrinsic::addressofreturnaddress, AllocaInt8PtrTy); |
16107 | 2 | return Builder.CreateCall(F); |
16108 | 4 | } |
16109 | 4 | case X86::BI__stosb: { |
16110 | | // We treat __stosb as a volatile memset - it may not generate a "rep stosb"
16111 | | // instruction, but it will create a memset that won't be optimized away. |
16112 | 4 | return Builder.CreateMemSet(Ops[0], Ops[1], Ops[2], Align(1), true); |
16113 | 4 | } |
16114 | 2 | case X86::BI__ud2: |
16115 | | // llvm.trap makes a ud2a instruction on x86. |
16116 | 2 | return EmitTrapCall(Intrinsic::trap); |
16117 | 2 | case X86::BI__int2c: { |
16118 | | // This syscall signals a driver assertion failure in x86 NT kernels. |
16119 | 2 | llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false); |
16120 | 2 | llvm::InlineAsm *IA = |
16121 | 2 | llvm::InlineAsm::get(FTy, "int $$0x2c", "", /*hasSideEffects=*/true); |
16122 | 2 | llvm::AttributeList NoReturnAttr = llvm::AttributeList::get( |
16123 | 2 | getLLVMContext(), llvm::AttributeList::FunctionIndex, |
16124 | 2 | llvm::Attribute::NoReturn); |
16125 | 2 | llvm::CallInst *CI = Builder.CreateCall(IA); |
16126 | 2 | CI->setAttributes(NoReturnAttr); |
16127 | 2 | return CI; |
16128 | 4 | } |
16129 | 1 | case X86::BI__readfsbyte: |
16130 | 2 | case X86::BI__readfsword: |
16131 | 3 | case X86::BI__readfsdword: |
16132 | 4 | case X86::BI__readfsqword: { |
16133 | 4 | llvm::Type *IntTy = ConvertType(E->getType()); |
16134 | 4 | Value *Ptr = Builder.CreateIntToPtr( |
16135 | 4 | Ops[0], llvm::PointerType::get(getLLVMContext(), 257)); |
16136 | 4 | LoadInst *Load = Builder.CreateAlignedLoad( |
16137 | 4 | IntTy, Ptr, getContext().getTypeAlignInChars(E->getType())); |
16138 | 4 | Load->setVolatile(true); |
16139 | 4 | return Load; |
16140 | 3 | } |
16141 | 1 | case X86::BI__readgsbyte: |
16142 | 2 | case X86::BI__readgsword: |
16143 | 3 | case X86::BI__readgsdword: |
16144 | 4 | case X86::BI__readgsqword: { |
16145 | 4 | llvm::Type *IntTy = ConvertType(E->getType()); |
16146 | 4 | Value *Ptr = Builder.CreateIntToPtr( |
16147 | 4 | Ops[0], llvm::PointerType::get(getLLVMContext(), 256)); |
16148 | 4 | LoadInst *Load = Builder.CreateAlignedLoad( |
16149 | 4 | IntTy, Ptr, getContext().getTypeAlignInChars(E->getType())); |
16150 | 4 | Load->setVolatile(true); |
16151 | 4 | return Load; |
16152 | 3 | } |
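
Address spaces 256 and 257 are how the X86 backend encodes GS- and FS-relative addressing, so the loads above become plain segment-prefixed moves. A hypothetical Win64 use (assuming clang-cl or -fms-extensions), where GS:[0x30] holds the TEB's own linear address:

    #include <cstdio>

    int main() {
      unsigned long long TebAddr = __readgsqword(0x30); // Roughly NtCurrentTeb().
      std::printf("TEB at %#llx\n", TebAddr);
      return 0;
    }
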
16153 | 2 | case X86::BI__builtin_ia32_encodekey128_u32: { |
16154 | 2 | Intrinsic::ID IID = Intrinsic::x86_encodekey128; |
16155 | | |
16156 | 2 | Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), {Ops[0], Ops[1]}); |
16157 | | |
16158 | 8 | for (int i = 0; i < 3; ++i) {
16159 | 6 | Value *Extract = Builder.CreateExtractValue(Call, i + 1); |
16160 | 6 | Value *Ptr = Builder.CreateConstGEP1_32(Int8Ty, Ops[2], i * 16); |
16161 | 6 | Builder.CreateAlignedStore(Extract, Ptr, Align(1)); |
16162 | 6 | } |
16163 | | |
16164 | 2 | return Builder.CreateExtractValue(Call, 0); |
16165 | 3 | } |
16166 | 2 | case X86::BI__builtin_ia32_encodekey256_u32: { |
16167 | 2 | Intrinsic::ID IID = Intrinsic::x86_encodekey256; |
16168 | | |
16169 | 2 | Value *Call = |
16170 | 2 | Builder.CreateCall(CGM.getIntrinsic(IID), {Ops[0], Ops[1], Ops[2]}); |
16171 | | |
16172 | 10 | for (int i = 0; i < 4; ++i) {
16173 | 8 | Value *Extract = Builder.CreateExtractValue(Call, i + 1); |
16174 | 8 | Value *Ptr = Builder.CreateConstGEP1_32(Int8Ty, Ops[3], i * 16); |
16175 | 8 | Builder.CreateAlignedStore(Extract, Ptr, Align(1)); |
16176 | 8 | } |
16177 | | |
16178 | 2 | return Builder.CreateExtractValue(Call, 0); |
16179 | 3 | } |
16180 | 2 | case X86::BI__builtin_ia32_aesenc128kl_u8: |
16181 | 4 | case X86::BI__builtin_ia32_aesdec128kl_u8: |
16182 | 6 | case X86::BI__builtin_ia32_aesenc256kl_u8: |
16183 | 8 | case X86::BI__builtin_ia32_aesdec256kl_u8: { |
16184 | 8 | Intrinsic::ID IID; |
16185 | 8 | StringRef BlockName; |
16186 | 8 | switch (BuiltinID) { |
16187 | 0 | default: |
16188 | 0 | llvm_unreachable("Unexpected builtin"); |
16189 | 2 | case X86::BI__builtin_ia32_aesenc128kl_u8: |
16190 | 2 | IID = Intrinsic::x86_aesenc128kl; |
16191 | 2 | BlockName = "aesenc128kl"; |
16192 | 2 | break; |
16193 | 2 | case X86::BI__builtin_ia32_aesdec128kl_u8: |
16194 | 2 | IID = Intrinsic::x86_aesdec128kl; |
16195 | 2 | BlockName = "aesdec128kl"; |
16196 | 2 | break; |
16197 | 2 | case X86::BI__builtin_ia32_aesenc256kl_u8: |
16198 | 2 | IID = Intrinsic::x86_aesenc256kl; |
16199 | 2 | BlockName = "aesenc256kl"; |
16200 | 2 | break; |
16201 | 2 | case X86::BI__builtin_ia32_aesdec256kl_u8: |
16202 | 2 | IID = Intrinsic::x86_aesdec256kl; |
16203 | 2 | BlockName = "aesdec256kl"; |
16204 | 2 | break; |
16205 | 8 | } |
16206 | | |
16207 | 8 | Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), {Ops[1], Ops[2]}); |
16208 | | |
16209 | 8 | BasicBlock *NoError = |
16210 | 8 | createBasicBlock(BlockName + "_no_error", this->CurFn); |
16211 | 8 | BasicBlock *Error = createBasicBlock(BlockName + "_error", this->CurFn); |
16212 | 8 | BasicBlock *End = createBasicBlock(BlockName + "_end", this->CurFn); |
16213 | | |
16214 | 8 | Value *Ret = Builder.CreateExtractValue(Call, 0); |
16215 | 8 | Value *Succ = Builder.CreateTrunc(Ret, Builder.getInt1Ty()); |
16216 | 8 | Value *Out = Builder.CreateExtractValue(Call, 1); |
16217 | 8 | Builder.CreateCondBr(Succ, NoError, Error); |
16218 | | |
16219 | 8 | Builder.SetInsertPoint(NoError); |
16220 | 8 | Builder.CreateDefaultAlignedStore(Out, Ops[0]); |
16221 | 8 | Builder.CreateBr(End); |
16222 | | |
16223 | 8 | Builder.SetInsertPoint(Error); |
16224 | 8 | Constant *Zero = llvm::Constant::getNullValue(Out->getType()); |
16225 | 8 | Builder.CreateDefaultAlignedStore(Zero, Ops[0]); |
16226 | 8 | Builder.CreateBr(End); |
16227 | | |
16228 | 8 | Builder.SetInsertPoint(End); |
16229 | 8 | return Builder.CreateExtractValue(Call, 0); |
16230 | 8 | } |
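
The emitted control flow above has a straightforward scalar shape (a sketch of the branch structure only, not the AES Key Locker transform; KLResult and storeKLResult are invented names): the intrinsic's success bit selects between storing the transformed block and storing zeros, and is also the value returned.

    #include <cstdint>
    #include <cstring>

    struct KLResult {
      bool Success;
      uint8_t Block[16];
    };

    // Mirrors the no_error/error diamond emitted above.
    static bool storeKLResult(const KLResult &R, uint8_t Out[16]) {
      if (R.Success)
        std::memcpy(Out, R.Block, 16);
      else
        std::memset(Out, 0, 16);
      return R.Success;
    }

    int main() {
      KLResult R = {false, {}};
      uint8_t Out[16];
      return storeKLResult(R, Out) ? 0 : 1;
    }
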
16231 | 2 | case X86::BI__builtin_ia32_aesencwide128kl_u8: |
16232 | 4 | case X86::BI__builtin_ia32_aesdecwide128kl_u8: |
16233 | 6 | case X86::BI__builtin_ia32_aesencwide256kl_u8: |
16234 | 8 | case X86::BI__builtin_ia32_aesdecwide256kl_u8: { |
16235 | 8 | Intrinsic::ID IID; |
16236 | 8 | StringRef BlockName; |
16237 | 8 | switch (BuiltinID) { |
16238 | 2 | case X86::BI__builtin_ia32_aesencwide128kl_u8: |
16239 | 2 | IID = Intrinsic::x86_aesencwide128kl; |
16240 | 2 | BlockName = "aesencwide128kl"; |
16241 | 2 | break; |
16242 | 2 | case X86::BI__builtin_ia32_aesdecwide128kl_u8: |
16243 | 2 | IID = Intrinsic::x86_aesdecwide128kl; |
16244 | 2 | BlockName = "aesdecwide128kl"; |
16245 | 2 | break; |
16246 | 2 | case X86::BI__builtin_ia32_aesencwide256kl_u8: |
16247 | 2 | IID = Intrinsic::x86_aesencwide256kl; |
16248 | 2 | BlockName = "aesencwide256kl"; |
16249 | 2 | break; |
16250 | 2 | case X86::BI__builtin_ia32_aesdecwide256kl_u8: |
16251 | 2 | IID = Intrinsic::x86_aesdecwide256kl; |
16252 | 2 | BlockName = "aesdecwide256kl"; |
16253 | 2 | break; |
16254 | 8 | } |
16255 | | |
16256 | 8 | llvm::Type *Ty = FixedVectorType::get(Builder.getInt64Ty(), 2); |
16257 | 8 | Value *InOps[9]; |
16258 | 8 | InOps[0] = Ops[2]; |
16259 | 72 | for (int i = 0; i != 8; ++i) {
16260 | 64 | Value *Ptr = Builder.CreateConstGEP1_32(Ty, Ops[1], i); |
16261 | 64 | InOps[i + 1] = Builder.CreateAlignedLoad(Ty, Ptr, Align(16)); |
16262 | 64 | } |
16263 | | |
16264 | 8 | Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), InOps); |
16265 | | |
16266 | 8 | BasicBlock *NoError = |
16267 | 8 | createBasicBlock(BlockName + "_no_error", this->CurFn); |
16268 | 8 | BasicBlock *Error = createBasicBlock(BlockName + "_error", this->CurFn); |
16269 | 8 | BasicBlock *End = createBasicBlock(BlockName + "_end", this->CurFn); |
16270 | | |
16271 | 8 | Value *Ret = Builder.CreateExtractValue(Call, 0); |
16272 | 8 | Value *Succ = Builder.CreateTrunc(Ret, Builder.getInt1Ty()); |
16273 | 8 | Builder.CreateCondBr(Succ, NoError, Error); |
16274 | | |
16275 | 8 | Builder.SetInsertPoint(NoError); |
16276 | 72 | for (int i = 0; i != 8; ++i) {
16277 | 64 | Value *Extract = Builder.CreateExtractValue(Call, i + 1); |
16278 | 64 | Value *Ptr = Builder.CreateConstGEP1_32(Extract->getType(), Ops[0], i); |
16279 | 64 | Builder.CreateAlignedStore(Extract, Ptr, Align(16)); |
16280 | 64 | } |
16281 | 8 | Builder.CreateBr(End); |
16282 | | |
16283 | 8 | Builder.SetInsertPoint(Error); |
16284 | 72 | for (int i = 0; i != 8; ++i) {
16285 | 64 | Value *Out = Builder.CreateExtractValue(Call, i + 1); |
16286 | 64 | Constant *Zero = llvm::Constant::getNullValue(Out->getType()); |
16287 | 64 | Value *Ptr = Builder.CreateConstGEP1_32(Out->getType(), Ops[0], i); |
16288 | 64 | Builder.CreateAlignedStore(Zero, Ptr, Align(16)); |
16289 | 64 | } |
16290 | 8 | Builder.CreateBr(End); |
16291 | | |
16292 | 8 | Builder.SetInsertPoint(End); |
16293 | 8 | return Builder.CreateExtractValue(Call, 0); |
16294 | 8 | } |
16295 | 2 | case X86::BI__builtin_ia32_vfcmaddcph512_mask: |
16296 | 2 | IsConjFMA = true; |
16297 | 2 | [[fallthrough]]; |
16298 | 4 | case X86::BI__builtin_ia32_vfmaddcph512_mask: { |
16299 | 4 | Intrinsic::ID IID = IsConjFMA |
16300 | 4 | ? Intrinsic::x86_avx512fp16_mask_vfcmadd_cph_512
16301 | 4 | : Intrinsic::x86_avx512fp16_mask_vfmadd_cph_512;
16302 | 4 | Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), Ops); |
16303 | 4 | return EmitX86Select(*this, Ops[3], Call, Ops[0]); |
16304 | 2 | } |
16305 | 2 | case X86::BI__builtin_ia32_vfcmaddcsh_round_mask: |
16306 | 2 | IsConjFMA = true; |
16307 | 2 | [[fallthrough]]; |
16308 | 4 | case X86::BI__builtin_ia32_vfmaddcsh_round_mask: { |
16309 | 4 | Intrinsic::ID IID = IsConjFMA ? Intrinsic::x86_avx512fp16_mask_vfcmadd_csh
16310 | 4 | : Intrinsic::x86_avx512fp16_mask_vfmadd_csh;
16311 | 4 | Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), Ops); |
16312 | 4 | Value *And = Builder.CreateAnd(Ops[3], llvm::ConstantInt::get(Int8Ty, 1)); |
16313 | 4 | return EmitX86Select(*this, And, Call, Ops[0]); |
16314 | 2 | } |
16315 | 2 | case X86::BI__builtin_ia32_vfcmaddcsh_round_mask3: |
16316 | 2 | IsConjFMA = true; |
16317 | 2 | [[fallthrough]]; |
16318 | 4 | case X86::BI__builtin_ia32_vfmaddcsh_round_mask3: { |
16319 | 4 | Intrinsic::ID IID = IsConjFMA ? Intrinsic::x86_avx512fp16_mask_vfcmadd_csh
16320 | 4 | : Intrinsic::x86_avx512fp16_mask_vfmadd_csh;
16321 | 4 | Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), Ops); |
16322 | 4 | static constexpr int Mask[] = {0, 5, 6, 7}; |
16323 | 4 | return Builder.CreateShuffleVector(Call, Ops[2], Mask); |
16324 | 2 | } |
16325 | 2 | case X86::BI__builtin_ia32_prefetchi: |
16326 | 2 | return Builder.CreateCall( |
16327 | 2 | CGM.getIntrinsic(Intrinsic::prefetch, Ops[0]->getType()), |
16328 | 2 | {Ops[0], llvm::ConstantInt::get(Int32Ty, 0), Ops[1], |
16329 | 2 | llvm::ConstantInt::get(Int32Ty, 0)}); |
16330 | 9.15k | } |
16331 | 9.15k | } |
16332 | | |
16333 | | Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, |
16334 | 269 | const CallExpr *E) { |
16335 | | // Do not emit the builtin arguments in the arguments of a function call, |
16336 | | // because the evaluation order of function arguments is not specified in C++. |
16337 | | // This is important when testing to ensure the arguments are emitted in the |
16338 | | // same order every time. E.g.:
16339 | | // Instead of: |
16340 | | // return Builder.CreateFDiv(EmitScalarExpr(E->getArg(0)), |
16341 | | // EmitScalarExpr(E->getArg(1)), "swdiv"); |
16342 | | // Use: |
16343 | | // Value *Op0 = EmitScalarExpr(E->getArg(0)); |
16344 | | // Value *Op1 = EmitScalarExpr(E->getArg(1)); |
16345 | | // return Builder.CreateFDiv(Op0, Op1, "swdiv") |
16346 | | |
16347 | 269 | Intrinsic::ID ID = Intrinsic::not_intrinsic; |
16348 | | |
16349 | 269 | switch (BuiltinID) { |
16350 | 0 | default: return nullptr; |
16351 | | |
16352 | | // __builtin_ppc_get_timebase is GCC 4.8+'s PowerPC-specific name for what we |
16353 | | // call __builtin_readcyclecounter. |
16354 | 0 | case PPC::BI__builtin_ppc_get_timebase: |
16355 | 0 | return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::readcyclecounter)); |
16356 | | |
16357 | | // vec_ld, vec_xl_be, vec_lvsl, vec_lvsr |
16358 | 0 | case PPC::BI__builtin_altivec_lvx: |
16359 | 0 | case PPC::BI__builtin_altivec_lvxl: |
16360 | 0 | case PPC::BI__builtin_altivec_lvebx: |
16361 | 0 | case PPC::BI__builtin_altivec_lvehx: |
16362 | 0 | case PPC::BI__builtin_altivec_lvewx: |
16363 | 0 | case PPC::BI__builtin_altivec_lvsl: |
16364 | 0 | case PPC::BI__builtin_altivec_lvsr: |
16365 | 0 | case PPC::BI__builtin_vsx_lxvd2x: |
16366 | 0 | case PPC::BI__builtin_vsx_lxvw4x: |
16367 | 0 | case PPC::BI__builtin_vsx_lxvd2x_be: |
16368 | 0 | case PPC::BI__builtin_vsx_lxvw4x_be: |
16369 | 0 | case PPC::BI__builtin_vsx_lxvl: |
16370 | 0 | case PPC::BI__builtin_vsx_lxvll: |
16371 | 0 | { |
16372 | 0 | SmallVector<Value *, 2> Ops; |
16373 | 0 | Ops.push_back(EmitScalarExpr(E->getArg(0))); |
16374 | 0 | Ops.push_back(EmitScalarExpr(E->getArg(1))); |
16375 | 0 | if (!(BuiltinID == PPC::BI__builtin_vsx_lxvl || |
16376 | 0 | BuiltinID == PPC::BI__builtin_vsx_lxvll)) { |
16377 | 0 | Ops[0] = Builder.CreateGEP(Int8Ty, Ops[1], Ops[0]); |
16378 | 0 | Ops.pop_back(); |
16379 | 0 | } |
16380 | |
16381 | 0 | switch (BuiltinID) { |
16382 | 0 | default: llvm_unreachable("Unsupported ld/lvsl/lvsr intrinsic!"); |
16383 | 0 | case PPC::BI__builtin_altivec_lvx: |
16384 | 0 | ID = Intrinsic::ppc_altivec_lvx; |
16385 | 0 | break; |
16386 | 0 | case PPC::BI__builtin_altivec_lvxl: |
16387 | 0 | ID = Intrinsic::ppc_altivec_lvxl; |
16388 | 0 | break; |
16389 | 0 | case PPC::BI__builtin_altivec_lvebx: |
16390 | 0 | ID = Intrinsic::ppc_altivec_lvebx; |
16391 | 0 | break; |
16392 | 0 | case PPC::BI__builtin_altivec_lvehx: |
16393 | 0 | ID = Intrinsic::ppc_altivec_lvehx; |
16394 | 0 | break; |
16395 | 0 | case PPC::BI__builtin_altivec_lvewx: |
16396 | 0 | ID = Intrinsic::ppc_altivec_lvewx; |
16397 | 0 | break; |
16398 | 0 | case PPC::BI__builtin_altivec_lvsl: |
16399 | 0 | ID = Intrinsic::ppc_altivec_lvsl; |
16400 | 0 | break; |
16401 | 0 | case PPC::BI__builtin_altivec_lvsr: |
16402 | 0 | ID = Intrinsic::ppc_altivec_lvsr; |
16403 | 0 | break; |
16404 | 0 | case PPC::BI__builtin_vsx_lxvd2x: |
16405 | 0 | ID = Intrinsic::ppc_vsx_lxvd2x; |
16406 | 0 | break; |
16407 | 0 | case PPC::BI__builtin_vsx_lxvw4x: |
16408 | 0 | ID = Intrinsic::ppc_vsx_lxvw4x; |
16409 | 0 | break; |
16410 | 0 | case PPC::BI__builtin_vsx_lxvd2x_be: |
16411 | 0 | ID = Intrinsic::ppc_vsx_lxvd2x_be; |
16412 | 0 | break; |
16413 | 0 | case PPC::BI__builtin_vsx_lxvw4x_be: |
16414 | 0 | ID = Intrinsic::ppc_vsx_lxvw4x_be; |
16415 | 0 | break; |
16416 | 0 | case PPC::BI__builtin_vsx_lxvl: |
16417 | 0 | ID = Intrinsic::ppc_vsx_lxvl; |
16418 | 0 | break; |
16419 | 0 | case PPC::BI__builtin_vsx_lxvll: |
16420 | 0 | ID = Intrinsic::ppc_vsx_lxvll; |
16421 | 0 | break; |
16422 | 0 | } |
16423 | 0 | llvm::Function *F = CGM.getIntrinsic(ID); |
16424 | 0 | return Builder.CreateCall(F, Ops, ""); |
16425 | 0 | } |
16426 | | |
16427 | | // vec_st, vec_xst_be |
16428 | 0 | case PPC::BI__builtin_altivec_stvx: |
16429 | 0 | case PPC::BI__builtin_altivec_stvxl: |
16430 | 0 | case PPC::BI__builtin_altivec_stvebx: |
16431 | 0 | case PPC::BI__builtin_altivec_stvehx: |
16432 | 0 | case PPC::BI__builtin_altivec_stvewx: |
16433 | 0 | case PPC::BI__builtin_vsx_stxvd2x: |
16434 | 0 | case PPC::BI__builtin_vsx_stxvw4x: |
16435 | 0 | case PPC::BI__builtin_vsx_stxvd2x_be: |
16436 | 0 | case PPC::BI__builtin_vsx_stxvw4x_be: |
16437 | 0 | case PPC::BI__builtin_vsx_stxvl: |
16438 | 0 | case PPC::BI__builtin_vsx_stxvll: |
16439 | 0 | { |
16440 | 0 | SmallVector<Value *, 3> Ops; |
16441 | 0 | Ops.push_back(EmitScalarExpr(E->getArg(0))); |
16442 | 0 | Ops.push_back(EmitScalarExpr(E->getArg(1))); |
16443 | 0 | Ops.push_back(EmitScalarExpr(E->getArg(2))); |
16444 | 0 | if (!(BuiltinID == PPC::BI__builtin_vsx_stxvl || |
16445 | 0 | BuiltinID == PPC::BI__builtin_vsx_stxvll)) { |
16446 | 0 | Ops[1] = Builder.CreateGEP(Int8Ty, Ops[2], Ops[1]); |
16447 | 0 | Ops.pop_back(); |
16448 | 0 | } |
16449 | |
16450 | 0 | switch (BuiltinID) { |
16451 | 0 | default: llvm_unreachable("Unsupported st intrinsic!"); |
16452 | 0 | case PPC::BI__builtin_altivec_stvx: |
16453 | 0 | ID = Intrinsic::ppc_altivec_stvx; |
16454 | 0 | break; |
16455 | 0 | case PPC::BI__builtin_altivec_stvxl: |
16456 | 0 | ID = Intrinsic::ppc_altivec_stvxl; |
16457 | 0 | break; |
16458 | 0 | case PPC::BI__builtin_altivec_stvebx: |
16459 | 0 | ID = Intrinsic::ppc_altivec_stvebx; |
16460 | 0 | break; |
16461 | 0 | case PPC::BI__builtin_altivec_stvehx: |
16462 | 0 | ID = Intrinsic::ppc_altivec_stvehx; |
16463 | 0 | break; |
16464 | 0 | case PPC::BI__builtin_altivec_stvewx: |
16465 | 0 | ID = Intrinsic::ppc_altivec_stvewx; |
16466 | 0 | break; |
16467 | 0 | case PPC::BI__builtin_vsx_stxvd2x: |
16468 | 0 | ID = Intrinsic::ppc_vsx_stxvd2x; |
16469 | 0 | break; |
16470 | 0 | case PPC::BI__builtin_vsx_stxvw4x: |
16471 | 0 | ID = Intrinsic::ppc_vsx_stxvw4x; |
16472 | 0 | break; |
16473 | 0 | case PPC::BI__builtin_vsx_stxvd2x_be: |
16474 | 0 | ID = Intrinsic::ppc_vsx_stxvd2x_be; |
16475 | 0 | break; |
16476 | 0 | case PPC::BI__builtin_vsx_stxvw4x_be: |
16477 | 0 | ID = Intrinsic::ppc_vsx_stxvw4x_be; |
16478 | 0 | break; |
16479 | 0 | case PPC::BI__builtin_vsx_stxvl: |
16480 | 0 | ID = Intrinsic::ppc_vsx_stxvl; |
16481 | 0 | break; |
16482 | 0 | case PPC::BI__builtin_vsx_stxvll: |
16483 | 0 | ID = Intrinsic::ppc_vsx_stxvll; |
16484 | 0 | break; |
16485 | 0 | } |
16486 | 0 | llvm::Function *F = CGM.getIntrinsic(ID); |
16487 | 0 | return Builder.CreateCall(F, Ops, ""); |
16488 | 0 | } |
16489 | 0 | case PPC::BI__builtin_vsx_ldrmb: { |
16490 | | // Essentially boils down to performing an unaligned VMX load sequence so |
16491 | | // as to avoid crossing a page boundary and then shuffling the elements |
16492 | | // into the right side of the vector register. |
16493 | 0 | Value *Op0 = EmitScalarExpr(E->getArg(0)); |
16494 | 0 | Value *Op1 = EmitScalarExpr(E->getArg(1)); |
16495 | 0 | int64_t NumBytes = cast<ConstantInt>(Op1)->getZExtValue(); |
16496 | 0 | llvm::Type *ResTy = ConvertType(E->getType()); |
16497 | 0 | bool IsLE = getTarget().isLittleEndian(); |
16498 | | |
16499 | | // If the user wants the entire vector, just load the entire vector. |
16500 | 0 | if (NumBytes == 16) { |
16501 | 0 | Value *LD = |
16502 | 0 | Builder.CreateLoad(Address(Op0, ResTy, CharUnits::fromQuantity(1))); |
16503 | 0 | if (!IsLE) |
16504 | 0 | return LD; |
16505 | | |
16506 | | // Reverse the bytes on LE. |
16507 | 0 | SmallVector<int, 16> RevMask; |
16508 | 0 | for (int Idx = 0; Idx < 16; Idx++) |
16509 | 0 | RevMask.push_back(15 - Idx); |
16510 | 0 | return Builder.CreateShuffleVector(LD, LD, RevMask); |
16511 | 0 | } |
16512 | | |
16513 | 0 | llvm::Function *Lvx = CGM.getIntrinsic(Intrinsic::ppc_altivec_lvx); |
16514 | 0 | llvm::Function *Lvs = CGM.getIntrinsic(IsLE ? Intrinsic::ppc_altivec_lvsr |
16515 | 0 | : Intrinsic::ppc_altivec_lvsl); |
16516 | 0 | llvm::Function *Vperm = CGM.getIntrinsic(Intrinsic::ppc_altivec_vperm); |
16517 | 0 | Value *HiMem = Builder.CreateGEP( |
16518 | 0 | Int8Ty, Op0, ConstantInt::get(Op1->getType(), NumBytes - 1)); |
16519 | 0 | Value *LoLd = Builder.CreateCall(Lvx, Op0, "ld.lo"); |
16520 | 0 | Value *HiLd = Builder.CreateCall(Lvx, HiMem, "ld.hi"); |
16521 | 0 | Value *Mask1 = Builder.CreateCall(Lvs, Op0, "mask1"); |
16522 | |
16523 | 0 | Op0 = IsLE ? HiLd : LoLd; |
16524 | 0 | Op1 = IsLE ? LoLd : HiLd; |
16525 | 0 | Value *AllElts = Builder.CreateCall(Vperm, {Op0, Op1, Mask1}, "shuffle1"); |
16526 | 0 | Constant *Zero = llvm::Constant::getNullValue(IsLE ? ResTy : AllElts->getType()); |
16527 | |
16528 | 0 | if (IsLE) { |
16529 | 0 | SmallVector<int, 16> Consts; |
16530 | 0 | for (int Idx = 0; Idx < 16; Idx++) { |
16531 | 0 | int Val = (NumBytes - Idx - 1 >= 0) ? (NumBytes - Idx - 1) |
16532 | 0 | : 16 - (NumBytes - Idx); |
16533 | 0 | Consts.push_back(Val); |
16534 | 0 | } |
16535 | 0 | return Builder.CreateShuffleVector(Builder.CreateBitCast(AllElts, ResTy), |
16536 | 0 | Zero, Consts); |
16537 | 0 | } |
16538 | 0 | SmallVector<Constant *, 16> Consts; |
16539 | 0 | for (int Idx = 0; Idx < 16; Idx++) |
16540 | 0 | Consts.push_back(Builder.getInt8(NumBytes + Idx)); |
16541 | 0 | Value *Mask2 = ConstantVector::get(Consts); |
16542 | 0 | return Builder.CreateBitCast( |
16543 | 0 | Builder.CreateCall(Vperm, {Zero, AllElts, Mask2}, "shuffle2"), ResTy); |
16544 | 0 | } |
16545 | 0 | case PPC::BI__builtin_vsx_strmb: { |
16546 | 0 | Value *Op0 = EmitScalarExpr(E->getArg(0)); |
16547 | 0 | Value *Op1 = EmitScalarExpr(E->getArg(1)); |
16548 | 0 | Value *Op2 = EmitScalarExpr(E->getArg(2)); |
16549 | 0 | int64_t NumBytes = cast<ConstantInt>(Op1)->getZExtValue(); |
16550 | 0 | bool IsLE = getTarget().isLittleEndian(); |
16551 | 0 | auto StoreSubVec = [&](unsigned Width, unsigned Offset, unsigned EltNo) { |
16552 | | // If we are storing the whole vector, store it directly on BE; on LE,
16553 | | // reverse the bytes first and then store.
16554 | 0 | if (Width == 16) { |
16555 | 0 | Value *StVec = Op2; |
16556 | 0 | if (IsLE) { |
16557 | 0 | SmallVector<int, 16> RevMask; |
16558 | 0 | for (int Idx = 0; Idx < 16; Idx++) |
16559 | 0 | RevMask.push_back(15 - Idx); |
16560 | 0 | StVec = Builder.CreateShuffleVector(Op2, Op2, RevMask); |
16561 | 0 | } |
16562 | 0 | return Builder.CreateStore( |
16563 | 0 | StVec, Address(Op0, Op2->getType(), CharUnits::fromQuantity(1))); |
16564 | 0 | } |
16565 | 0 | auto *ConvTy = Int64Ty; |
16566 | 0 | unsigned NumElts = 0; |
16567 | 0 | switch (Width) { |
16568 | 0 | default: |
16569 | 0 | llvm_unreachable("width for stores must be a power of 2"); |
16570 | 0 | case 8: |
16571 | 0 | ConvTy = Int64Ty; |
16572 | 0 | NumElts = 2; |
16573 | 0 | break; |
16574 | 0 | case 4: |
16575 | 0 | ConvTy = Int32Ty; |
16576 | 0 | NumElts = 4; |
16577 | 0 | break; |
16578 | 0 | case 2: |
16579 | 0 | ConvTy = Int16Ty; |
16580 | 0 | NumElts = 8; |
16581 | 0 | break; |
16582 | 0 | case 1: |
16583 | 0 | ConvTy = Int8Ty; |
16584 | 0 | NumElts = 16; |
16585 | 0 | break; |
16586 | 0 | } |
16587 | 0 | Value *Vec = Builder.CreateBitCast( |
16588 | 0 | Op2, llvm::FixedVectorType::get(ConvTy, NumElts)); |
16589 | 0 | Value *Ptr = |
16590 | 0 | Builder.CreateGEP(Int8Ty, Op0, ConstantInt::get(Int64Ty, Offset)); |
16591 | 0 | Value *Elt = Builder.CreateExtractElement(Vec, EltNo); |
16592 | 0 | if (IsLE && Width > 1) { |
16593 | 0 | Function *F = CGM.getIntrinsic(Intrinsic::bswap, ConvTy); |
16594 | 0 | Elt = Builder.CreateCall(F, Elt); |
16595 | 0 | } |
16596 | 0 | return Builder.CreateStore( |
16597 | 0 | Elt, Address(Ptr, ConvTy, CharUnits::fromQuantity(1))); |
16598 | 0 | }; |
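| | // Illustrative decomposition (editor's note, derived from the driver logic
| | // below): for NumBytes == 11 the calls are StoreSubVec(8, 3, ...),
| | // StoreSubVec(2, 1, ...) and StoreSubVec(1, 0, ...), i.e. one 8-byte, one
| | // 2-byte and one 1-byte store that together cover the 11 bytes.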
16599 | 0 | unsigned Stored = 0; |
16600 | 0 | unsigned RemainingBytes = NumBytes; |
16601 | 0 | Value *Result; |
16602 | 0 | if (NumBytes == 16) |
16603 | 0 | return StoreSubVec(16, 0, 0); |
16604 | 0 | if (NumBytes >= 8) { |
16605 | 0 | Result = StoreSubVec(8, NumBytes - 8, IsLE ? 0 : 1); |
16606 | 0 | RemainingBytes -= 8; |
16607 | 0 | Stored += 8; |
16608 | 0 | } |
16609 | 0 | if (RemainingBytes >= 4) { |
16610 | 0 | Result = StoreSubVec(4, NumBytes - Stored - 4, |
16611 | 0 | IsLE ? (Stored >> 2) : 3 - (Stored >> 2)); |
16612 | 0 | RemainingBytes -= 4; |
16613 | 0 | Stored += 4; |
16614 | 0 | } |
16615 | 0 | if (RemainingBytes >= 2) { |
16616 | 0 | Result = StoreSubVec(2, NumBytes - Stored - 2, |
16617 | 0 | IsLE ? (Stored >> 1) : 7 - (Stored >> 1)); |
16618 | 0 | RemainingBytes -= 2; |
16619 | 0 | Stored += 2; |
16620 | 0 | } |
16621 | 0 | if (RemainingBytes) |
16622 | 0 | Result = |
16623 | 0 | StoreSubVec(1, NumBytes - Stored - 1, IsLE ? Stored : 15 - Stored); |
16624 | 0 | return Result; |
16625 | 0 | } |
16626 | | // Square root |
16627 | 0 | case PPC::BI__builtin_vsx_xvsqrtsp: |
16628 | 0 | case PPC::BI__builtin_vsx_xvsqrtdp: { |
16629 | 0 | llvm::Type *ResultType = ConvertType(E->getType()); |
16630 | 0 | Value *X = EmitScalarExpr(E->getArg(0)); |
16631 | 0 | if (Builder.getIsFPConstrained()) { |
16632 | 0 | llvm::Function *F = CGM.getIntrinsic( |
16633 | 0 | Intrinsic::experimental_constrained_sqrt, ResultType); |
16634 | 0 | return Builder.CreateConstrainedFPCall(F, X); |
16635 | 0 | } else { |
16636 | 0 | llvm::Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType); |
16637 | 0 | return Builder.CreateCall(F, X); |
16638 | 0 | } |
16639 | 0 | } |
16640 | | // Count leading zeros |
16641 | 0 | case PPC::BI__builtin_altivec_vclzb: |
16642 | 0 | case PPC::BI__builtin_altivec_vclzh: |
16643 | 0 | case PPC::BI__builtin_altivec_vclzw: |
16644 | 0 | case PPC::BI__builtin_altivec_vclzd: { |
16645 | 0 | llvm::Type *ResultType = ConvertType(E->getType()); |
16646 | 0 | Value *X = EmitScalarExpr(E->getArg(0)); |
16647 | 0 | Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false); |
16648 | 0 | Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType); |
16649 | 0 | return Builder.CreateCall(F, {X, Undef}); |
16650 | 0 | } |
16651 | 0 | case PPC::BI__builtin_altivec_vctzb: |
16652 | 0 | case PPC::BI__builtin_altivec_vctzh: |
16653 | 0 | case PPC::BI__builtin_altivec_vctzw: |
16654 | 0 | case PPC::BI__builtin_altivec_vctzd: { |
16655 | 0 | llvm::Type *ResultType = ConvertType(E->getType()); |
16656 | 0 | Value *X = EmitScalarExpr(E->getArg(0)); |
16657 | 0 | Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false); |
16658 | 0 | Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType); |
16659 | 0 | return Builder.CreateCall(F, {X, Undef}); |
16660 | 0 | } |
16661 | 0 | case PPC::BI__builtin_altivec_vinsd: |
16662 | 0 | case PPC::BI__builtin_altivec_vinsw: |
16663 | 0 | case PPC::BI__builtin_altivec_vinsd_elt: |
16664 | 0 | case PPC::BI__builtin_altivec_vinsw_elt: { |
16665 | 0 | llvm::Type *ResultType = ConvertType(E->getType()); |
16666 | 0 | Value *Op0 = EmitScalarExpr(E->getArg(0)); |
16667 | 0 | Value *Op1 = EmitScalarExpr(E->getArg(1)); |
16668 | 0 | Value *Op2 = EmitScalarExpr(E->getArg(2)); |
16669 | |
16670 | 0 | bool IsUnaligned = (BuiltinID == PPC::BI__builtin_altivec_vinsw || |
16671 | 0 | BuiltinID == PPC::BI__builtin_altivec_vinsd); |
16672 | |
16673 | 0 | bool Is32bit = (BuiltinID == PPC::BI__builtin_altivec_vinsw || |
16674 | 0 | BuiltinID == PPC::BI__builtin_altivec_vinsw_elt); |
16675 | | |
16676 | | // The third argument must be a compile time constant. |
16677 | 0 | ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op2); |
16678 | 0 | assert(ArgCI && |
16679 | 0 | "Third Arg to vinsw/vinsd intrinsic must be a constant integer!"); |
16680 | | |
16681 | | // The valid range for the third argument depends on the input type and
16682 | | // the builtin called.
16683 | 0 | int ValidMaxValue = 0; |
16684 | 0 | if (IsUnaligned) |
16685 | 0 | ValidMaxValue = (Is32bit) ? 12 : 8; |
16686 | 0 | else |
16687 | 0 | ValidMaxValue = (Is32bit) ? 3 : 1; |
16688 | | |
16689 | | // Get value of third argument. |
16690 | 0 | int64_t ConstArg = ArgCI->getSExtValue(); |
16691 | | |
16692 | | // Compose range checking error message. |
16693 | 0 | std::string RangeErrMsg = IsUnaligned ? "byte" : "element"; |
16694 | 0 | RangeErrMsg += " number " + llvm::to_string(ConstArg); |
16695 | 0 | RangeErrMsg += " is outside of the valid range [0, "; |
16696 | 0 | RangeErrMsg += llvm::to_string(ValidMaxValue) + "]"; |
16697 | | |
16698 | | // Issue error if third argument is not within the valid range. |
16699 | 0 | if (ConstArg < 0 || ConstArg > ValidMaxValue) |
16700 | 0 | CGM.Error(E->getExprLoc(), RangeErrMsg); |
16701 | | |
16702 | | // Input to vec_replace_elt is an element index, convert to byte index. |
16703 | 0 | if (!IsUnaligned) { |
16704 | 0 | ConstArg *= Is32bit ? 4 : 8; |
16705 | | // Fix the constant according to endianness.
16706 | 0 | if (getTarget().isLittleEndian()) |
16707 | 0 | ConstArg = (Is32bit ? 12 : 8) - ConstArg; |
16708 | 0 | } |
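| | // For example (editor's illustrative note): word element 1 becomes byte
| | // index 4, reversed to 12 - 4 = 8 on little endian; doubleword element 1
| | // becomes byte index 8, reversed to 8 - 8 = 0.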
16709 | |
16710 | 0 | ID = Is32bit ? Intrinsic::ppc_altivec_vinsw : Intrinsic::ppc_altivec_vinsd; |
16711 | 0 | Op2 = ConstantInt::getSigned(Int32Ty, ConstArg); |
16712 | | // Casting input to vector int as per intrinsic definition. |
16713 | 0 | Op0 = |
16714 | 0 | Is32bit |
16715 | 0 | ? Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int32Ty, 4)) |
16716 | 0 | : Builder.CreateBitCast(Op0, |
16717 | 0 | llvm::FixedVectorType::get(Int64Ty, 2)); |
16718 | 0 | return Builder.CreateBitCast( |
16719 | 0 | Builder.CreateCall(CGM.getIntrinsic(ID), {Op0, Op1, Op2}), ResultType); |
16720 | 0 | } |
16721 | 0 | case PPC::BI__builtin_altivec_vpopcntb: |
16722 | 0 | case PPC::BI__builtin_altivec_vpopcnth: |
16723 | 0 | case PPC::BI__builtin_altivec_vpopcntw: |
16724 | 0 | case PPC::BI__builtin_altivec_vpopcntd: { |
16725 | 0 | llvm::Type *ResultType = ConvertType(E->getType()); |
16726 | 0 | Value *X = EmitScalarExpr(E->getArg(0)); |
16727 | 0 | llvm::Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType); |
16728 | 0 | return Builder.CreateCall(F, X); |
16729 | 0 | } |
16730 | 0 | case PPC::BI__builtin_altivec_vadduqm: |
16731 | 0 | case PPC::BI__builtin_altivec_vsubuqm: { |
16732 | 0 | Value *Op0 = EmitScalarExpr(E->getArg(0)); |
16733 | 0 | Value *Op1 = EmitScalarExpr(E->getArg(1)); |
16734 | 0 | llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128); |
16735 | 0 | Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int128Ty, 1)); |
16736 | 0 | Op1 = Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(Int128Ty, 1)); |
16737 | 0 | if (BuiltinID == PPC::BI__builtin_altivec_vadduqm) |
16738 | 0 | return Builder.CreateAdd(Op0, Op1, "vadduqm"); |
16739 | 0 | else |
16740 | 0 | return Builder.CreateSub(Op0, Op1, "vsubuqm"); |
16741 | 0 | } |
16742 | 0 | case PPC::BI__builtin_altivec_vaddcuq_c: |
16743 | 0 | case PPC::BI__builtin_altivec_vsubcuq_c: { |
16744 | 0 | SmallVector<Value *, 2> Ops; |
16745 | 0 | Value *Op0 = EmitScalarExpr(E->getArg(0)); |
16746 | 0 | Value *Op1 = EmitScalarExpr(E->getArg(1)); |
16747 | 0 | llvm::Type *V1I128Ty = llvm::FixedVectorType::get( |
16748 | 0 | llvm::IntegerType::get(getLLVMContext(), 128), 1); |
16749 | 0 | Ops.push_back(Builder.CreateBitCast(Op0, V1I128Ty)); |
16750 | 0 | Ops.push_back(Builder.CreateBitCast(Op1, V1I128Ty)); |
16751 | 0 | ID = (BuiltinID == PPC::BI__builtin_altivec_vaddcuq_c) |
16752 | 0 | ? Intrinsic::ppc_altivec_vaddcuq |
16753 | 0 | : Intrinsic::ppc_altivec_vsubcuq; |
16754 | 0 | return Builder.CreateCall(CGM.getIntrinsic(ID), Ops, ""); |
16755 | 0 | } |
16756 | 0 | case PPC::BI__builtin_altivec_vaddeuqm_c: |
16757 | 0 | case PPC::BI__builtin_altivec_vaddecuq_c: |
16758 | 0 | case PPC::BI__builtin_altivec_vsubeuqm_c: |
16759 | 0 | case PPC::BI__builtin_altivec_vsubecuq_c: { |
16760 | 0 | SmallVector<Value *, 3> Ops; |
16761 | 0 | Value *Op0 = EmitScalarExpr(E->getArg(0)); |
16762 | 0 | Value *Op1 = EmitScalarExpr(E->getArg(1)); |
16763 | 0 | Value *Op2 = EmitScalarExpr(E->getArg(2)); |
16764 | 0 | llvm::Type *V1I128Ty = llvm::FixedVectorType::get( |
16765 | 0 | llvm::IntegerType::get(getLLVMContext(), 128), 1); |
16766 | 0 | Ops.push_back(Builder.CreateBitCast(Op0, V1I128Ty)); |
16767 | 0 | Ops.push_back(Builder.CreateBitCast(Op1, V1I128Ty)); |
16768 | 0 | Ops.push_back(Builder.CreateBitCast(Op2, V1I128Ty)); |
16769 | 0 | switch (BuiltinID) { |
16770 | 0 | default: |
16771 | 0 | llvm_unreachable("Unsupported intrinsic!"); |
16772 | 0 | case PPC::BI__builtin_altivec_vaddeuqm_c: |
16773 | 0 | ID = Intrinsic::ppc_altivec_vaddeuqm; |
16774 | 0 | break; |
16775 | 0 | case PPC::BI__builtin_altivec_vaddecuq_c: |
16776 | 0 | ID = Intrinsic::ppc_altivec_vaddecuq; |
16777 | 0 | break; |
16778 | 0 | case PPC::BI__builtin_altivec_vsubeuqm_c: |
16779 | 0 | ID = Intrinsic::ppc_altivec_vsubeuqm; |
16780 | 0 | break; |
16781 | 0 | case PPC::BI__builtin_altivec_vsubecuq_c: |
16782 | 0 | ID = Intrinsic::ppc_altivec_vsubecuq; |
16783 | 0 | break; |
16784 | 0 | } |
16785 | 0 | return Builder.CreateCall(CGM.getIntrinsic(ID), Ops, ""); |
16786 | 0 | } |
16787 | | // Rotate and insert under mask operation. |
16788 | | // __rldimi(rs, is, shift, mask) |
16789 | | // (rotl64(rs, shift) & mask) | (is & ~mask) |
16790 | | // __rlwimi(rs, is, shift, mask) |
16791 | | // (rotl(rs, shift) & mask) | (is & ~mask) |
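| | // For example (editor's illustrative note): __rlwimi(rs, is, 8, 0x0000FF00)
| | // computes (rotl(rs, 8) & 0x0000FF00) | (is & 0xFFFF00FF), placing bits
| | // 0..7 of rs into bits 8..15 of is.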
16792 | 0 | case PPC::BI__builtin_ppc_rldimi: |
16793 | 0 | case PPC::BI__builtin_ppc_rlwimi: { |
16794 | 0 | Value *Op0 = EmitScalarExpr(E->getArg(0)); |
16795 | 0 | Value *Op1 = EmitScalarExpr(E->getArg(1)); |
16796 | 0 | Value *Op2 = EmitScalarExpr(E->getArg(2)); |
16797 | 0 | Value *Op3 = EmitScalarExpr(E->getArg(3)); |
16798 | 0 | llvm::Type *Ty = Op0->getType(); |
16799 | 0 | Function *F = CGM.getIntrinsic(Intrinsic::fshl, Ty); |
16800 | 0 | if (BuiltinID == PPC::BI__builtin_ppc_rldimi) |
16801 | 0 | Op2 = Builder.CreateZExt(Op2, Int64Ty); |
16802 | 0 | Value *Shift = Builder.CreateCall(F, {Op0, Op0, Op2}); |
16803 | 0 | Value *X = Builder.CreateAnd(Shift, Op3); |
16804 | 0 | Value *Y = Builder.CreateAnd(Op1, Builder.CreateNot(Op3)); |
16805 | 0 | return Builder.CreateOr(X, Y); |
16806 | 0 | } |
16807 | | // Rotate and insert under mask operation. |
16808 | | // __rlwnm(rs, shift, mask) |
16809 | | // rotl(rs, shift) & mask |
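| | // For example (editor's illustrative note): __rlwnm(rs, 8, 0xFF) yields
| | // rotl(rs, 8) & 0xFF, i.e. the original top byte (bits 24..31) of rs.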
16810 | 0 | case PPC::BI__builtin_ppc_rlwnm: { |
16811 | 0 | Value *Op0 = EmitScalarExpr(E->getArg(0)); |
16812 | 0 | Value *Op1 = EmitScalarExpr(E->getArg(1)); |
16813 | 0 | Value *Op2 = EmitScalarExpr(E->getArg(2)); |
16814 | 0 | llvm::Type *Ty = Op0->getType(); |
16815 | 0 | Function *F = CGM.getIntrinsic(Intrinsic::fshl, Ty); |
16816 | 0 | Value *Shift = Builder.CreateCall(F, {Op0, Op0, Op1}); |
16817 | 0 | return Builder.CreateAnd(Shift, Op2); |
16818 | 0 | } |
16819 | 0 | case PPC::BI__builtin_ppc_poppar4: |
16820 | 0 | case PPC::BI__builtin_ppc_poppar8: { |
16821 | 0 | Value *Op0 = EmitScalarExpr(E->getArg(0)); |
16822 | 0 | llvm::Type *ArgType = Op0->getType(); |
16823 | 0 | Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType); |
16824 | 0 | Value *Tmp = Builder.CreateCall(F, Op0); |
16825 | |
16826 | 0 | llvm::Type *ResultType = ConvertType(E->getType()); |
16827 | 0 | Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1)); |
16828 | 0 | if (Result->getType() != ResultType) |
16829 | 0 | Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, |
16830 | 0 | "cast"); |
16831 | 0 | return Result; |
16832 | 0 | } |
16833 | 0 | case PPC::BI__builtin_ppc_cmpb: { |
16834 | 0 | Value *Op0 = EmitScalarExpr(E->getArg(0)); |
16835 | 0 | Value *Op1 = EmitScalarExpr(E->getArg(1)); |
16836 | 0 | if (getTarget().getTriple().isPPC64()) { |
16837 | 0 | Function *F = |
16838 | 0 | CGM.getIntrinsic(Intrinsic::ppc_cmpb, {Int64Ty, Int64Ty, Int64Ty}); |
16839 | 0 | return Builder.CreateCall(F, {Op0, Op1}, "cmpb"); |
16840 | 0 | } |
16841 | | // For 32-bit targets, emit the code below:
16842 | | // %conv = trunc i64 %a to i32 |
16843 | | // %conv1 = trunc i64 %b to i32 |
16844 | | // %shr = lshr i64 %a, 32 |
16845 | | // %conv2 = trunc i64 %shr to i32 |
16846 | | // %shr3 = lshr i64 %b, 32 |
16847 | | // %conv4 = trunc i64 %shr3 to i32 |
16848 | | // %0 = tail call i32 @llvm.ppc.cmpb32(i32 %conv, i32 %conv1) |
16849 | | // %conv5 = zext i32 %0 to i64 |
16850 | | // %1 = tail call i32 @llvm.ppc.cmpb32(i32 %conv2, i32 %conv4) |
16851 | | // %conv614 = zext i32 %1 to i64 |
16852 | | // %shl = shl nuw i64 %conv614, 32 |
16853 | | // %or = or i64 %shl, %conv5 |
16854 | | // ret i64 %or |
16855 | 0 | Function *F = |
16856 | 0 | CGM.getIntrinsic(Intrinsic::ppc_cmpb, {Int32Ty, Int32Ty, Int32Ty}); |
16857 | 0 | Value *ArgOneLo = Builder.CreateTrunc(Op0, Int32Ty); |
16858 | 0 | Value *ArgTwoLo = Builder.CreateTrunc(Op1, Int32Ty); |
16859 | 0 | Constant *ShiftAmt = ConstantInt::get(Int64Ty, 32); |
16860 | 0 | Value *ArgOneHi = |
16861 | 0 | Builder.CreateTrunc(Builder.CreateLShr(Op0, ShiftAmt), Int32Ty); |
16862 | 0 | Value *ArgTwoHi = |
16863 | 0 | Builder.CreateTrunc(Builder.CreateLShr(Op1, ShiftAmt), Int32Ty); |
16864 | 0 | Value *ResLo = Builder.CreateZExt( |
16865 | 0 | Builder.CreateCall(F, {ArgOneLo, ArgTwoLo}, "cmpb"), Int64Ty); |
16866 | 0 | Value *ResHiShift = Builder.CreateZExt( |
16867 | 0 | Builder.CreateCall(F, {ArgOneHi, ArgTwoHi}, "cmpb"), Int64Ty); |
16868 | 0 | Value *ResHi = Builder.CreateShl(ResHiShift, ShiftAmt); |
16869 | 0 | return Builder.CreateOr(ResLo, ResHi); |
16870 | 0 | } |
16871 | | // Copy sign |
16872 | 0 | case PPC::BI__builtin_vsx_xvcpsgnsp: |
16873 | 0 | case PPC::BI__builtin_vsx_xvcpsgndp: { |
16874 | 0 | llvm::Type *ResultType = ConvertType(E->getType()); |
16875 | 0 | Value *X = EmitScalarExpr(E->getArg(0)); |
16876 | 0 | Value *Y = EmitScalarExpr(E->getArg(1)); |
16877 | 0 | ID = Intrinsic::copysign; |
16878 | 0 | llvm::Function *F = CGM.getIntrinsic(ID, ResultType); |
16879 | 0 | return Builder.CreateCall(F, {X, Y}); |
16880 | 0 | } |
16881 | | // Rounding/truncation |
16882 | 0 | case PPC::BI__builtin_vsx_xvrspip: |
16883 | 0 | case PPC::BI__builtin_vsx_xvrdpip: |
16884 | 0 | case PPC::BI__builtin_vsx_xvrdpim: |
16885 | 0 | case PPC::BI__builtin_vsx_xvrspim: |
16886 | 0 | case PPC::BI__builtin_vsx_xvrdpi: |
16887 | 0 | case PPC::BI__builtin_vsx_xvrspi: |
16888 | 0 | case PPC::BI__builtin_vsx_xvrdpic: |
16889 | 0 | case PPC::BI__builtin_vsx_xvrspic: |
16890 | 0 | case PPC::BI__builtin_vsx_xvrdpiz: |
16891 | 0 | case PPC::BI__builtin_vsx_xvrspiz: { |
16892 | 0 | llvm::Type *ResultType = ConvertType(E->getType()); |
16893 | 0 | Value *X = EmitScalarExpr(E->getArg(0)); |
16894 | 0 | if (BuiltinID == PPC::BI__builtin_vsx_xvrdpim || |
16895 | 0 | BuiltinID == PPC::BI__builtin_vsx_xvrspim) |
16896 | 0 | ID = Builder.getIsFPConstrained() |
16897 | 0 | ? Intrinsic::experimental_constrained_floor |
16898 | 0 | : Intrinsic::floor; |
16899 | 0 | else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpi || |
16900 | 0 | BuiltinID == PPC::BI__builtin_vsx_xvrspi) |
16901 | 0 | ID = Builder.getIsFPConstrained() |
16902 | 0 | ? Intrinsic::experimental_constrained_round |
16903 | 0 | : Intrinsic::round; |
16904 | 0 | else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpic || |
16905 | 0 | BuiltinID == PPC::BI__builtin_vsx_xvrspic) |
16906 | 0 | ID = Builder.getIsFPConstrained() |
16907 | 0 | ? Intrinsic::experimental_constrained_rint |
16908 | 0 | : Intrinsic::rint; |
16909 | 0 | else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpip || |
16910 | 0 | BuiltinID == PPC::BI__builtin_vsx_xvrspip) |
16911 | 0 | ID = Builder.getIsFPConstrained() |
16912 | 0 | ? Intrinsic::experimental_constrained_ceil |
16913 | 0 | : Intrinsic::ceil; |
16914 | 0 | else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpiz || |
16915 | 0 | BuiltinID == PPC::BI__builtin_vsx_xvrspiz) |
16916 | 0 | ID = Builder.getIsFPConstrained() |
16917 | 0 | ? Intrinsic::experimental_constrained_trunc |
16918 | 0 | : Intrinsic::trunc; |
16919 | 0 | llvm::Function *F = CGM.getIntrinsic(ID, ResultType); |
16920 | 0 | return Builder.getIsFPConstrained() ? Builder.CreateConstrainedFPCall(F, X) |
16921 | 0 | : Builder.CreateCall(F, X); |
16922 | 0 | } |
16923 | | |
16924 | | // Absolute value |
16925 | 0 | case PPC::BI__builtin_vsx_xvabsdp: |
16926 | 0 | case PPC::BI__builtin_vsx_xvabssp: { |
16927 | 0 | llvm::Type *ResultType = ConvertType(E->getType()); |
16928 | 0 | Value *X = EmitScalarExpr(E->getArg(0)); |
16929 | 0 | llvm::Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType); |
16930 | 0 | return Builder.CreateCall(F, X); |
16931 | 0 | } |
16932 | | |
16933 | | // Fastmath by default |
16934 | 0 | case PPC::BI__builtin_ppc_recipdivf: |
16935 | 0 | case PPC::BI__builtin_ppc_recipdivd: |
16936 | 0 | case PPC::BI__builtin_ppc_rsqrtf: |
16937 | 0 | case PPC::BI__builtin_ppc_rsqrtd: { |
16938 | 0 | FastMathFlags FMF = Builder.getFastMathFlags(); |
16939 | 0 | Builder.getFastMathFlags().setFast(); |
16940 | 0 | llvm::Type *ResultType = ConvertType(E->getType()); |
16941 | 0 | Value *X = EmitScalarExpr(E->getArg(0)); |
16942 | |
16943 | 0 | if (BuiltinID == PPC::BI__builtin_ppc_recipdivf || |
16944 | 0 | BuiltinID == PPC::BI__builtin_ppc_recipdivd) { |
16945 | 0 | Value *Y = EmitScalarExpr(E->getArg(1)); |
16946 | 0 | Value *FDiv = Builder.CreateFDiv(X, Y, "recipdiv"); |
16947 | 0 | Builder.getFastMathFlags() &= (FMF); |
16948 | 0 | return FDiv; |
16949 | 0 | } |
16950 | 0 | auto *One = ConstantFP::get(ResultType, 1.0); |
16951 | 0 | llvm::Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType); |
16952 | 0 | Value *FDiv = Builder.CreateFDiv(One, Builder.CreateCall(F, X), "rsqrt"); |
16953 | 0 | Builder.getFastMathFlags() &= (FMF); |
16954 | 0 | return FDiv; |
16955 | 0 | } |
16956 | 0 | case PPC::BI__builtin_ppc_alignx: { |
16957 | 0 | Value *Op0 = EmitScalarExpr(E->getArg(0)); |
16958 | 0 | Value *Op1 = EmitScalarExpr(E->getArg(1)); |
16959 | 0 | ConstantInt *AlignmentCI = cast<ConstantInt>(Op0); |
16960 | 0 | if (AlignmentCI->getValue().ugt(llvm::Value::MaximumAlignment)) |
16961 | 0 | AlignmentCI = ConstantInt::get(AlignmentCI->getType(), |
16962 | 0 | llvm::Value::MaximumAlignment); |
16963 | |
16964 | 0 | emitAlignmentAssumption(Op1, E->getArg(1), |
16965 | 0 | /*The expr loc is sufficient.*/ SourceLocation(), |
16966 | 0 | AlignmentCI, nullptr); |
16967 | 0 | return Op1; |
16968 | 0 | } |
16969 | 0 | case PPC::BI__builtin_ppc_rdlam: { |
16970 | 0 | Value *Op0 = EmitScalarExpr(E->getArg(0)); |
16971 | 0 | Value *Op1 = EmitScalarExpr(E->getArg(1)); |
16972 | 0 | Value *Op2 = EmitScalarExpr(E->getArg(2)); |
16973 | 0 | llvm::Type *Ty = Op0->getType(); |
16974 | 0 | Value *ShiftAmt = Builder.CreateIntCast(Op1, Ty, false); |
16975 | 0 | Function *F = CGM.getIntrinsic(Intrinsic::fshl, Ty); |
16976 | 0 | Value *Rotate = Builder.CreateCall(F, {Op0, Op0, ShiftAmt}); |
16977 | 0 | return Builder.CreateAnd(Rotate, Op2); |
16978 | 0 | } |
16979 | 0 | case PPC::BI__builtin_ppc_load2r: { |
16980 | 0 | Function *F = CGM.getIntrinsic(Intrinsic::ppc_load2r); |
16981 | 0 | Value *Op0 = EmitScalarExpr(E->getArg(0)); |
16982 | 0 | Value *LoadIntrinsic = Builder.CreateCall(F, {Op0}); |
16983 | 0 | return Builder.CreateTrunc(LoadIntrinsic, Int16Ty); |
16984 | 0 | } |
16985 | | // FMA variations |
16986 | 4 | case PPC::BI__builtin_ppc_fnmsub: |
16987 | 8 | case PPC::BI__builtin_ppc_fnmsubs: |
16988 | 9 | case PPC::BI__builtin_vsx_xvmaddadp: |
16989 | 10 | case PPC::BI__builtin_vsx_xvmaddasp: |
16990 | 11 | case PPC::BI__builtin_vsx_xvnmaddadp: |
16991 | 12 | case PPC::BI__builtin_vsx_xvnmaddasp: |
16992 | 13 | case PPC::BI__builtin_vsx_xvmsubadp: |
16993 | 14 | case PPC::BI__builtin_vsx_xvmsubasp: |
16994 | 15 | case PPC::BI__builtin_vsx_xvnmsubadp: |
16995 | 16 | case PPC::BI__builtin_vsx_xvnmsubasp: { |
16996 | 16 | llvm::Type *ResultType = ConvertType(E->getType()); |
16997 | 16 | Value *X = EmitScalarExpr(E->getArg(0)); |
16998 | 16 | Value *Y = EmitScalarExpr(E->getArg(1)); |
16999 | 16 | Value *Z = EmitScalarExpr(E->getArg(2)); |
17000 | 16 | llvm::Function *F; |
17001 | 16 | if (Builder.getIsFPConstrained()) |
17002 | 0 | F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType); |
17003 | 16 | else |
17004 | 16 | F = CGM.getIntrinsic(Intrinsic::fma, ResultType); |
17005 | 16 | switch (BuiltinID) { |
17006 | 1 | case PPC::BI__builtin_vsx_xvmaddadp: |
17007 | 2 | case PPC::BI__builtin_vsx_xvmaddasp: |
17008 | 2 | if (Builder.getIsFPConstrained()) |
17009 | 0 | return Builder.CreateConstrainedFPCall(F, {X, Y, Z}); |
17010 | 2 | else |
17011 | 2 | return Builder.CreateCall(F, {X, Y, Z}); |
17012 | 1 | case PPC::BI__builtin_vsx_xvnmaddadp: |
17013 | 2 | case PPC::BI__builtin_vsx_xvnmaddasp: |
17014 | 2 | if (Builder.getIsFPConstrained()) |
17015 | 0 | return Builder.CreateFNeg( |
17016 | 0 | Builder.CreateConstrainedFPCall(F, {X, Y, Z}), "neg"); |
17017 | 2 | else |
17018 | 2 | return Builder.CreateFNeg(Builder.CreateCall(F, {X, Y, Z}), "neg"); |
17019 | 1 | case PPC::BI__builtin_vsx_xvmsubadp: |
17020 | 2 | case PPC::BI__builtin_vsx_xvmsubasp: |
17021 | 2 | if (Builder.getIsFPConstrained()) |
17022 | 0 | return Builder.CreateConstrainedFPCall( |
17023 | 0 | F, {X, Y, Builder.CreateFNeg(Z, "neg")}); |
17024 | 2 | else |
17025 | 2 | return Builder.CreateCall(F, {X, Y, Builder.CreateFNeg(Z, "neg")}); |
17026 | 4 | case PPC::BI__builtin_ppc_fnmsub: |
17027 | 8 | case PPC::BI__builtin_ppc_fnmsubs: |
17028 | 9 | case PPC::BI__builtin_vsx_xvnmsubadp: |
17029 | 10 | case PPC::BI__builtin_vsx_xvnmsubasp: |
17030 | 10 | if (Builder.getIsFPConstrained()) |
17031 | 0 | return Builder.CreateFNeg( |
17032 | 0 | Builder.CreateConstrainedFPCall( |
17033 | 0 | F, {X, Y, Builder.CreateFNeg(Z, "neg")}), |
17034 | 0 | "neg"); |
17035 | 10 | else |
17036 | 10 | return Builder.CreateCall( |
17037 | 10 | CGM.getIntrinsic(Intrinsic::ppc_fnmsub, ResultType), {X, Y, Z}); |
17038 | 16 | } |
17039 | 0 | llvm_unreachable("Unknown FMA operation"); |
17040 | 0 | return nullptr; // Suppress no-return warning |
17041 | 16 | } |
17042 | | |
17043 | 0 | case PPC::BI__builtin_vsx_insertword: { |
17044 | 0 | Value *Op0 = EmitScalarExpr(E->getArg(0)); |
17045 | 0 | Value *Op1 = EmitScalarExpr(E->getArg(1)); |
17046 | 0 | Value *Op2 = EmitScalarExpr(E->getArg(2)); |
17047 | 0 | llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxinsertw); |
17048 | | |
17049 | | // The third argument is a compile-time constant int. It must be clamped
17050 | | // to the range [0, 12].
17051 | 0 | ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op2); |
17052 | 0 | assert(ArgCI && |
17053 | 0 | "Third arg to xxinsertw intrinsic must be constant integer"); |
17054 | 0 | const int64_t MaxIndex = 12; |
17055 | 0 | int64_t Index = std::clamp(ArgCI->getSExtValue(), (int64_t)0, MaxIndex); |
17056 | | |
17057 | | // The builtin semantics don't exactly match the xxinsertw instruction's
17058 | | // semantics (which ppc_vsx_xxinsertw follows). The builtin extracts the
17059 | | // word from the first argument and inserts it into the second argument. The
17060 | | // instruction extracts the word from its second input register and inserts
17061 | | // it into its first input register, so swap the first and second arguments.
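| | // For example (editor's illustrative note): after the swap here, a clamped
| | // index of 4 is reversed below to 12 - 4 = 8 on little-endian targets.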
17062 | 0 | std::swap(Op0, Op1); |
17063 | | |
17064 | | // Need to cast the second argument from a vector of unsigned int to a |
17065 | | // vector of long long. |
17066 | 0 | Op1 = Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(Int64Ty, 2)); |
17067 | |
17068 | 0 | if (getTarget().isLittleEndian()) { |
17069 | | // Reverse the double words in the vector we will extract from. |
17070 | 0 | Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int64Ty, 2)); |
17071 | 0 | Op0 = Builder.CreateShuffleVector(Op0, Op0, ArrayRef<int>{1, 0}); |
17072 | | |
17073 | | // Reverse the index. |
17074 | 0 | Index = MaxIndex - Index; |
17075 | 0 | } |
17076 | | |
17077 | | // Intrinsic expects the first arg to be a vector of int. |
17078 | 0 | Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int32Ty, 4)); |
17079 | 0 | Op2 = ConstantInt::getSigned(Int32Ty, Index); |
17080 | 0 | return Builder.CreateCall(F, {Op0, Op1, Op2}); |
17081 | 0 | } |
17082 | | |
17083 | 0 | case PPC::BI__builtin_vsx_extractuword: { |
17084 | 0 | Value *Op0 = EmitScalarExpr(E->getArg(0)); |
17085 | 0 | Value *Op1 = EmitScalarExpr(E->getArg(1)); |
17086 | 0 | llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxextractuw); |
17087 | | |
17088 | | // Intrinsic expects the first argument to be a vector of doublewords. |
17089 | 0 | Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int64Ty, 2)); |
17090 | | |
17091 | | // The second argument is a compile time constant int that needs to |
17092 | | // be clamped to the range [0, 12]. |
17093 | 0 | ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op1); |
17094 | 0 | assert(ArgCI && |
17095 | 0 | "Second Arg to xxextractuw intrinsic must be a constant integer!"); |
17096 | 0 | const int64_t MaxIndex = 12; |
17097 | 0 | int64_t Index = std::clamp(ArgCI->getSExtValue(), (int64_t)0, MaxIndex); |
17098 | |
17099 | 0 | if (getTarget().isLittleEndian()) { |
17100 | | // Reverse the index. |
17101 | 0 | Index = MaxIndex - Index; |
17102 | 0 | Op1 = ConstantInt::getSigned(Int32Ty, Index); |
17103 | | |
17104 | | // Emit the call, then reverse the double words of the results vector. |
17105 | 0 | Value *Call = Builder.CreateCall(F, {Op0, Op1}); |
17106 | |
17107 | 0 | Value *ShuffleCall = |
17108 | 0 | Builder.CreateShuffleVector(Call, Call, ArrayRef<int>{1, 0}); |
17109 | 0 | return ShuffleCall; |
17110 | 0 | } else { |
17111 | 0 | Op1 = ConstantInt::getSigned(Int32Ty, Index); |
17112 | 0 | return Builder.CreateCall(F, {Op0, Op1}); |
17113 | 0 | } |
17114 | 0 | } |
17115 | | |
17116 | 0 | case PPC::BI__builtin_vsx_xxpermdi: { |
17117 | 0 | Value *Op0 = EmitScalarExpr(E->getArg(0)); |
17118 | 0 | Value *Op1 = EmitScalarExpr(E->getArg(1)); |
17119 | 0 | Value *Op2 = EmitScalarExpr(E->getArg(2)); |
17120 | 0 | ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op2); |
17121 | 0 | assert(ArgCI && "Third arg must be constant integer!"); |
17122 | | |
17123 | 0 | unsigned Index = ArgCI->getZExtValue(); |
17124 | 0 | Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int64Ty, 2)); |
17125 | 0 | Op1 = Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(Int64Ty, 2)); |
17126 | | |
17127 | | // Account for endianness by treating this as just a shuffle. So we use the |
17128 | | // same indices for both LE and BE in order to produce expected results in |
17129 | | // both cases. |
17130 | 0 | int ElemIdx0 = (Index & 2) >> 1; |
17131 | 0 | int ElemIdx1 = 2 + (Index & 1); |
17132 | |
17133 | 0 | int ShuffleElts[2] = {ElemIdx0, ElemIdx1}; |
17134 | 0 | Value *ShuffleCall = Builder.CreateShuffleVector(Op0, Op1, ShuffleElts); |
17135 | 0 | QualType BIRetType = E->getType(); |
17136 | 0 | auto RetTy = ConvertType(BIRetType); |
17137 | 0 | return Builder.CreateBitCast(ShuffleCall, RetTy); |
17138 | 0 | } |
17139 | | |
17140 | 0 | case PPC::BI__builtin_vsx_xxsldwi: { |
17141 | 0 | Value *Op0 = EmitScalarExpr(E->getArg(0)); |
17142 | 0 | Value *Op1 = EmitScalarExpr(E->getArg(1)); |
17143 | 0 | Value *Op2 = EmitScalarExpr(E->getArg(2)); |
17144 | 0 | ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op2); |
17145 | 0 | assert(ArgCI && "Third argument must be a compile time constant"); |
17146 | 0 | unsigned Index = ArgCI->getZExtValue() & 0x3; |
17147 | 0 | Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int32Ty, 4)); |
17148 | 0 | Op1 = Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(Int32Ty, 4)); |
17149 | | |
17150 | | // Create a shuffle mask |
17151 | 0 | int ElemIdx0; |
17152 | 0 | int ElemIdx1; |
17153 | 0 | int ElemIdx2; |
17154 | 0 | int ElemIdx3; |
17155 | 0 | if (getTarget().isLittleEndian()) { |
17156 | | // Little endian element N comes from element 8+N-Index of the |
17157 | | // concatenated wide vector (of course, using modulo arithmetic on |
17158 | | // the total number of elements). |
17159 | 0 | ElemIdx0 = (8 - Index) % 8; |
17160 | 0 | ElemIdx1 = (9 - Index) % 8; |
17161 | 0 | ElemIdx2 = (10 - Index) % 8; |
17162 | 0 | ElemIdx3 = (11 - Index) % 8; |
17163 | 0 | } else { |
17164 | | // Big endian ElemIdx<N> = Index + N |
17165 | 0 | ElemIdx0 = Index; |
17166 | 0 | ElemIdx1 = Index + 1; |
17167 | 0 | ElemIdx2 = Index + 2; |
17168 | 0 | ElemIdx3 = Index + 3; |
17169 | 0 | } |
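| | // For example (editor's illustrative note): Index == 1 yields the mask
| | // {7, 0, 1, 2} on little endian ((8-1)%8 through (11-1)%8) and {1, 2, 3, 4}
| | // on big endian.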
17170 | |
17171 | 0 | int ShuffleElts[4] = {ElemIdx0, ElemIdx1, ElemIdx2, ElemIdx3}; |
17172 | 0 | Value *ShuffleCall = Builder.CreateShuffleVector(Op0, Op1, ShuffleElts); |
17173 | 0 | QualType BIRetType = E->getType(); |
17174 | 0 | auto RetTy = ConvertType(BIRetType); |
17175 | 0 | return Builder.CreateBitCast(ShuffleCall, RetTy); |
17176 | 0 | } |
17177 | | |
17178 | 0 | case PPC::BI__builtin_pack_vector_int128: { |
17179 | 0 | Value *Op0 = EmitScalarExpr(E->getArg(0)); |
17180 | 0 | Value *Op1 = EmitScalarExpr(E->getArg(1)); |
17181 | 0 | bool isLittleEndian = getTarget().isLittleEndian(); |
17182 | 0 | Value *PoisonValue = |
17183 | 0 | llvm::PoisonValue::get(llvm::FixedVectorType::get(Op0->getType(), 2)); |
17184 | 0 | Value *Res = Builder.CreateInsertElement( |
17185 | 0 | PoisonValue, Op0, (uint64_t)(isLittleEndian ? 1 : 0)); |
17186 | 0 | Res = Builder.CreateInsertElement(Res, Op1, |
17187 | 0 | (uint64_t)(isLittleEndian ? 0 : 1)); |
17188 | 0 | return Builder.CreateBitCast(Res, ConvertType(E->getType())); |
17189 | 0 | } |
17190 | | |
17191 | 0 | case PPC::BI__builtin_unpack_vector_int128: { |
17192 | 0 | Value *Op0 = EmitScalarExpr(E->getArg(0)); |
17193 | 0 | Value *Op1 = EmitScalarExpr(E->getArg(1)); |
17194 | 0 | ConstantInt *Index = cast<ConstantInt>(Op1); |
17195 | 0 | Value *Unpacked = Builder.CreateBitCast( |
17196 | 0 | Op0, llvm::FixedVectorType::get(ConvertType(E->getType()), 2)); |
17197 | |
17198 | 0 | if (getTarget().isLittleEndian()) |
17199 | 0 | Index = ConstantInt::get(Index->getType(), 1 - Index->getZExtValue()); |
17200 | |
17201 | 0 | return Builder.CreateExtractElement(Unpacked, Index); |
17202 | 0 | } |
17203 | | |
17204 | 0 | case PPC::BI__builtin_ppc_sthcx: { |
17205 | 0 | llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_sthcx); |
17206 | 0 | Value *Op0 = EmitScalarExpr(E->getArg(0)); |
17207 | 0 | Value *Op1 = Builder.CreateSExt(EmitScalarExpr(E->getArg(1)), Int32Ty); |
17208 | 0 | return Builder.CreateCall(F, {Op0, Op1}); |
17209 | 0 | } |
17210 | | |
17211 | | // The PPC MMA builtins take a pointer to a __vector_quad as an argument. |
17212 | | // Some of the MMA instructions accumulate their result into an existing |
17213 | | // accumulator whereas the others generate a new accumulator. So we need to |
17214 | | // use custom code generation to expand such a builtin call into a load of
17215 | | // the accumulator (if the corresponding instruction accumulates its result),
17216 | | // followed by the call to the intrinsic and a store of the result.
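| | // As an illustrative sketch (editor's note; <intr> is a placeholder, not a
| | // real intrinsic name), an accumulating MMA builtin expands roughly to:
| | //   %acc = load <512 x i1>, ptr %dst
| | //   %res = call <512 x i1> @llvm.ppc.mma.<intr>(<512 x i1> %acc, ...)
| | //   store <512 x i1> %res, ptr %dst, align 64
| | // while a non-accumulating builtin omits the initial load.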
17217 | 0 | #define CUSTOM_BUILTIN(Name, Intr, Types, Accumulate, Feature) \ |
17218 | 11.0k | case PPC::BI__builtin_##Name: |
17219 | 11.0k | #include "clang/Basic/BuiltinsPPC.def"0 |
17220 | 11.0k | { |
17221 | 11.0k | SmallVector<Value *, 4> Ops; |
17222 | 11.0k | for (unsigned i = 0, e = E->getNumArgs(); i != e1.06k ; i++822 ) |
17223 | 822 | if (E->getArg(i)->getType()->isArrayType()) |
17224 | 4 | Ops.push_back(EmitArrayToPointerDecay(E->getArg(i)).getPointer()); |
17225 | 818 | else |
17226 | 818 | Ops.push_back(EmitScalarExpr(E->getArg(i))); |
17227 | | // The first argument of the disassemble builtins below is a pointer used
17228 | | // to store their result. However, the LLVM intrinsics return their results
17229 | | // in multiple return values, so here we emit code extracting those values
17230 | | // from the intrinsic result and storing them through that pointer.
17231 | 11.0k | if (BuiltinID == PPC::BI__builtin_mma_disassemble_acc240 || |
17232 | 240 | BuiltinID == PPC::BI__builtin_vsx_disassemble_pair234 || |
17233 | 240 | BuiltinID == PPC::BI__builtin_mma_disassemble_pair231 ) { |
17234 | 11 | unsigned NumVecs = 2; |
17235 | 11 | auto Intrinsic = Intrinsic::ppc_vsx_disassemble_pair; |
17236 | 11 | if (BuiltinID == PPC::BI__builtin_mma_disassemble_acc) { |
17237 | 6 | NumVecs = 4; |
17238 | 6 | Intrinsic = Intrinsic::ppc_mma_disassemble_acc; |
17239 | 6 | } |
17240 | 11 | llvm::Function *F = CGM.getIntrinsic(Intrinsic); |
17241 | 11 | Address Addr = EmitPointerWithAlignment(E->getArg(1)); |
17242 | 11 | Value *Vec = Builder.CreateLoad(Addr); |
17243 | 11 | Value *Call = Builder.CreateCall(F, {Vec}); |
17244 | 11 | llvm::Type *VTy = llvm::FixedVectorType::get(Int8Ty, 16); |
17245 | 11 | Value *Ptr = Ops[0]; |
17246 | 45 | for (unsigned i=0; i<NumVecs; i++34 ) { |
17247 | 34 | Value *Vec = Builder.CreateExtractValue(Call, i); |
17248 | 34 | llvm::ConstantInt* Index = llvm::ConstantInt::get(IntTy, i); |
17249 | 34 | Value *GEP = Builder.CreateInBoundsGEP(VTy, Ptr, Index); |
17250 | 34 | Builder.CreateAlignedStore(Vec, GEP, MaybeAlign(16)); |
17251 | 34 | } |
17252 | 11 | return Call; |
17253 | 11 | } |
17254 | 229 | if (BuiltinID == PPC::BI__builtin_vsx_build_pair || |
17255 | 229 | BuiltinID == PPC::BI__builtin_mma_build_acc225 ) { |
17256 | | // Reverse the order of the operands for LE, so the |
17257 | | // same builtin call can be used on both LE and BE |
17258 | | // without the need for the programmer to swap operands. |
17259 | | // The operands are reversed starting from the second argument, since
17260 | | // the first operand is the pointer to the pair/accumulator
17261 | | // that is being built.
17262 | 6 | if (getTarget().isLittleEndian()) |
17263 | 3 | std::reverse(Ops.begin() + 1, Ops.end()); |
17264 | 6 | } |
17265 | 229 | bool Accumulate; |
17266 | 229 | switch (BuiltinID) { |
17267 | 0 | #define CUSTOM_BUILTIN(Name, Intr, Types, Acc, Feature) \ |
17268 | 229 | case PPC::BI__builtin_##Name: \ |
17269 | 229 | ID = Intrinsic::ppc_##Intr; \ |
17270 | 229 | Accumulate = Acc; \ |
17271 | 229 | break; |
17272 | 229 | #include "clang/Basic/BuiltinsPPC.def"0 |
17273 | 229 | } |
17274 | 229 | if (BuiltinID == PPC::BI__builtin_vsx_lxvp || |
17275 | 229 | BuiltinID == PPC::BI__builtin_vsx_stxvp208 || |
17276 | 229 | BuiltinID == PPC::BI__builtin_mma_lxvp193 || |
17277 | 229 | BuiltinID == PPC::BI__builtin_mma_stxvp173 ) { |
17278 | 70 | if (BuiltinID == PPC::BI__builtin_vsx_lxvp || |
17279 | 70 | BuiltinID == PPC::BI__builtin_mma_lxvp49 ) { |
17280 | 41 | Ops[0] = Builder.CreateGEP(Int8Ty, Ops[1], Ops[0]); |
17281 | 41 | } else { |
17282 | 29 | Ops[1] = Builder.CreateGEP(Int8Ty, Ops[2], Ops[1]); |
17283 | 29 | } |
17284 | 70 | Ops.pop_back(); |
17285 | 70 | llvm::Function *F = CGM.getIntrinsic(ID); |
17286 | 70 | return Builder.CreateCall(F, Ops, ""); |
17287 | 70 | } |
17288 | 159 | SmallVector<Value*, 4> CallOps; |
17289 | 159 | if (Accumulate) { |
17290 | 102 | Address Addr = EmitPointerWithAlignment(E->getArg(0)); |
17291 | 102 | Value *Acc = Builder.CreateLoad(Addr); |
17292 | 102 | CallOps.push_back(Acc); |
17293 | 102 | } |
17294 | 631 | for (unsigned i=1; i<Ops.size(); i++472 ) |
17295 | 472 | CallOps.push_back(Ops[i]); |
17296 | 159 | llvm::Function *F = CGM.getIntrinsic(ID); |
17297 | 159 | Value *Call = Builder.CreateCall(F, CallOps); |
17298 | 159 | return Builder.CreateAlignedStore(Call, Ops[0], MaybeAlign(64)); |
17299 | 229 | } |
17300 | | |
17301 | 0 | case PPC::BI__builtin_ppc_compare_and_swap: |
17302 | 0 | case PPC::BI__builtin_ppc_compare_and_swaplp: { |
17303 | 0 | Address Addr = EmitPointerWithAlignment(E->getArg(0)); |
17304 | 0 | Address OldValAddr = EmitPointerWithAlignment(E->getArg(1)); |
17305 | 0 | Value *OldVal = Builder.CreateLoad(OldValAddr); |
17306 | 0 | QualType AtomicTy = E->getArg(0)->getType()->getPointeeType(); |
17307 | 0 | LValue LV = MakeAddrLValue(Addr, AtomicTy); |
17308 | 0 | Value *Op2 = EmitScalarExpr(E->getArg(2)); |
17309 | 0 | auto Pair = EmitAtomicCompareExchange( |
17310 | 0 | LV, RValue::get(OldVal), RValue::get(Op2), E->getExprLoc(), |
17311 | 0 | llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Monotonic, true); |
17312 | | // Unlike c11's atomic_compare_exchange, according to |
17313 | | // https://www.ibm.com/docs/en/xl-c-and-cpp-aix/16.1?topic=functions-compare-swap-compare-swaplp |
17314 | | // > In either case, the contents of the memory location specified by addr |
17315 | | // > are copied into the memory location specified by old_val_addr. |
17316 | | // But it does not specify whether the store to OldValAddr is atomic or
17317 | | // which ordering to use. Following XL's codegen, we treat it as a normal
17318 | | // store.
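| | // Illustrative usage (editor's note; the variable names are hypothetical):
| | //   int ok = __builtin_ppc_compare_and_swap(&val, &expected, desired);
| | // Afterwards 'expected' holds the prior contents of 'val', and 'ok' is
| | // nonzero iff the swap was performed.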
17319 | 0 | Value *LoadedVal = Pair.first.getScalarVal(); |
17320 | 0 | Builder.CreateStore(LoadedVal, OldValAddr); |
17321 | 0 | return Builder.CreateZExt(Pair.second, Builder.getInt32Ty()); |
17322 | 0 | } |
17323 | 0 | case PPC::BI__builtin_ppc_fetch_and_add: |
17324 | 0 | case PPC::BI__builtin_ppc_fetch_and_addlp: { |
17325 | 0 | return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E, |
17326 | 0 | llvm::AtomicOrdering::Monotonic); |
17327 | 0 | } |
17328 | 0 | case PPC::BI__builtin_ppc_fetch_and_and: |
17329 | 0 | case PPC::BI__builtin_ppc_fetch_and_andlp: { |
17330 | 0 | return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E, |
17331 | 0 | llvm::AtomicOrdering::Monotonic); |
17332 | 0 | } |
17333 | | |
17334 | 0 | case PPC::BI__builtin_ppc_fetch_and_or: |
17335 | 0 | case PPC::BI__builtin_ppc_fetch_and_orlp: { |
17336 | 0 | return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E, |
17337 | 0 | llvm::AtomicOrdering::Monotonic); |
17338 | 0 | } |
17339 | 0 | case PPC::BI__builtin_ppc_fetch_and_swap: |
17340 | 0 | case PPC::BI__builtin_ppc_fetch_and_swaplp: { |
17341 | 0 | return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E, |
17342 | 0 | llvm::AtomicOrdering::Monotonic); |
17343 | 0 | } |
17344 | 0 | case PPC::BI__builtin_ppc_ldarx: |
17345 | 0 | case PPC::BI__builtin_ppc_lwarx: |
17346 | 0 | case PPC::BI__builtin_ppc_lharx: |
17347 | 0 | case PPC::BI__builtin_ppc_lbarx: |
17348 | 0 | return emitPPCLoadReserveIntrinsic(*this, BuiltinID, E); |
17349 | 4 | case PPC::BI__builtin_ppc_mfspr: { |
17350 | 4 | Value *Op0 = EmitScalarExpr(E->getArg(0)); |
17351 | 4 | llvm::Type *RetType = CGM.getDataLayout().getTypeSizeInBits(VoidPtrTy) == 32 |
17352 | 4 | ? Int32Ty1 |
17353 | 4 | : Int64Ty3 ; |
17354 | 4 | Function *F = CGM.getIntrinsic(Intrinsic::ppc_mfspr, RetType); |
17355 | 4 | return Builder.CreateCall(F, {Op0}); |
17356 | 0 | } |
17357 | 4 | case PPC::BI__builtin_ppc_mtspr: { |
17358 | 4 | Value *Op0 = EmitScalarExpr(E->getArg(0)); |
17359 | 4 | Value *Op1 = EmitScalarExpr(E->getArg(1)); |
17360 | 4 | llvm::Type *RetType = CGM.getDataLayout().getTypeSizeInBits(VoidPtrTy) == 32 |
17361 | 4 | ? Int32Ty1 |
17362 | 4 | : Int64Ty3 ; |
17363 | 4 | Function *F = CGM.getIntrinsic(Intrinsic::ppc_mtspr, RetType); |
17364 | 4 | return Builder.CreateCall(F, {Op0, Op1}); |
17365 | 0 | } |
17366 | 0 | case PPC::BI__builtin_ppc_popcntb: { |
17367 | 0 | Value *ArgValue = EmitScalarExpr(E->getArg(0)); |
17368 | 0 | llvm::Type *ArgType = ArgValue->getType(); |
17369 | 0 | Function *F = CGM.getIntrinsic(Intrinsic::ppc_popcntb, {ArgType, ArgType}); |
17370 | 0 | return Builder.CreateCall(F, {ArgValue}, "popcntb"); |
17371 | 0 | } |
17372 | 4 | case PPC::BI__builtin_ppc_mtfsf: { |
17373 | | // The builtin takes a uint32 that needs to be converted to an
17374 | | // f64 to be passed to the intrinsic.
17375 | 4 | Value *Op0 = EmitScalarExpr(E->getArg(0)); |
17376 | 4 | Value *Op1 = EmitScalarExpr(E->getArg(1)); |
17377 | 4 | Value *Cast = Builder.CreateUIToFP(Op1, DoubleTy); |
17378 | 4 | llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_mtfsf); |
17379 | 4 | return Builder.CreateCall(F, {Op0, Cast}, ""); |
17380 | 0 | } |
17381 | | |
17382 | 0 | case PPC::BI__builtin_ppc_swdiv_nochk: |
17383 | 0 | case PPC::BI__builtin_ppc_swdivs_nochk: { |
17384 | 0 | Value *Op0 = EmitScalarExpr(E->getArg(0)); |
17385 | 0 | Value *Op1 = EmitScalarExpr(E->getArg(1)); |
17386 | 0 | FastMathFlags FMF = Builder.getFastMathFlags(); |
17387 | 0 | Builder.getFastMathFlags().setFast(); |
17388 | 0 | Value *FDiv = Builder.CreateFDiv(Op0, Op1, "swdiv_nochk"); |
17389 | 0 | Builder.getFastMathFlags() &= (FMF); |
17390 | 0 | return FDiv; |
17391 | 0 | } |
17392 | 0 | case PPC::BI__builtin_ppc_fric: |
17393 | 0 | return RValue::get(emitUnaryMaybeConstrainedFPBuiltin( |
17394 | 0 | *this, E, Intrinsic::rint, |
17395 | 0 | Intrinsic::experimental_constrained_rint)) |
17396 | 0 | .getScalarVal(); |
17397 | 0 | case PPC::BI__builtin_ppc_frim: |
17398 | 0 | case PPC::BI__builtin_ppc_frims: |
17399 | 0 | return RValue::get(emitUnaryMaybeConstrainedFPBuiltin( |
17400 | 0 | *this, E, Intrinsic::floor, |
17401 | 0 | Intrinsic::experimental_constrained_floor)) |
17402 | 0 | .getScalarVal(); |
17403 | 0 | case PPC::BI__builtin_ppc_frin: |
17404 | 0 | case PPC::BI__builtin_ppc_frins: |
17405 | 0 | return RValue::get(emitUnaryMaybeConstrainedFPBuiltin( |
17406 | 0 | *this, E, Intrinsic::round, |
17407 | 0 | Intrinsic::experimental_constrained_round)) |
17408 | 0 | .getScalarVal(); |
17409 | 0 | case PPC::BI__builtin_ppc_frip: |
17410 | 0 | case PPC::BI__builtin_ppc_frips: |
17411 | 0 | return RValue::get(emitUnaryMaybeConstrainedFPBuiltin( |
17412 | 0 | *this, E, Intrinsic::ceil, |
17413 | 0 | Intrinsic::experimental_constrained_ceil)) |
17414 | 0 | .getScalarVal(); |
17415 | 0 | case PPC::BI__builtin_ppc_friz: |
17416 | 0 | case PPC::BI__builtin_ppc_frizs: |
17417 | 0 | return RValue::get(emitUnaryMaybeConstrainedFPBuiltin( |
17418 | 0 | *this, E, Intrinsic::trunc, |
17419 | 0 | Intrinsic::experimental_constrained_trunc)) |
17420 | 0 | .getScalarVal(); |
17421 | 0 | case PPC::BI__builtin_ppc_fsqrt: |
17422 | 0 | case PPC::BI__builtin_ppc_fsqrts: |
17423 | 0 | return RValue::get(emitUnaryMaybeConstrainedFPBuiltin( |
17424 | 0 | *this, E, Intrinsic::sqrt, |
17425 | 0 | Intrinsic::experimental_constrained_sqrt)) |
17426 | 0 | .getScalarVal(); |
17427 | 1 | case PPC::BI__builtin_ppc_test_data_class: { |
17428 | 1 | Value *Op0 = EmitScalarExpr(E->getArg(0)); |
17429 | 1 | Value *Op1 = EmitScalarExpr(E->getArg(1)); |
17430 | 1 | return Builder.CreateCall( |
17431 | 1 | CGM.getIntrinsic(Intrinsic::ppc_test_data_class, Op0->getType()), |
17432 | 1 | {Op0, Op1}, "test_data_class"); |
17433 | 0 | } |
17434 | 0 | case PPC::BI__builtin_ppc_maxfe: { |
17435 | 0 | Value *Op0 = EmitScalarExpr(E->getArg(0)); |
17436 | 0 | Value *Op1 = EmitScalarExpr(E->getArg(1)); |
17437 | 0 | Value *Op2 = EmitScalarExpr(E->getArg(2)); |
17438 | 0 | Value *Op3 = EmitScalarExpr(E->getArg(3)); |
17439 | 0 | return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_maxfe), |
17440 | 0 | {Op0, Op1, Op2, Op3}); |
17441 | 0 | } |
17442 | 0 | case PPC::BI__builtin_ppc_maxfl: { |
17443 | 0 | Value *Op0 = EmitScalarExpr(E->getArg(0)); |
17444 | 0 | Value *Op1 = EmitScalarExpr(E->getArg(1)); |
17445 | 0 | Value *Op2 = EmitScalarExpr(E->getArg(2)); |
17446 | 0 | Value *Op3 = EmitScalarExpr(E->getArg(3)); |
17447 | 0 | return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_maxfl), |
17448 | 0 | {Op0, Op1, Op2, Op3}); |
17449 | 0 | } |
17450 | 0 | case PPC::BI__builtin_ppc_maxfs: { |
17451 | 0 | Value *Op0 = EmitScalarExpr(E->getArg(0)); |
17452 | 0 | Value *Op1 = EmitScalarExpr(E->getArg(1)); |
17453 | 0 | Value *Op2 = EmitScalarExpr(E->getArg(2)); |
17454 | 0 | Value *Op3 = EmitScalarExpr(E->getArg(3)); |
17455 | 0 | return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_maxfs), |
17456 | 0 | {Op0, Op1, Op2, Op3}); |
17457 | 0 | } |
17458 | 0 | case PPC::BI__builtin_ppc_minfe: { |
17459 | 0 | Value *Op0 = EmitScalarExpr(E->getArg(0)); |
17460 | 0 | Value *Op1 = EmitScalarExpr(E->getArg(1)); |
17461 | 0 | Value *Op2 = EmitScalarExpr(E->getArg(2)); |
17462 | 0 | Value *Op3 = EmitScalarExpr(E->getArg(3)); |
17463 | 0 | return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_minfe), |
17464 | 0 | {Op0, Op1, Op2, Op3}); |
17465 | 0 | } |
17466 | 0 | case PPC::BI__builtin_ppc_minfl: { |
17467 | 0 | Value *Op0 = EmitScalarExpr(E->getArg(0)); |
17468 | 0 | Value *Op1 = EmitScalarExpr(E->getArg(1)); |
17469 | 0 | Value *Op2 = EmitScalarExpr(E->getArg(2)); |
17470 | 0 | Value *Op3 = EmitScalarExpr(E->getArg(3)); |
17471 | 0 | return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_minfl), |
17472 | 0 | {Op0, Op1, Op2, Op3}); |
17473 | 0 | } |
17474 | 0 | case PPC::BI__builtin_ppc_minfs: { |
17475 | 0 | Value *Op0 = EmitScalarExpr(E->getArg(0)); |
17476 | 0 | Value *Op1 = EmitScalarExpr(E->getArg(1)); |
17477 | 0 | Value *Op2 = EmitScalarExpr(E->getArg(2)); |
17478 | 0 | Value *Op3 = EmitScalarExpr(E->getArg(3)); |
17479 | 0 | return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_minfs), |
17480 | 0 | {Op0, Op1, Op2, Op3}); |
17481 | 0 | } |
17482 | 0 | case PPC::BI__builtin_ppc_swdiv: |
17483 | 0 | case PPC::BI__builtin_ppc_swdivs: { |
17484 | 0 | Value *Op0 = EmitScalarExpr(E->getArg(0)); |
17485 | 0 | Value *Op1 = EmitScalarExpr(E->getArg(1)); |
17486 | 0 | return Builder.CreateFDiv(Op0, Op1, "swdiv"); |
17487 | 0 | } |
17488 | 0 | case PPC::BI__builtin_ppc_set_fpscr_rn: |
17489 | 0 | return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_setrnd), |
17490 | 0 | {EmitScalarExpr(E->getArg(0))}); |
17491 | 0 | case PPC::BI__builtin_ppc_mffs: |
17492 | 0 | return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_readflm)); |
17493 | 269 | } |
17494 | 269 | } |
17495 | | |
17496 | | namespace { |
17497 | | // If \p E is not a null pointer, insert an address space cast to match the
17498 | | // return type of \p E if necessary.
17499 | | Value *EmitAMDGPUDispatchPtr(CodeGenFunction &CGF, |
17500 | 21 | const CallExpr *E = nullptr) { |
17501 | 21 | auto *F = CGF.CGM.getIntrinsic(Intrinsic::amdgcn_dispatch_ptr); |
17502 | 21 | auto *Call = CGF.Builder.CreateCall(F); |
17503 | 21 | Call->addRetAttr( |
17504 | 21 | Attribute::getWithDereferenceableBytes(Call->getContext(), 64)); |
17505 | 21 | Call->addRetAttr(Attribute::getWithAlignment(Call->getContext(), Align(4))); |
17506 | 21 | if (!E) |
17507 | 16 | return Call; |
17508 | 5 | QualType BuiltinRetType = E->getType(); |
17509 | 5 | auto *RetTy = cast<llvm::PointerType>(CGF.ConvertType(BuiltinRetType)); |
17510 | 5 | if (RetTy == Call->getType()) |
17511 | 5 | return Call; |
17512 | 0 | return CGF.Builder.CreateAddrSpaceCast(Call, RetTy); |
17513 | 5 | } |
17514 | | |
17515 | 10 | Value *EmitAMDGPUImplicitArgPtr(CodeGenFunction &CGF) { |
17516 | 10 | auto *F = CGF.CGM.getIntrinsic(Intrinsic::amdgcn_implicitarg_ptr); |
17517 | 10 | auto *Call = CGF.Builder.CreateCall(F); |
17518 | 10 | Call->addRetAttr( |
17519 | 10 | Attribute::getWithDereferenceableBytes(Call->getContext(), 256)); |
17520 | 10 | Call->addRetAttr(Attribute::getWithAlignment(Call->getContext(), Align(8))); |
17521 | 10 | return Call; |
17522 | 10 | } |
17523 | | |
17524 | | /// \p Index is 0, 1, and 2 for the x, y, and z dimensions, respectively.
17525 | | /// Emit code based on Code Object ABI version. |
17526 | | /// COV_4 : Emit code to use dispatch ptr |
17527 | | /// COV_5 : Emit code to use implicitarg ptr |
17528 | | /// COV_NONE : Emit code to load a global variable "__oclc_ABI_version" |
17529 | | /// and use its value for COV_4 or COV_5 approach. It is used for |
17530 | | /// compiling device libraries in an ABI-agnostic way. |
17531 | | /// |
17532 | | /// Note: "__oclc_ABI_version" is supposed to be emitted and initialized by
17533 | | /// clang during compilation of user code. |
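| | /// Illustrative pseudo-C for the COV_NONE path below (editor's note; the
| | /// pointer names are hypothetical):
| | ///   uint16_t n = __oclc_ABI_version >= COV_5
| | ///                    ? *(const uint16_t *)(implicitarg_ptr + 12 + Index * 2)
| | ///                    : *(const uint16_t *)(dispatch_ptr + 4 + Index * 2);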
17534 | 16 | Value *EmitAMDGPUWorkGroupSize(CodeGenFunction &CGF, unsigned Index) { |
17535 | 16 | llvm::LoadInst *LD; |
17536 | | |
17537 | 16 | auto Cov = CGF.getTarget().getTargetOpts().CodeObjectVersion; |
17538 | | |
17539 | 16 | if (Cov == clang::TargetOptions::COV_None) { |
17540 | 7 | StringRef Name = "__oclc_ABI_version"; |
17541 | 7 | auto *ABIVersionC = CGF.CGM.getModule().getNamedGlobal(Name); |
17542 | 7 | if (!ABIVersionC) |
17543 | 3 | ABIVersionC = new llvm::GlobalVariable( |
17544 | 3 | CGF.CGM.getModule(), CGF.Int32Ty, false, |
17545 | 3 | llvm::GlobalValue::ExternalLinkage, nullptr, Name, nullptr, |
17546 | 3 | llvm::GlobalVariable::NotThreadLocal, |
17547 | 3 | CGF.CGM.getContext().getTargetAddressSpace(LangAS::opencl_constant)); |
17548 | | |
17549 | | // This load will be eliminated by the IPSCCP because it is constant |
17550 | | // weak_odr without externally_initialized. Either changing it to weak or |
17551 | | // adding externally_initialized will keep the load. |
17552 | 7 | Value *ABIVersion = CGF.Builder.CreateAlignedLoad(CGF.Int32Ty, ABIVersionC, |
17553 | 7 | CGF.CGM.getIntAlign()); |
17554 | | |
17555 | 7 | Value *IsCOV5 = CGF.Builder.CreateICmpSGE( |
17556 | 7 | ABIVersion, |
17557 | 7 | llvm::ConstantInt::get(CGF.Int32Ty, clang::TargetOptions::COV_5)); |
17558 | | |
17559 | | // Indexing the implicit kernarg segment. |
17560 | 7 | Value *ImplicitGEP = CGF.Builder.CreateConstGEP1_32( |
17561 | 7 | CGF.Int8Ty, EmitAMDGPUImplicitArgPtr(CGF), 12 + Index * 2); |
17562 | | |
17563 | | // Indexing the HSA kernel_dispatch_packet struct. |
17564 | 7 | Value *DispatchGEP = CGF.Builder.CreateConstGEP1_32( |
17565 | 7 | CGF.Int8Ty, EmitAMDGPUDispatchPtr(CGF), 4 + Index * 2); |
17566 | | |
17567 | 7 | auto Result = CGF.Builder.CreateSelect(IsCOV5, ImplicitGEP, DispatchGEP); |
17568 | 7 | LD = CGF.Builder.CreateLoad( |
17569 | 7 | Address(Result, CGF.Int16Ty, CharUnits::fromQuantity(2))); |
17570 | 9 | } else { |
17571 | 9 | Value *GEP = nullptr; |
17572 | 9 | if (Cov == clang::TargetOptions::COV_5) { |
17573 | | // Indexing the implicit kernarg segment. |
17574 | 3 | GEP = CGF.Builder.CreateConstGEP1_32( |
17575 | 3 | CGF.Int8Ty, EmitAMDGPUImplicitArgPtr(CGF), 12 + Index * 2); |
17576 | 6 | } else { |
17577 | | // Indexing the HSA kernel_dispatch_packet struct. |
17578 | 6 | GEP = CGF.Builder.CreateConstGEP1_32( |
17579 | 6 | CGF.Int8Ty, EmitAMDGPUDispatchPtr(CGF), 4 + Index * 2); |
17580 | 6 | } |
17581 | 9 | LD = CGF.Builder.CreateLoad( |
17582 | 9 | Address(GEP, CGF.Int16Ty, CharUnits::fromQuantity(2))); |
17583 | 9 | } |
17584 | | |
17585 | 16 | llvm::MDBuilder MDHelper(CGF.getLLVMContext()); |
17586 | 16 | llvm::MDNode *RNode = MDHelper.createRange(APInt(16, 1), |
17587 | 16 | APInt(16, CGF.getTarget().getMaxOpenCLWorkGroupSize() + 1)); |
17588 | 16 | LD->setMetadata(llvm::LLVMContext::MD_range, RNode); |
17589 | 16 | LD->setMetadata(llvm::LLVMContext::MD_noundef, |
17590 | 16 | llvm::MDNode::get(CGF.getLLVMContext(), std::nullopt)); |
17591 | 16 | LD->setMetadata(llvm::LLVMContext::MD_invariant_load, |
17592 | 16 | llvm::MDNode::get(CGF.getLLVMContext(), std::nullopt)); |
17593 | 16 | return LD; |
17594 | 16 | } |
17595 | | |
17596 | | // \p Index is 0, 1, and 2 for x, y, and z dimension, respectively. |
17597 | 3 | Value *EmitAMDGPUGridSize(CodeGenFunction &CGF, unsigned Index) { |
17598 | 3 | const unsigned XOffset = 12; |
17599 | 3 | auto *DP = EmitAMDGPUDispatchPtr(CGF); |
17600 | | // Indexing the HSA kernel_dispatch_packet struct. |
17601 | 3 | auto *Offset = llvm::ConstantInt::get(CGF.Int32Ty, XOffset + Index * 4); |
17602 | 3 | auto *GEP = CGF.Builder.CreateGEP(CGF.Int8Ty, DP, Offset); |
17603 | 3 | auto *LD = CGF.Builder.CreateLoad( |
17604 | 3 | Address(GEP, CGF.Int32Ty, CharUnits::fromQuantity(4))); |
17605 | 3 | LD->setMetadata(llvm::LLVMContext::MD_invariant_load, |
17606 | 3 | llvm::MDNode::get(CGF.getLLVMContext(), std::nullopt)); |
17607 | 3 | return LD; |
17608 | 3 | } |
17609 | | } // namespace |
17610 | | |
17611 | | // For processing memory ordering and memory scope arguments of various |
17612 | | // amdgcn builtins. |
17613 | | // \p Order takes a C++11 compatible memory-ordering specifier and converts
17614 | | // it into LLVM's memory ordering specifier using the atomic C ABI, and
17615 | | // writes it to \p AO. \p Scope takes a const char * and converts it into an
17616 | | // AMDGCN-specific SyncScopeID and writes it to \p SSID.
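| | // For example (editor's illustrative note): a call such as
| | // __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "workgroup") reaches here with
| | // \p Order folding to SequentiallyConsistent and \p Scope naming the
| | // "workgroup" sync scope.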
17617 | | void CodeGenFunction::ProcessOrderScopeAMDGCN(Value *Order, Value *Scope, |
17618 | | llvm::AtomicOrdering &AO, |
17619 | 51 | llvm::SyncScope::ID &SSID) { |
17620 | 51 | int ord = cast<llvm::ConstantInt>(Order)->getZExtValue(); |
17621 | | |
17622 | | // Map C11/C++11 memory ordering to LLVM memory ordering |
17623 | 51 | assert(llvm::isValidAtomicOrderingCABI(ord)); |
17624 | 51 | switch (static_cast<llvm::AtomicOrderingCABI>(ord)) { |
17625 | 3 | case llvm::AtomicOrderingCABI::acquire: |
17626 | 5 | case llvm::AtomicOrderingCABI::consume: |
17627 | 5 | AO = llvm::AtomicOrdering::Acquire; |
17628 | 5 | break; |
17629 | 3 | case llvm::AtomicOrderingCABI::release: |
17630 | 3 | AO = llvm::AtomicOrdering::Release; |
17631 | 3 | break; |
17632 | 3 | case llvm::AtomicOrderingCABI::acq_rel: |
17633 | 3 | AO = llvm::AtomicOrdering::AcquireRelease; |
17634 | 3 | break; |
17635 | 38 | case llvm::AtomicOrderingCABI::seq_cst: |
17636 | 38 | AO = llvm::AtomicOrdering::SequentiallyConsistent; |
17637 | 38 | break; |
17638 | 2 | case llvm::AtomicOrderingCABI::relaxed: |
17639 | 2 | AO = llvm::AtomicOrdering::Monotonic; |
17640 | 2 | break; |
17641 | 51 | } |
17642 | | |
17643 | 51 | StringRef scp; |
17644 | 51 | llvm::getConstantStringInfo(Scope, scp); |
17645 | 51 | SSID = getLLVMContext().getOrInsertSyncScopeID(scp); |
17646 | 51 | } |
17647 | | |
17648 | | llvm::Value *CodeGenFunction::EmitScalarOrConstFoldImmArg(unsigned ICEArguments, |
17649 | | unsigned Idx, |
17650 | 63.9k | const CallExpr *E) { |
17651 | 63.9k | llvm::Value *Arg = nullptr; |
17652 | 63.9k | if ((ICEArguments & (1 << Idx)) == 0) { |
17653 | 53.2k | Arg = EmitScalarExpr(E->getArg(Idx)); |
17654 | 53.2k | } else { |
17655 | | // If this is required to be a constant, constant fold it so that we |
17656 | | // know that the generated intrinsic gets a ConstantInt. |
17657 | 10.6k | std::optional<llvm::APSInt> Result = |
17658 | 10.6k | E->getArg(Idx)->getIntegerConstantExpr(getContext()); |
17659 | 10.6k | assert(Result && "Expected argument to be a constant"); |
17660 | 10.6k | Arg = llvm::ConstantInt::get(getLLVMContext(), *Result); |
17661 | 10.6k | } |
17662 | 63.9k | return Arg; |
17663 | 63.9k | } |
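      | | // ICEArguments is a bitmask computed by GetBuiltinType: bit Idx is set
      | | // when argument Idx of the builtin must be an integer constant
      | | // expression. For instance, if ICEArguments were 0b110, arguments 1 and 2
      | | // would be constant-folded to ConstantInts while argument 0 would be
      | | // emitted as an ordinary scalar expression.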
17664 | | |
17665 | | Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, |
17666 | 439 | const CallExpr *E) { |
17667 | 439 | llvm::AtomicOrdering AO = llvm::AtomicOrdering::SequentiallyConsistent; |
17668 | 439 | llvm::SyncScope::ID SSID; |
17669 | 439 | switch (BuiltinID) { |
17670 | 1 | case AMDGPU::BI__builtin_amdgcn_div_scale: |
17671 | 4 | case AMDGPU::BI__builtin_amdgcn_div_scalef: { |
17672 | | // Translate from the intrinsic's struct return to the builtin's out
17673 | | // argument.
17674 | | |
17675 | 4 | Address FlagOutPtr = EmitPointerWithAlignment(E->getArg(3)); |
17676 | | |
17677 | 4 | llvm::Value *X = EmitScalarExpr(E->getArg(0)); |
17678 | 4 | llvm::Value *Y = EmitScalarExpr(E->getArg(1)); |
17679 | 4 | llvm::Value *Z = EmitScalarExpr(E->getArg(2)); |
17680 | | |
17681 | 4 | llvm::Function *Callee = CGM.getIntrinsic(Intrinsic::amdgcn_div_scale, |
17682 | 4 | X->getType()); |
17683 | | |
17684 | 4 | llvm::Value *Tmp = Builder.CreateCall(Callee, {X, Y, Z}); |
17685 | | |
17686 | 4 | llvm::Value *Result = Builder.CreateExtractValue(Tmp, 0); |
17687 | 4 | llvm::Value *Flag = Builder.CreateExtractValue(Tmp, 1); |
17688 | | |
17689 | 4 | llvm::Type *RealFlagType = FlagOutPtr.getElementType(); |
17690 | | |
17691 | 4 | llvm::Value *FlagExt = Builder.CreateZExt(Flag, RealFlagType); |
17692 | 4 | Builder.CreateStore(FlagExt, FlagOutPtr); |
17693 | 4 | return Result; |
17694 | 1 | } |
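      | | // In other words, the {value, flag} struct returned by
      | | // llvm.amdgcn.div.scale is split up: element 0 becomes the builtin's
      | | // return value, and element 1 is zero-extended and stored through the
      | | // bool * passed as the fourth argument.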
17695 | 1 | case AMDGPU::BI__builtin_amdgcn_div_fmas: |
17696 | 2 | case AMDGPU::BI__builtin_amdgcn_div_fmasf: { |
17697 | 2 | llvm::Value *Src0 = EmitScalarExpr(E->getArg(0)); |
17698 | 2 | llvm::Value *Src1 = EmitScalarExpr(E->getArg(1)); |
17699 | 2 | llvm::Value *Src2 = EmitScalarExpr(E->getArg(2)); |
17700 | 2 | llvm::Value *Src3 = EmitScalarExpr(E->getArg(3)); |
17701 | | |
17702 | 2 | llvm::Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_div_fmas, |
17703 | 2 | Src0->getType()); |
17704 | 2 | llvm::Value *Src3ToBool = Builder.CreateIsNotNull(Src3); |
17705 | 2 | return Builder.CreateCall(F, {Src0, Src1, Src2, Src3ToBool}); |
17706 | 1 | } |
17707 | | |
17708 | 0 | case AMDGPU::BI__builtin_amdgcn_ds_swizzle: |
17709 | 0 | return emitBinaryBuiltin(*this, E, Intrinsic::amdgcn_ds_swizzle); |
17710 | 8 | case AMDGPU::BI__builtin_amdgcn_mov_dpp8: |
17711 | 8 | return emitBinaryBuiltin(*this, E, Intrinsic::amdgcn_mov_dpp8); |
17712 | 5 | case AMDGPU::BI__builtin_amdgcn_mov_dpp: |
17713 | 13 | case AMDGPU::BI__builtin_amdgcn_update_dpp: { |
17714 | 13 | llvm::SmallVector<llvm::Value *, 6> Args; |
17715 | | // Find out if any arguments are required to be integer constant |
17716 | | // expressions. |
17717 | 13 | unsigned ICEArguments = 0; |
17718 | 13 | ASTContext::GetBuiltinTypeError Error; |
17719 | 13 | getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments); |
17720 | 13 | assert(Error == ASTContext::GE_None && "Should not codegen an error"); |
17721 | 86 | for (unsigned I = 0; I != E->getNumArgs(); ++I) {
17722 | 73 | Args.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, I, E)); |
17723 | 73 | } |
17724 | 13 | assert(Args.size() == 5 || Args.size() == 6); |
17725 | 13 | if (Args.size() == 5) |
17726 | 5 | Args.insert(Args.begin(), llvm::PoisonValue::get(Args[0]->getType())); |
17727 | 13 | Function *F = |
17728 | 13 | CGM.getIntrinsic(Intrinsic::amdgcn_update_dpp, Args[0]->getType()); |
17729 | 13 | return Builder.CreateCall(F, Args); |
17730 | 13 | } |
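      | | // Thus the five-operand __builtin_amdgcn_mov_dpp(x, ...) is lowered as a
      | | // six-operand llvm.amdgcn.update.dpp whose "old" operand is poison; a
      | | // sketch of the result:
      | | //   %r = call i32 @llvm.amdgcn.update.dpp.i32(i32 poison, i32 %x,
      | | //            i32 <dpp_ctrl>, i32 <row_mask>, i32 <bank_mask>,
      | | //            i1 <bound_ctrl>)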
17731 | 1 | case AMDGPU::BI__builtin_amdgcn_div_fixup: |
17732 | 2 | case AMDGPU::BI__builtin_amdgcn_div_fixupf: |
17733 | 7 | case AMDGPU::BI__builtin_amdgcn_div_fixuph: |
17734 | 7 | return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_div_fixup); |
17735 | 1 | case AMDGPU::BI__builtin_amdgcn_trig_preop: |
17736 | 2 | case AMDGPU::BI__builtin_amdgcn_trig_preopf: |
17737 | 2 | return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_trig_preop); |
17738 | 1 | case AMDGPU::BI__builtin_amdgcn_rcp: |
17739 | 5 | case AMDGPU::BI__builtin_amdgcn_rcpf: |
17740 | 10 | case AMDGPU::BI__builtin_amdgcn_rcph: |
17741 | 10 | return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rcp); |
17742 | 1 | case AMDGPU::BI__builtin_amdgcn_sqrt: |
17743 | 2 | case AMDGPU::BI__builtin_amdgcn_sqrtf: |
17744 | 7 | case AMDGPU::BI__builtin_amdgcn_sqrth: |
17745 | 7 | return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_sqrt); |
17746 | 1 | case AMDGPU::BI__builtin_amdgcn_rsq: |
17747 | 5 | case AMDGPU::BI__builtin_amdgcn_rsqf: |
17748 | 10 | case AMDGPU::BI__builtin_amdgcn_rsqh: |
17749 | 10 | return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq); |
17750 | 1 | case AMDGPU::BI__builtin_amdgcn_rsq_clamp: |
17751 | 2 | case AMDGPU::BI__builtin_amdgcn_rsq_clampf: |
17752 | 2 | return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq_clamp); |
17753 | 1 | case AMDGPU::BI__builtin_amdgcn_sinf: |
17754 | 6 | case AMDGPU::BI__builtin_amdgcn_sinh: |
17755 | 6 | return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_sin); |
17756 | 1 | case AMDGPU::BI__builtin_amdgcn_cosf: |
17757 | 6 | case AMDGPU::BI__builtin_amdgcn_cosh: |
17758 | 6 | return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_cos); |
17759 | 5 | case AMDGPU::BI__builtin_amdgcn_dispatch_ptr: |
17760 | 5 | return EmitAMDGPUDispatchPtr(*this, E); |
17761 | 4 | case AMDGPU::BI__builtin_amdgcn_logf: |
17762 | 4 | return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_log); |
17763 | 7 | case AMDGPU::BI__builtin_amdgcn_exp2f: |
17764 | 7 | return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_exp2); |
17765 | 1 | case AMDGPU::BI__builtin_amdgcn_log_clampf: |
17766 | 1 | return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_log_clamp); |
17767 | 11 | case AMDGPU::BI__builtin_amdgcn_ldexp: |
17768 | 22 | case AMDGPU::BI__builtin_amdgcn_ldexpf: { |
17769 | 22 | llvm::Value *Src0 = EmitScalarExpr(E->getArg(0)); |
17770 | 22 | llvm::Value *Src1 = EmitScalarExpr(E->getArg(1)); |
17771 | 22 | llvm::Function *F = |
17772 | 22 | CGM.getIntrinsic(Intrinsic::ldexp, {Src0->getType(), Src1->getType()}); |
17773 | 22 | return Builder.CreateCall(F, {Src0, Src1}); |
17774 | 11 | } |
17775 | 5 | case AMDGPU::BI__builtin_amdgcn_ldexph: { |
17776 | | // The raw instruction has different behavior for out-of-bounds exponent
17777 | | // values (implicit truncation instead of saturating to short_min/short_max).
17778 | 5 | llvm::Value *Src0 = EmitScalarExpr(E->getArg(0)); |
17779 | 5 | llvm::Value *Src1 = EmitScalarExpr(E->getArg(1)); |
17780 | 5 | llvm::Function *F = |
17781 | 5 | CGM.getIntrinsic(Intrinsic::ldexp, {Src0->getType(), Int16Ty}); |
17782 | 5 | return Builder.CreateCall(F, {Src0, Builder.CreateTrunc(Src1, Int16Ty)}); |
17783 | 11 | } |
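      | | // That is, __builtin_amdgcn_ldexph truncates its i32 exponent to i16 and
      | | // calls llvm.ldexp.f16.i16, matching the hardware's implicit truncation
      | | // of out-of-range exponents.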
17784 | 1 | case AMDGPU::BI__builtin_amdgcn_frexp_mant: |
17785 | 2 | case AMDGPU::BI__builtin_amdgcn_frexp_mantf: |
17786 | 7 | case AMDGPU::BI__builtin_amdgcn_frexp_manth: |
17787 | 7 | return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_frexp_mant); |
17788 | 1 | case AMDGPU::BI__builtin_amdgcn_frexp_exp: |
17789 | 2 | case AMDGPU::BI__builtin_amdgcn_frexp_expf: { |
17790 | 2 | Value *Src0 = EmitScalarExpr(E->getArg(0)); |
17791 | 2 | Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp, |
17792 | 2 | { Builder.getInt32Ty(), Src0->getType() }); |
17793 | 2 | return Builder.CreateCall(F, Src0); |
17794 | 1 | } |
17795 | 5 | case AMDGPU::BI__builtin_amdgcn_frexp_exph: { |
17796 | 5 | Value *Src0 = EmitScalarExpr(E->getArg(0)); |
17797 | 5 | Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp, |
17798 | 5 | { Builder.getInt16Ty(), Src0->getType() }); |
17799 | 5 | return Builder.CreateCall(F, Src0); |
17800 | 1 | } |
17801 | 1 | case AMDGPU::BI__builtin_amdgcn_fract: |
17802 | 2 | case AMDGPU::BI__builtin_amdgcn_fractf: |
17803 | 7 | case AMDGPU::BI__builtin_amdgcn_fracth: |
17804 | 7 | return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_fract); |
17805 | 0 | case AMDGPU::BI__builtin_amdgcn_lerp: |
17806 | 0 | return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_lerp); |
17807 | 1 | case AMDGPU::BI__builtin_amdgcn_ubfe: |
17808 | 1 | return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_ubfe); |
17809 | 1 | case AMDGPU::BI__builtin_amdgcn_sbfe: |
17810 | 1 | return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_sbfe); |
17811 | 16 | case AMDGPU::BI__builtin_amdgcn_ballot_w32: |
17812 | 31 | case AMDGPU::BI__builtin_amdgcn_ballot_w64: { |
17813 | 31 | llvm::Type *ResultType = ConvertType(E->getType()); |
17814 | 31 | llvm::Value *Src = EmitScalarExpr(E->getArg(0)); |
17815 | 31 | Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, { ResultType }); |
17816 | 31 | return Builder.CreateCall(F, { Src }); |
17817 | 16 | } |
17818 | 1 | case AMDGPU::BI__builtin_amdgcn_uicmp: |
17819 | 4 | case AMDGPU::BI__builtin_amdgcn_uicmpl: |
17820 | 5 | case AMDGPU::BI__builtin_amdgcn_sicmp: |
17821 | 6 | case AMDGPU::BI__builtin_amdgcn_sicmpl: { |
17822 | 6 | llvm::Value *Src0 = EmitScalarExpr(E->getArg(0)); |
17823 | 6 | llvm::Value *Src1 = EmitScalarExpr(E->getArg(1)); |
17824 | 6 | llvm::Value *Src2 = EmitScalarExpr(E->getArg(2)); |
17825 | | |
17826 | | // FIXME-GFX10: How should the 32-bit mask be handled?
17827 | 6 | Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_icmp, |
17828 | 6 | { Builder.getInt64Ty(), Src0->getType() }); |
17829 | 6 | return Builder.CreateCall(F, { Src0, Src1, Src2 }); |
17830 | 5 | } |
17831 | 1 | case AMDGPU::BI__builtin_amdgcn_fcmp: |
17832 | 2 | case AMDGPU::BI__builtin_amdgcn_fcmpf: { |
17833 | 2 | llvm::Value *Src0 = EmitScalarExpr(E->getArg(0)); |
17834 | 2 | llvm::Value *Src1 = EmitScalarExpr(E->getArg(1)); |
17835 | 2 | llvm::Value *Src2 = EmitScalarExpr(E->getArg(2)); |
17836 | | |
17837 | | // FIXME-GFX10: How should the 32-bit mask be handled?
17838 | 2 | Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_fcmp, |
17839 | 2 | { Builder.getInt64Ty(), Src0->getType() }); |
17840 | 2 | return Builder.CreateCall(F, { Src0, Src1, Src2 }); |
17841 | 1 | } |
17842 | 1 | case AMDGPU::BI__builtin_amdgcn_class: |
17843 | 2 | case AMDGPU::BI__builtin_amdgcn_classf: |
17844 | 7 | case AMDGPU::BI__builtin_amdgcn_classh: |
17845 | 7 | return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_class); |
17846 | 1 | case AMDGPU::BI__builtin_amdgcn_fmed3f: |
17847 | 5 | case AMDGPU::BI__builtin_amdgcn_fmed3h: |
17848 | 5 | return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_fmed3); |
17849 | 1 | case AMDGPU::BI__builtin_amdgcn_ds_append: |
17850 | 2 | case AMDGPU::BI__builtin_amdgcn_ds_consume: { |
17851 | 2 | Intrinsic::ID Intrin = BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_append ? |
17852 | 1 | Intrinsic::amdgcn_ds_append : Intrinsic::amdgcn_ds_consume; |
17853 | 2 | Value *Src0 = EmitScalarExpr(E->getArg(0)); |
17854 | 2 | Function *F = CGM.getIntrinsic(Intrin, { Src0->getType() }); |
17855 | 2 | return Builder.CreateCall(F, { Src0, Builder.getFalse() }); |
17856 | 1 | } |
17857 | 7 | case AMDGPU::BI__builtin_amdgcn_ds_faddf: |
17858 | 15 | case AMDGPU::BI__builtin_amdgcn_ds_fminf: |
17859 | 23 | case AMDGPU::BI__builtin_amdgcn_ds_fmaxf: { |
17860 | 23 | Intrinsic::ID Intrin; |
17861 | 23 | switch (BuiltinID) { |
17862 | 7 | case AMDGPU::BI__builtin_amdgcn_ds_faddf: |
17863 | 7 | Intrin = Intrinsic::amdgcn_ds_fadd; |
17864 | 7 | break; |
17865 | 8 | case AMDGPU::BI__builtin_amdgcn_ds_fminf: |
17866 | 8 | Intrin = Intrinsic::amdgcn_ds_fmin; |
17867 | 8 | break; |
17868 | 8 | case AMDGPU::BI__builtin_amdgcn_ds_fmaxf: |
17869 | 8 | Intrin = Intrinsic::amdgcn_ds_fmax; |
17870 | 8 | break; |
17871 | 23 | } |
17872 | 23 | llvm::Value *Src0 = EmitScalarExpr(E->getArg(0)); |
17873 | 23 | llvm::Value *Src1 = EmitScalarExpr(E->getArg(1)); |
17874 | 23 | llvm::Value *Src2 = EmitScalarExpr(E->getArg(2)); |
17875 | 23 | llvm::Value *Src3 = EmitScalarExpr(E->getArg(3)); |
17876 | 23 | llvm::Value *Src4 = EmitScalarExpr(E->getArg(4)); |
17877 | 23 | llvm::Function *F = CGM.getIntrinsic(Intrin, { Src1->getType() }); |
17878 | 23 | llvm::FunctionType *FTy = F->getFunctionType(); |
17879 | 23 | llvm::Type *PTy = FTy->getParamType(0); |
17880 | 23 | Src0 = Builder.CreatePointerBitCastOrAddrSpaceCast(Src0, PTy); |
17881 | 23 | return Builder.CreateCall(F, { Src0, Src1, Src2, Src3, Src4 }); |
17882 | 23 | } |
17883 | 3 | case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64: |
17884 | 14 | case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32: |
17885 | 18 | case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16: |
17886 | 21 | case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64: |
17887 | 24 | case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64: |
17888 | 29 | case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64: |
17889 | 34 | case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64: |
17890 | 39 | case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64: |
17891 | 42 | case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32: |
17892 | 46 | case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16: { |
17893 | 46 | Intrinsic::ID IID; |
17894 | 46 | llvm::Type *ArgTy = llvm::Type::getDoubleTy(getLLVMContext()); |
17895 | 46 | switch (BuiltinID) { |
17896 | 11 | case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32: |
17897 | 11 | ArgTy = llvm::Type::getFloatTy(getLLVMContext()); |
17898 | 11 | IID = Intrinsic::amdgcn_global_atomic_fadd; |
17899 | 11 | break; |
17900 | 4 | case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16: |
17901 | 4 | ArgTy = llvm::FixedVectorType::get( |
17902 | 4 | llvm::Type::getHalfTy(getLLVMContext()), 2); |
17903 | 4 | IID = Intrinsic::amdgcn_global_atomic_fadd; |
17904 | 4 | break; |
17905 | 3 | case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64: |
17906 | 3 | IID = Intrinsic::amdgcn_global_atomic_fadd; |
17907 | 3 | break; |
17908 | 3 | case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64: |
17909 | 3 | IID = Intrinsic::amdgcn_global_atomic_fmin; |
17910 | 3 | break; |
17911 | 3 | case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64: |
17912 | 3 | IID = Intrinsic::amdgcn_global_atomic_fmax; |
17913 | 3 | break; |
17914 | 5 | case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64: |
17915 | 5 | IID = Intrinsic::amdgcn_flat_atomic_fadd; |
17916 | 5 | break; |
17917 | 5 | case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64: |
17918 | 5 | IID = Intrinsic::amdgcn_flat_atomic_fmin; |
17919 | 5 | break; |
17920 | 5 | case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64: |
17921 | 5 | IID = Intrinsic::amdgcn_flat_atomic_fmax; |
17922 | 5 | break; |
17923 | 3 | case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32: |
17924 | 3 | ArgTy = llvm::Type::getFloatTy(getLLVMContext()); |
17925 | 3 | IID = Intrinsic::amdgcn_flat_atomic_fadd; |
17926 | 3 | break; |
17927 | 4 | case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16: |
17928 | 4 | ArgTy = llvm::FixedVectorType::get( |
17929 | 4 | llvm::Type::getHalfTy(getLLVMContext()), 2); |
17930 | 4 | IID = Intrinsic::amdgcn_flat_atomic_fadd; |
17931 | 4 | break; |
17932 | 46 | } |
17933 | 46 | llvm::Value *Addr = EmitScalarExpr(E->getArg(0)); |
17934 | 46 | llvm::Value *Val = EmitScalarExpr(E->getArg(1)); |
17935 | 46 | llvm::Function *F = |
17936 | 46 | CGM.getIntrinsic(IID, {ArgTy, Addr->getType(), Val->getType()}); |
17937 | 46 | return Builder.CreateCall(F, {Addr, Val}); |
17938 | 46 | } |
17939 | 4 | case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16: |
17940 | 8 | case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16: { |
17941 | 8 | Intrinsic::ID IID; |
17942 | 8 | switch (BuiltinID) { |
17943 | 4 | case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16: |
17944 | 4 | IID = Intrinsic::amdgcn_global_atomic_fadd_v2bf16; |
17945 | 4 | break; |
17946 | 4 | case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16: |
17947 | 4 | IID = Intrinsic::amdgcn_flat_atomic_fadd_v2bf16; |
17948 | 4 | break; |
17949 | 8 | } |
17950 | 8 | llvm::Value *Addr = EmitScalarExpr(E->getArg(0)); |
17951 | 8 | llvm::Value *Val = EmitScalarExpr(E->getArg(1)); |
17952 | 8 | llvm::Function *F = CGM.getIntrinsic(IID, {Addr->getType()}); |
17953 | 8 | return Builder.CreateCall(F, {Addr, Val}); |
17954 | 8 | } |
17955 | 2 | case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f64: |
17956 | 9 | case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32: |
17957 | 16 | case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2f16: { |
17958 | 16 | Intrinsic::ID IID; |
17959 | 16 | llvm::Type *ArgTy; |
17960 | 16 | switch (BuiltinID) { |
17961 | 7 | case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32: |
17962 | 7 | ArgTy = llvm::Type::getFloatTy(getLLVMContext()); |
17963 | 7 | IID = Intrinsic::amdgcn_ds_fadd; |
17964 | 7 | break; |
17965 | 2 | case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f64: |
17966 | 2 | ArgTy = llvm::Type::getDoubleTy(getLLVMContext()); |
17967 | 2 | IID = Intrinsic::amdgcn_ds_fadd; |
17968 | 2 | break; |
17969 | 7 | case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2f16: |
17970 | 7 | ArgTy = llvm::FixedVectorType::get( |
17971 | 7 | llvm::Type::getHalfTy(getLLVMContext()), 2); |
17972 | 7 | IID = Intrinsic::amdgcn_ds_fadd; |
17973 | 7 | break; |
17974 | 16 | } |
17975 | 16 | llvm::Value *Addr = EmitScalarExpr(E->getArg(0)); |
17976 | 16 | llvm::Value *Val = EmitScalarExpr(E->getArg(1)); |
17977 | 16 | llvm::Constant *ZeroI32 = llvm::ConstantInt::getIntegerValue( |
17978 | 16 | llvm::Type::getInt32Ty(getLLVMContext()), APInt(32, 0, true)); |
17979 | 16 | llvm::Constant *ZeroI1 = llvm::ConstantInt::getIntegerValue( |
17980 | 16 | llvm::Type::getInt1Ty(getLLVMContext()), APInt(1, 0)); |
17981 | 16 | llvm::Function *F = CGM.getIntrinsic(IID, {ArgTy}); |
17982 | 16 | return Builder.CreateCall(F, {Addr, Val, ZeroI32, ZeroI32, ZeroI1}); |
17983 | 16 | } |
17984 | 10 | case AMDGPU::BI__builtin_amdgcn_read_exec: |
17985 | 10 | return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty, false); |
17986 | 10 | case AMDGPU::BI__builtin_amdgcn_read_exec_lo: |
17987 | 10 | return EmitAMDGCNBallotForExec(*this, E, Int32Ty, Int32Ty, false); |
17988 | 10 | case AMDGPU::BI__builtin_amdgcn_read_exec_hi: |
17989 | 10 | return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty, true); |
17990 | 2 | case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray: |
17991 | 4 | case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_h: |
17992 | 6 | case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_l: |
17993 | 8 | case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_lh: { |
17994 | 8 | llvm::Value *NodePtr = EmitScalarExpr(E->getArg(0)); |
17995 | 8 | llvm::Value *RayExtent = EmitScalarExpr(E->getArg(1)); |
17996 | 8 | llvm::Value *RayOrigin = EmitScalarExpr(E->getArg(2)); |
17997 | 8 | llvm::Value *RayDir = EmitScalarExpr(E->getArg(3)); |
17998 | 8 | llvm::Value *RayInverseDir = EmitScalarExpr(E->getArg(4)); |
17999 | 8 | llvm::Value *TextureDescr = EmitScalarExpr(E->getArg(5)); |
18000 | | |
18001 | | // The builtins take these arguments as vec4 where the last element is |
18002 | | // ignored. The intrinsic takes them as vec3. |
18003 | 8 | RayOrigin = Builder.CreateShuffleVector(RayOrigin, RayOrigin, |
18004 | 8 | ArrayRef<int>{0, 1, 2}); |
18005 | 8 | RayDir = |
18006 | 8 | Builder.CreateShuffleVector(RayDir, RayDir, ArrayRef<int>{0, 1, 2}); |
18007 | 8 | RayInverseDir = Builder.CreateShuffleVector(RayInverseDir, RayInverseDir, |
18008 | 8 | ArrayRef<int>{0, 1, 2}); |
18009 | | |
18010 | 8 | Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_image_bvh_intersect_ray, |
18011 | 8 | {NodePtr->getType(), RayDir->getType()}); |
18012 | 8 | return Builder.CreateCall(F, {NodePtr, RayExtent, RayOrigin, RayDir, |
18013 | 8 | RayInverseDir, TextureDescr}); |
18014 | 6 | } |
18015 | | |
18016 | 6 | case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_rtn: { |
18017 | 6 | SmallVector<Value *, 4> Args; |
18018 | 30 | for (int i = 0, e = E->getNumArgs(); i != e; ++i)
18019 | 24 | Args.push_back(EmitScalarExpr(E->getArg(i))); |
18020 | | |
18021 | 6 | Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ds_bvh_stack_rtn); |
18022 | 6 | Value *Call = Builder.CreateCall(F, Args); |
18023 | 6 | Value *Rtn = Builder.CreateExtractValue(Call, 0); |
18024 | 6 | Value *A = Builder.CreateExtractValue(Call, 1); |
18025 | 6 | llvm::Type *RetTy = ConvertType(E->getType()); |
18026 | 6 | Value *I0 = Builder.CreateInsertElement(PoisonValue::get(RetTy), Rtn, |
18027 | 6 | (uint64_t)0); |
18028 | 6 | return Builder.CreateInsertElement(I0, A, 1); |
18029 | 6 | } |
18030 | | |
18031 | 1 | case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32: |
18032 | 2 | case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64: |
18033 | 3 | case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32: |
18034 | 4 | case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64: |
18035 | 5 | case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32: |
18036 | 6 | case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64: |
18037 | 7 | case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32: |
18038 | 8 | case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64: |
18039 | 9 | case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32: |
18040 | 10 | case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64: |
18041 | 11 | case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32: |
18042 | 12 | case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64: { |
18043 | | |
18044 | | // These operations perform a matrix multiplication and accumulation of |
18045 | | // the form: |
18046 | | // D = A * B + C |
18047 | | // The return type always matches the type of matrix C. |
18048 | 12 | unsigned ArgForMatchingRetType; |
18049 | 12 | unsigned BuiltinWMMAOp; |
18050 | | |
18051 | 12 | switch (BuiltinID) { |
18052 | 1 | case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32: |
18053 | 2 | case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64: |
18054 | 2 | ArgForMatchingRetType = 2; |
18055 | 2 | BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_f16; |
18056 | 2 | break; |
18057 | 1 | case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32: |
18058 | 2 | case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64: |
18059 | 2 | ArgForMatchingRetType = 2; |
18060 | 2 | BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf16; |
18061 | 2 | break; |
18062 | 1 | case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32: |
18063 | 2 | case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64: |
18064 | 2 | ArgForMatchingRetType = 2; |
18065 | 2 | BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x16_f16; |
18066 | 2 | break; |
18067 | 1 | case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32: |
18068 | 2 | case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64: |
18069 | 2 | ArgForMatchingRetType = 2; |
18070 | 2 | BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16; |
18071 | 2 | break; |
18072 | 1 | case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32: |
18073 | 2 | case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64: |
18074 | 2 | ArgForMatchingRetType = 4; |
18075 | 2 | BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x16_iu8; |
18076 | 2 | break; |
18077 | 1 | case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32: |
18078 | 2 | case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64: |
18079 | 2 | ArgForMatchingRetType = 4; |
18080 | 2 | BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x16_iu4; |
18081 | 2 | break; |
18082 | 12 | } |
18083 | | |
18084 | 12 | SmallVector<Value *, 6> Args; |
18085 | 64 | for (int i = 0, e = E->getNumArgs(); i != e; ++i)
18086 | 52 | Args.push_back(EmitScalarExpr(E->getArg(i))); |
18087 | | |
18088 | 12 | Function *F = CGM.getIntrinsic(BuiltinWMMAOp, |
18089 | 12 | {Args[ArgForMatchingRetType]->getType()}); |
18090 | | |
18091 | 12 | return Builder.CreateCall(F, Args); |
18092 | 12 | } |
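      | | // For example, __builtin_amdgcn_wmma_f32_16x16x16_f16_w32 passes matrix C
      | | // as argument 2, so llvm.amdgcn.wmma.f32.16x16x16.f16 is mangled on C's
      | | // (and therefore D's) vector type.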
18093 | | |
18094 | | // amdgcn workitem |
18095 | 1 | case AMDGPU::BI__builtin_amdgcn_workitem_id_x: |
18096 | 1 | return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_x, 0, 1024); |
18097 | 1 | case AMDGPU::BI__builtin_amdgcn_workitem_id_y: |
18098 | 1 | return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_y, 0, 1024); |
18099 | 1 | case AMDGPU::BI__builtin_amdgcn_workitem_id_z: |
18100 | 1 | return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_z, 0, 1024); |
18101 | | |
18102 | | // amdgcn workgroup size |
18103 | 6 | case AMDGPU::BI__builtin_amdgcn_workgroup_size_x: |
18104 | 6 | return EmitAMDGPUWorkGroupSize(*this, 0); |
18105 | 5 | case AMDGPU::BI__builtin_amdgcn_workgroup_size_y: |
18106 | 5 | return EmitAMDGPUWorkGroupSize(*this, 1); |
18107 | 5 | case AMDGPU::BI__builtin_amdgcn_workgroup_size_z: |
18108 | 5 | return EmitAMDGPUWorkGroupSize(*this, 2); |
18109 | | |
18110 | | // amdgcn grid size |
18111 | 1 | case AMDGPU::BI__builtin_amdgcn_grid_size_x: |
18112 | 1 | return EmitAMDGPUGridSize(*this, 0); |
18113 | 1 | case AMDGPU::BI__builtin_amdgcn_grid_size_y: |
18114 | 1 | return EmitAMDGPUGridSize(*this, 1); |
18115 | 1 | case AMDGPU::BI__builtin_amdgcn_grid_size_z: |
18116 | 1 | return EmitAMDGPUGridSize(*this, 2); |
18117 | | |
18118 | | // r600 intrinsics |
18119 | 0 | case AMDGPU::BI__builtin_r600_recipsqrt_ieee: |
18120 | 1 | case AMDGPU::BI__builtin_r600_recipsqrt_ieeef: |
18121 | 1 | return emitUnaryBuiltin(*this, E, Intrinsic::r600_recipsqrt_ieee); |
18122 | 1 | case AMDGPU::BI__builtin_r600_read_tidig_x: |
18123 | 1 | return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_x, 0, 1024); |
18124 | 1 | case AMDGPU::BI__builtin_r600_read_tidig_y: |
18125 | 1 | return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_y, 0, 1024); |
18126 | 1 | case AMDGPU::BI__builtin_r600_read_tidig_z: |
18127 | 1 | return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_z, 0, 1024); |
18128 | 1 | case AMDGPU::BI__builtin_amdgcn_alignbit: { |
18129 | 1 | llvm::Value *Src0 = EmitScalarExpr(E->getArg(0)); |
18130 | 1 | llvm::Value *Src1 = EmitScalarExpr(E->getArg(1)); |
18131 | 1 | llvm::Value *Src2 = EmitScalarExpr(E->getArg(2)); |
18132 | 1 | Function *F = CGM.getIntrinsic(Intrinsic::fshr, Src0->getType()); |
18133 | 1 | return Builder.CreateCall(F, { Src0, Src1, Src2 }); |
18134 | 0 | } |
18135 | 6 | case AMDGPU::BI__builtin_amdgcn_fence: { |
18136 | 6 | ProcessOrderScopeAMDGCN(EmitScalarExpr(E->getArg(0)), |
18137 | 6 | EmitScalarExpr(E->getArg(1)), AO, SSID); |
18138 | 6 | return Builder.CreateFence(AO, SSID); |
18139 | 0 | } |
18140 | 11 | case AMDGPU::BI__builtin_amdgcn_atomic_inc32: |
18141 | 19 | case AMDGPU::BI__builtin_amdgcn_atomic_inc64: |
18142 | 33 | case AMDGPU::BI__builtin_amdgcn_atomic_dec32: |
18143 | 45 | case AMDGPU::BI__builtin_amdgcn_atomic_dec64: { |
18144 | 45 | llvm::AtomicRMWInst::BinOp BinOp; |
18145 | 45 | switch (BuiltinID) { |
18146 | 11 | case AMDGPU::BI__builtin_amdgcn_atomic_inc32: |
18147 | 19 | case AMDGPU::BI__builtin_amdgcn_atomic_inc64: |
18148 | 19 | BinOp = llvm::AtomicRMWInst::UIncWrap; |
18149 | 19 | break; |
18150 | 14 | case AMDGPU::BI__builtin_amdgcn_atomic_dec32: |
18151 | 26 | case AMDGPU::BI__builtin_amdgcn_atomic_dec64: |
18152 | 26 | BinOp = llvm::AtomicRMWInst::UDecWrap; |
18153 | 26 | break; |
18154 | 45 | } |
18155 | | |
18156 | 45 | Value *Ptr = EmitScalarExpr(E->getArg(0)); |
18157 | 45 | Value *Val = EmitScalarExpr(E->getArg(1)); |
18158 | | |
18159 | 45 | ProcessOrderScopeAMDGCN(EmitScalarExpr(E->getArg(2)), |
18160 | 45 | EmitScalarExpr(E->getArg(3)), AO, SSID); |
18161 | | |
18162 | 45 | QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType(); |
18163 | 45 | bool Volatile = |
18164 | 45 | PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified(); |
18165 | | |
18166 | 45 | llvm::AtomicRMWInst *RMW = |
18167 | 45 | Builder.CreateAtomicRMW(BinOp, Ptr, Val, AO, SSID); |
18168 | 45 | if (Volatile) |
18169 | 5 | RMW->setVolatile(true); |
18170 | 45 | return RMW; |
18171 | 45 | } |
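      | | // A sketch of the result: __builtin_amdgcn_atomic_inc32(p, v,
      | | // __ATOMIC_SEQ_CST, "agent") becomes roughly
      | | //   %old = atomicrmw uinc_wrap ptr %p, i32 %v syncscope("agent") seq_cst
      | | // with the volatile flag set when p points to volatile memory.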
18172 | 7 | case AMDGPU::BI__builtin_amdgcn_s_sendmsg_rtn: |
18173 | 15 | case AMDGPU::BI__builtin_amdgcn_s_sendmsg_rtnl: { |
18174 | 15 | llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); |
18175 | 15 | llvm::Type *ResultType = ConvertType(E->getType()); |
18176 | | // s_sendmsg_rtn is mangled using return type only. |
18177 | 15 | Function *F = |
18178 | 15 | CGM.getIntrinsic(Intrinsic::amdgcn_s_sendmsg_rtn, {ResultType}); |
18179 | 15 | return Builder.CreateCall(F, {Arg}); |
18180 | 7 | } |
18181 | 0 | default: |
18182 | 0 | return nullptr; |
18183 | 439 | } |
18184 | 439 | } |
18185 | | |
18186 | | /// Handle a SystemZ function in which the final argument is a pointer |
18187 | | /// to an int that receives the post-instruction CC value. At the LLVM level |
18188 | | /// this is represented as a function that returns a {result, cc} pair. |
18189 | | static Value *EmitSystemZIntrinsicWithCC(CodeGenFunction &CGF, |
18190 | | unsigned IntrinsicID, |
18191 | 0 | const CallExpr *E) { |
18192 | 0 | unsigned NumArgs = E->getNumArgs() - 1; |
18193 | 0 | SmallVector<Value *, 8> Args(NumArgs); |
18194 | 0 | for (unsigned I = 0; I < NumArgs; ++I) |
18195 | 0 | Args[I] = CGF.EmitScalarExpr(E->getArg(I)); |
18196 | 0 | Address CCPtr = CGF.EmitPointerWithAlignment(E->getArg(NumArgs)); |
18197 | 0 | Function *F = CGF.CGM.getIntrinsic(IntrinsicID); |
18198 | 0 | Value *Call = CGF.Builder.CreateCall(F, Args); |
18199 | 0 | Value *CC = CGF.Builder.CreateExtractValue(Call, 1); |
18200 | 0 | CGF.Builder.CreateStore(CC, CCPtr); |
18201 | 0 | return CGF.Builder.CreateExtractValue(Call, 0); |
18202 | 0 | } |
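      | | // A sketch for a hypothetical builtin f(a, b, &cc) whose intrinsic
      | | // returns {result, cc}:
      | | //   %pair = call { <ty>, i32 } @intrinsic(%a, %b)
      | | //   %cc   = extractvalue { <ty>, i32 } %pair, 1
      | | //   store i32 %cc, ptr %ccptr
      | | // and the builtin's value is extractvalue %pair, 0.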
18203 | | |
18204 | | Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID, |
18205 | 0 | const CallExpr *E) { |
18206 | 0 | switch (BuiltinID) { |
18207 | 0 | case SystemZ::BI__builtin_tbegin: { |
18208 | 0 | Value *TDB = EmitScalarExpr(E->getArg(0)); |
18209 | 0 | Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c); |
18210 | 0 | Function *F = CGM.getIntrinsic(Intrinsic::s390_tbegin); |
18211 | 0 | return Builder.CreateCall(F, {TDB, Control}); |
18212 | 0 | } |
18213 | 0 | case SystemZ::BI__builtin_tbegin_nofloat: { |
18214 | 0 | Value *TDB = EmitScalarExpr(E->getArg(0)); |
18215 | 0 | Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c); |
18216 | 0 | Function *F = CGM.getIntrinsic(Intrinsic::s390_tbegin_nofloat); |
18217 | 0 | return Builder.CreateCall(F, {TDB, Control}); |
18218 | 0 | } |
18219 | 0 | case SystemZ::BI__builtin_tbeginc: { |
18220 | 0 | Value *TDB = llvm::ConstantPointerNull::get(Int8PtrTy); |
18221 | 0 | Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff08); |
18222 | 0 | Function *F = CGM.getIntrinsic(Intrinsic::s390_tbeginc); |
18223 | 0 | return Builder.CreateCall(F, {TDB, Control}); |
18224 | 0 | } |
18225 | 0 | case SystemZ::BI__builtin_tabort: { |
18226 | 0 | Value *Data = EmitScalarExpr(E->getArg(0)); |
18227 | 0 | Function *F = CGM.getIntrinsic(Intrinsic::s390_tabort); |
18228 | 0 | return Builder.CreateCall(F, Builder.CreateSExt(Data, Int64Ty, "tabort")); |
18229 | 0 | } |
18230 | 0 | case SystemZ::BI__builtin_non_tx_store: { |
18231 | 0 | Value *Address = EmitScalarExpr(E->getArg(0)); |
18232 | 0 | Value *Data = EmitScalarExpr(E->getArg(1)); |
18233 | 0 | Function *F = CGM.getIntrinsic(Intrinsic::s390_ntstg); |
18234 | 0 | return Builder.CreateCall(F, {Data, Address}); |
18235 | 0 | } |
18236 | | |
18237 | | // Vector builtins. Note that most vector builtins are mapped automatically
18238 | | // to target-specific LLVM intrinsics. The ones handled specially here can
18239 | | // be represented via standard LLVM IR, which is preferable because it
18240 | | // enables common LLVM optimizations.
18241 | | |
18242 | 0 | case SystemZ::BI__builtin_s390_vpopctb: |
18243 | 0 | case SystemZ::BI__builtin_s390_vpopcth: |
18244 | 0 | case SystemZ::BI__builtin_s390_vpopctf: |
18245 | 0 | case SystemZ::BI__builtin_s390_vpopctg: { |
18246 | 0 | llvm::Type *ResultType = ConvertType(E->getType()); |
18247 | 0 | Value *X = EmitScalarExpr(E->getArg(0)); |
18248 | 0 | Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType); |
18249 | 0 | return Builder.CreateCall(F, X); |
18250 | 0 | } |
18251 | | |
18252 | 0 | case SystemZ::BI__builtin_s390_vclzb: |
18253 | 0 | case SystemZ::BI__builtin_s390_vclzh: |
18254 | 0 | case SystemZ::BI__builtin_s390_vclzf: |
18255 | 0 | case SystemZ::BI__builtin_s390_vclzg: { |
18256 | 0 | llvm::Type *ResultType = ConvertType(E->getType()); |
18257 | 0 | Value *X = EmitScalarExpr(E->getArg(0)); |
18258 | 0 | Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false); |
18259 | 0 | Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType); |
18260 | 0 | return Builder.CreateCall(F, {X, Undef}); |
18261 | 0 | } |
18262 | | |
18263 | 0 | case SystemZ::BI__builtin_s390_vctzb: |
18264 | 0 | case SystemZ::BI__builtin_s390_vctzh: |
18265 | 0 | case SystemZ::BI__builtin_s390_vctzf: |
18266 | 0 | case SystemZ::BI__builtin_s390_vctzg: { |
18267 | 0 | llvm::Type *ResultType = ConvertType(E->getType()); |
18268 | 0 | Value *X = EmitScalarExpr(E->getArg(0)); |
18269 | 0 | Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false); |
18270 | 0 | Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType); |
18271 | 0 | return Builder.CreateCall(F, {X, Undef}); |
18272 | 0 | } |
18273 | | |
18274 | 0 | case SystemZ::BI__builtin_s390_vfsqsb: |
18275 | 0 | case SystemZ::BI__builtin_s390_vfsqdb: { |
18276 | 0 | llvm::Type *ResultType = ConvertType(E->getType()); |
18277 | 0 | Value *X = EmitScalarExpr(E->getArg(0)); |
18278 | 0 | if (Builder.getIsFPConstrained()) { |
18279 | 0 | Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt, ResultType); |
18280 | 0 | return Builder.CreateConstrainedFPCall(F, { X }); |
18281 | 0 | } else { |
18282 | 0 | Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType); |
18283 | 0 | return Builder.CreateCall(F, X); |
18284 | 0 | } |
18285 | 0 | } |
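      | | // The constrained path is taken under strict floating-point semantics
      | | // (e.g. -ffp-exception-behavior=strict), where the
      | | // llvm.experimental.constrained.* forms carry explicit rounding-mode and
      | | // exception-behavior operands instead of assuming the default FP
      | | // environment. The same pattern repeats for the builtins below.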
18286 | 0 | case SystemZ::BI__builtin_s390_vfmasb: |
18287 | 0 | case SystemZ::BI__builtin_s390_vfmadb: { |
18288 | 0 | llvm::Type *ResultType = ConvertType(E->getType()); |
18289 | 0 | Value *X = EmitScalarExpr(E->getArg(0)); |
18290 | 0 | Value *Y = EmitScalarExpr(E->getArg(1)); |
18291 | 0 | Value *Z = EmitScalarExpr(E->getArg(2)); |
18292 | 0 | if (Builder.getIsFPConstrained()) { |
18293 | 0 | Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType); |
18294 | 0 | return Builder.CreateConstrainedFPCall(F, {X, Y, Z}); |
18295 | 0 | } else { |
18296 | 0 | Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType); |
18297 | 0 | return Builder.CreateCall(F, {X, Y, Z}); |
18298 | 0 | } |
18299 | 0 | } |
18300 | 0 | case SystemZ::BI__builtin_s390_vfmssb: |
18301 | 0 | case SystemZ::BI__builtin_s390_vfmsdb: { |
18302 | 0 | llvm::Type *ResultType = ConvertType(E->getType()); |
18303 | 0 | Value *X = EmitScalarExpr(E->getArg(0)); |
18304 | 0 | Value *Y = EmitScalarExpr(E->getArg(1)); |
18305 | 0 | Value *Z = EmitScalarExpr(E->getArg(2)); |
18306 | 0 | if (Builder.getIsFPConstrained()) { |
18307 | 0 | Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType); |
18308 | 0 | return Builder.CreateConstrainedFPCall(F, {X, Y, Builder.CreateFNeg(Z, "neg")}); |
18309 | 0 | } else { |
18310 | 0 | Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType); |
18311 | 0 | return Builder.CreateCall(F, {X, Y, Builder.CreateFNeg(Z, "neg")}); |
18312 | 0 | } |
18313 | 0 | } |
18314 | 0 | case SystemZ::BI__builtin_s390_vfnmasb: |
18315 | 0 | case SystemZ::BI__builtin_s390_vfnmadb: { |
18316 | 0 | llvm::Type *ResultType = ConvertType(E->getType()); |
18317 | 0 | Value *X = EmitScalarExpr(E->getArg(0)); |
18318 | 0 | Value *Y = EmitScalarExpr(E->getArg(1)); |
18319 | 0 | Value *Z = EmitScalarExpr(E->getArg(2)); |
18320 | 0 | if (Builder.getIsFPConstrained()) { |
18321 | 0 | Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType); |
18322 | 0 | return Builder.CreateFNeg(Builder.CreateConstrainedFPCall(F, {X, Y, Z}), "neg"); |
18323 | 0 | } else { |
18324 | 0 | Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType); |
18325 | 0 | return Builder.CreateFNeg(Builder.CreateCall(F, {X, Y, Z}), "neg"); |
18326 | 0 | } |
18327 | 0 | } |
18328 | 0 | case SystemZ::BI__builtin_s390_vfnmssb: |
18329 | 0 | case SystemZ::BI__builtin_s390_vfnmsdb: { |
18330 | 0 | llvm::Type *ResultType = ConvertType(E->getType()); |
18331 | 0 | Value *X = EmitScalarExpr(E->getArg(0)); |
18332 | 0 | Value *Y = EmitScalarExpr(E->getArg(1)); |
18333 | 0 | Value *Z = EmitScalarExpr(E->getArg(2)); |
18334 | 0 | if (Builder.getIsFPConstrained()) { |
18335 | 0 | Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType); |
18336 | 0 | Value *NegZ = Builder.CreateFNeg(Z, "neg");
18337 | 0 | return Builder.CreateFNeg(Builder.CreateConstrainedFPCall(F, {X, Y, NegZ})); |
18338 | 0 | } else { |
18339 | 0 | Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType); |
18340 | 0 | Value *NegZ = Builder.CreateFNeg(Z, "neg"); |
18341 | 0 | return Builder.CreateFNeg(Builder.CreateCall(F, {X, Y, NegZ})); |
18342 | 0 | } |
18343 | 0 | } |
18344 | 0 | case SystemZ::BI__builtin_s390_vflpsb: |
18345 | 0 | case SystemZ::BI__builtin_s390_vflpdb: { |
18346 | 0 | llvm::Type *ResultType = ConvertType(E->getType()); |
18347 | 0 | Value *X = EmitScalarExpr(E->getArg(0)); |
18348 | 0 | Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType); |
18349 | 0 | return Builder.CreateCall(F, X); |
18350 | 0 | } |
18351 | 0 | case SystemZ::BI__builtin_s390_vflnsb: |
18352 | 0 | case SystemZ::BI__builtin_s390_vflndb: { |
18353 | 0 | llvm::Type *ResultType = ConvertType(E->getType()); |
18354 | 0 | Value *X = EmitScalarExpr(E->getArg(0)); |
18355 | 0 | Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType); |
18356 | 0 | return Builder.CreateFNeg(Builder.CreateCall(F, X), "neg"); |
18357 | 0 | } |
18358 | 0 | case SystemZ::BI__builtin_s390_vfisb: |
18359 | 0 | case SystemZ::BI__builtin_s390_vfidb: { |
18360 | 0 | llvm::Type *ResultType = ConvertType(E->getType()); |
18361 | 0 | Value *X = EmitScalarExpr(E->getArg(0)); |
18362 | | // Constant-fold the M4 and M5 mask arguments. |
18363 | 0 | llvm::APSInt M4 = *E->getArg(1)->getIntegerConstantExpr(getContext()); |
18364 | 0 | llvm::APSInt M5 = *E->getArg(2)->getIntegerConstantExpr(getContext()); |
18365 | | // Check whether this instance can be represented via a standard LLVM
18366 | | // intrinsic. We only support some combinations of M4 and M5.
18367 | 0 | Intrinsic::ID ID = Intrinsic::not_intrinsic; |
18368 | 0 | Intrinsic::ID CI; |
18369 | 0 | switch (M4.getZExtValue()) { |
18370 | 0 | default: break; |
18371 | 0 | case 0: // IEEE-inexact exception allowed |
18372 | 0 | switch (M5.getZExtValue()) { |
18373 | 0 | default: break; |
18374 | 0 | case 0: ID = Intrinsic::rint; |
18375 | 0 | CI = Intrinsic::experimental_constrained_rint; break; |
18376 | 0 | } |
18377 | 0 | break; |
18378 | 0 | case 4: // IEEE-inexact exception suppressed |
18379 | 0 | switch (M5.getZExtValue()) { |
18380 | 0 | default: break; |
18381 | 0 | case 0: ID = Intrinsic::nearbyint; |
18382 | 0 | CI = Intrinsic::experimental_constrained_nearbyint; break; |
18383 | 0 | case 1: ID = Intrinsic::round; |
18384 | 0 | CI = Intrinsic::experimental_constrained_round; break; |
18385 | 0 | case 5: ID = Intrinsic::trunc; |
18386 | 0 | CI = Intrinsic::experimental_constrained_trunc; break; |
18387 | 0 | case 6: ID = Intrinsic::ceil; |
18388 | 0 | CI = Intrinsic::experimental_constrained_ceil; break; |
18389 | 0 | case 7: ID = Intrinsic::floor; |
18390 | 0 | CI = Intrinsic::experimental_constrained_floor; break; |
18391 | 0 | } |
18392 | 0 | break; |
18393 | 0 | } |
18394 | 0 | if (ID != Intrinsic::not_intrinsic) { |
18395 | 0 | if (Builder.getIsFPConstrained()) { |
18396 | 0 | Function *F = CGM.getIntrinsic(CI, ResultType); |
18397 | 0 | return Builder.CreateConstrainedFPCall(F, X); |
18398 | 0 | } else { |
18399 | 0 | Function *F = CGM.getIntrinsic(ID, ResultType); |
18400 | 0 | return Builder.CreateCall(F, X); |
18401 | 0 | } |
18402 | 0 | } |
18403 | 0 | switch (BuiltinID) { // FIXME: constrained version? |
18404 | 0 | case SystemZ::BI__builtin_s390_vfisb: ID = Intrinsic::s390_vfisb; break; |
18405 | 0 | case SystemZ::BI__builtin_s390_vfidb: ID = Intrinsic::s390_vfidb; break; |
18406 | 0 | default: llvm_unreachable("Unknown BuiltinID"); |
18407 | 0 | } |
18408 | 0 | Function *F = CGM.getIntrinsic(ID); |
18409 | 0 | Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4); |
18410 | 0 | Value *M5Value = llvm::ConstantInt::get(getLLVMContext(), M5); |
18411 | 0 | return Builder.CreateCall(F, {X, M4Value, M5Value}); |
18412 | 0 | } |
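      | | // For instance, M4 = 4 (inexact suppressed) with M5 = 5 selects
      | | // llvm.trunc (or llvm.experimental.constrained.trunc), while any other
      | | // (M4, M5) combination falls through to the target-specific s390.vfisb /
      | | // s390.vfidb intrinsic with the mask values passed through verbatim.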
18413 | 0 | case SystemZ::BI__builtin_s390_vfmaxsb: |
18414 | 0 | case SystemZ::BI__builtin_s390_vfmaxdb: { |
18415 | 0 | llvm::Type *ResultType = ConvertType(E->getType()); |
18416 | 0 | Value *X = EmitScalarExpr(E->getArg(0)); |
18417 | 0 | Value *Y = EmitScalarExpr(E->getArg(1)); |
18418 | | // Constant-fold the M4 mask argument. |
18419 | 0 | llvm::APSInt M4 = *E->getArg(2)->getIntegerConstantExpr(getContext()); |
18420 | | // Check whether this instance can be represented via a standard LLVM
18421 | | // intrinsic. We only support some values of M4.
18422 | 0 | Intrinsic::ID ID = Intrinsic::not_intrinsic; |
18423 | 0 | Intrinsic::ID CI; |
18424 | 0 | switch (M4.getZExtValue()) { |
18425 | 0 | default: break; |
18426 | 0 | case 4: ID = Intrinsic::maxnum; |
18427 | 0 | CI = Intrinsic::experimental_constrained_maxnum; break; |
18428 | 0 | } |
18429 | 0 | if (ID != Intrinsic::not_intrinsic) { |
18430 | 0 | if (Builder.getIsFPConstrained()) { |
18431 | 0 | Function *F = CGM.getIntrinsic(CI, ResultType); |
18432 | 0 | return Builder.CreateConstrainedFPCall(F, {X, Y}); |
18433 | 0 | } else { |
18434 | 0 | Function *F = CGM.getIntrinsic(ID, ResultType); |
18435 | 0 | return Builder.CreateCall(F, {X, Y}); |
18436 | 0 | } |
18437 | 0 | } |
18438 | 0 | switch (BuiltinID) { |
18439 | 0 | case SystemZ::BI__builtin_s390_vfmaxsb: ID = Intrinsic::s390_vfmaxsb; break; |
18440 | 0 | case SystemZ::BI__builtin_s390_vfmaxdb: ID = Intrinsic::s390_vfmaxdb; break; |
18441 | 0 | default: llvm_unreachable("Unknown BuiltinID"); |
18442 | 0 | } |
18443 | 0 | Function *F = CGM.getIntrinsic(ID); |
18444 | 0 | Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4); |
18445 | 0 | return Builder.CreateCall(F, {X, Y, M4Value}); |
18446 | 0 | } |
18447 | 0 | case SystemZ::BI__builtin_s390_vfminsb: |
18448 | 0 | case SystemZ::BI__builtin_s390_vfmindb: { |
18449 | 0 | llvm::Type *ResultType = ConvertType(E->getType()); |
18450 | 0 | Value *X = EmitScalarExpr(E->getArg(0)); |
18451 | 0 | Value *Y = EmitScalarExpr(E->getArg(1)); |
18452 | | // Constant-fold the M4 mask argument. |
18453 | 0 | llvm::APSInt M4 = *E->getArg(2)->getIntegerConstantExpr(getContext()); |
18454 | | // Check whether this instance can be represented via a standard LLVM
18455 | | // intrinsic. We only support some values of M4.
18456 | 0 | Intrinsic::ID ID = Intrinsic::not_intrinsic; |
18457 | 0 | Intrinsic::ID CI; |
18458 | 0 | switch (M4.getZExtValue()) { |
18459 | 0 | default: break; |
18460 | 0 | case 4: ID = Intrinsic::minnum; |
18461 | 0 | CI = Intrinsic::experimental_constrained_minnum; break; |
18462 | 0 | } |
18463 | 0 | if (ID != Intrinsic::not_intrinsic) { |
18464 | 0 | if (Builder.getIsFPConstrained()) { |
18465 | 0 | Function *F = CGM.getIntrinsic(CI, ResultType); |
18466 | 0 | return Builder.CreateConstrainedFPCall(F, {X, Y}); |
18467 | 0 | } else { |
18468 | 0 | Function *F = CGM.getIntrinsic(ID, ResultType); |
18469 | 0 | return Builder.CreateCall(F, {X, Y}); |
18470 | 0 | } |
18471 | 0 | } |
18472 | 0 | switch (BuiltinID) { |
18473 | 0 | case SystemZ::BI__builtin_s390_vfminsb: ID = Intrinsic::s390_vfminsb; break; |
18474 | 0 | case SystemZ::BI__builtin_s390_vfmindb: ID = Intrinsic::s390_vfmindb; break; |
18475 | 0 | default: llvm_unreachable("Unknown BuiltinID"); |
18476 | 0 | } |
18477 | 0 | Function *F = CGM.getIntrinsic(ID); |
18478 | 0 | Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4); |
18479 | 0 | return Builder.CreateCall(F, {X, Y, M4Value}); |
18480 | 0 | } |
18481 | | |
18482 | 0 | case SystemZ::BI__builtin_s390_vlbrh: |
18483 | 0 | case SystemZ::BI__builtin_s390_vlbrf: |
18484 | 0 | case SystemZ::BI__builtin_s390_vlbrg: { |
18485 | 0 | llvm::Type *ResultType = ConvertType(E->getType()); |
18486 | 0 | Value *X = EmitScalarExpr(E->getArg(0)); |
18487 | 0 | Function *F = CGM.getIntrinsic(Intrinsic::bswap, ResultType); |
18488 | 0 | return Builder.CreateCall(F, X); |
18489 | 0 | } |
18490 | | |
18491 | | // Vector intrinsics that output the post-instruction CC value. |
18492 | | |
18493 | 0 | #define INTRINSIC_WITH_CC(NAME) \ |
18494 | 0 | case SystemZ::BI__builtin_##NAME: \ |
18495 | 0 | return EmitSystemZIntrinsicWithCC(*this, Intrinsic::NAME, E) |
18496 | | |
18497 | 0 | INTRINSIC_WITH_CC(s390_vpkshs); |
18498 | 0 | INTRINSIC_WITH_CC(s390_vpksfs); |
18499 | 0 | INTRINSIC_WITH_CC(s390_vpksgs); |
18500 | | |
18501 | 0 | INTRINSIC_WITH_CC(s390_vpklshs); |
18502 | 0 | INTRINSIC_WITH_CC(s390_vpklsfs); |
18503 | 0 | INTRINSIC_WITH_CC(s390_vpklsgs); |
18504 | | |
18505 | 0 | INTRINSIC_WITH_CC(s390_vceqbs); |
18506 | 0 | INTRINSIC_WITH_CC(s390_vceqhs); |
18507 | 0 | INTRINSIC_WITH_CC(s390_vceqfs); |
18508 | 0 | INTRINSIC_WITH_CC(s390_vceqgs); |
18509 | | |
18510 | 0 | INTRINSIC_WITH_CC(s390_vchbs); |
18511 | 0 | INTRINSIC_WITH_CC(s390_vchhs); |
18512 | 0 | INTRINSIC_WITH_CC(s390_vchfs); |
18513 | 0 | INTRINSIC_WITH_CC(s390_vchgs); |
18514 | | |
18515 | 0 | INTRINSIC_WITH_CC(s390_vchlbs); |
18516 | 0 | INTRINSIC_WITH_CC(s390_vchlhs); |
18517 | 0 | INTRINSIC_WITH_CC(s390_vchlfs); |
18518 | 0 | INTRINSIC_WITH_CC(s390_vchlgs); |
18519 | | |
18520 | 0 | INTRINSIC_WITH_CC(s390_vfaebs); |
18521 | 0 | INTRINSIC_WITH_CC(s390_vfaehs); |
18522 | 0 | INTRINSIC_WITH_CC(s390_vfaefs); |
18523 | | |
18524 | 0 | INTRINSIC_WITH_CC(s390_vfaezbs); |
18525 | 0 | INTRINSIC_WITH_CC(s390_vfaezhs); |
18526 | 0 | INTRINSIC_WITH_CC(s390_vfaezfs); |
18527 | | |
18528 | 0 | INTRINSIC_WITH_CC(s390_vfeebs); |
18529 | 0 | INTRINSIC_WITH_CC(s390_vfeehs); |
18530 | 0 | INTRINSIC_WITH_CC(s390_vfeefs); |
18531 | | |
18532 | 0 | INTRINSIC_WITH_CC(s390_vfeezbs); |
18533 | 0 | INTRINSIC_WITH_CC(s390_vfeezhs); |
18534 | 0 | INTRINSIC_WITH_CC(s390_vfeezfs); |
18535 | | |
18536 | 0 | INTRINSIC_WITH_CC(s390_vfenebs); |
18537 | 0 | INTRINSIC_WITH_CC(s390_vfenehs); |
18538 | 0 | INTRINSIC_WITH_CC(s390_vfenefs); |
18539 | | |
18540 | 0 | INTRINSIC_WITH_CC(s390_vfenezbs); |
18541 | 0 | INTRINSIC_WITH_CC(s390_vfenezhs); |
18542 | 0 | INTRINSIC_WITH_CC(s390_vfenezfs); |
18543 | | |
18544 | 0 | INTRINSIC_WITH_CC(s390_vistrbs); |
18545 | 0 | INTRINSIC_WITH_CC(s390_vistrhs); |
18546 | 0 | INTRINSIC_WITH_CC(s390_vistrfs); |
18547 | | |
18548 | 0 | INTRINSIC_WITH_CC(s390_vstrcbs); |
18549 | 0 | INTRINSIC_WITH_CC(s390_vstrchs); |
18550 | 0 | INTRINSIC_WITH_CC(s390_vstrcfs); |
18551 | | |
18552 | 0 | INTRINSIC_WITH_CC(s390_vstrczbs); |
18553 | 0 | INTRINSIC_WITH_CC(s390_vstrczhs); |
18554 | 0 | INTRINSIC_WITH_CC(s390_vstrczfs); |
18555 | | |
18556 | 0 | INTRINSIC_WITH_CC(s390_vfcesbs); |
18557 | 0 | INTRINSIC_WITH_CC(s390_vfcedbs); |
18558 | 0 | INTRINSIC_WITH_CC(s390_vfchsbs); |
18559 | 0 | INTRINSIC_WITH_CC(s390_vfchdbs); |
18560 | 0 | INTRINSIC_WITH_CC(s390_vfchesbs); |
18561 | 0 | INTRINSIC_WITH_CC(s390_vfchedbs); |
18562 | | |
18563 | 0 | INTRINSIC_WITH_CC(s390_vftcisb); |
18564 | 0 | INTRINSIC_WITH_CC(s390_vftcidb); |
18565 | | |
18566 | 0 | INTRINSIC_WITH_CC(s390_vstrsb); |
18567 | 0 | INTRINSIC_WITH_CC(s390_vstrsh); |
18568 | 0 | INTRINSIC_WITH_CC(s390_vstrsf); |
18569 | | |
18570 | 0 | INTRINSIC_WITH_CC(s390_vstrszb); |
18571 | 0 | INTRINSIC_WITH_CC(s390_vstrszh); |
18572 | 0 | INTRINSIC_WITH_CC(s390_vstrszf); |
18573 | | |
18574 | 0 | #undef INTRINSIC_WITH_CC |
18575 | | |
18576 | 0 | default: |
18577 | 0 | return nullptr; |
18578 | 0 | } |
18579 | 0 | } |
18580 | | |
18581 | | namespace { |
18582 | | // Helper classes for mapping MMA builtins to a particular LLVM intrinsic variant.
18583 | | struct NVPTXMmaLdstInfo { |
18584 | | unsigned NumResults; // Number of elements to load/store |
18585 | | // Intrinsic IDs for row/col variants. 0 if the particular layout is unsupported.
18586 | | unsigned IID_col; |
18587 | | unsigned IID_row; |
18588 | | }; |
18589 | | |
18590 | | #define MMA_INTR(geom_op_type, layout) \ |
18591 | 708 | Intrinsic::nvvm_wmma_##geom_op_type##_##layout##_stride |
18592 | | #define MMA_LDST(n, geom_op_type) \ |
18593 | 348 | { n, MMA_INTR(geom_op_type, col), MMA_INTR(geom_op_type, row) } |
18594 | | |
18595 | 360 | static NVPTXMmaLdstInfo getNVPTXMmaLdstInfo(unsigned BuiltinID) { |
18596 | 360 | switch (BuiltinID) { |
18597 | | // FP MMA loads |
18598 | 14 | case NVPTX::BI__hmma_m16n16k16_ld_a: |
18599 | 14 | return MMA_LDST(8, m16n16k16_load_a_f16); |
18600 | 14 | case NVPTX::BI__hmma_m16n16k16_ld_b: |
18601 | 14 | return MMA_LDST(8, m16n16k16_load_b_f16); |
18602 | 14 | case NVPTX::BI__hmma_m16n16k16_ld_c_f16: |
18603 | 14 | return MMA_LDST(4, m16n16k16_load_c_f16); |
18604 | 14 | case NVPTX::BI__hmma_m16n16k16_ld_c_f32: |
18605 | 14 | return MMA_LDST(8, m16n16k16_load_c_f32); |
18606 | 10 | case NVPTX::BI__hmma_m32n8k16_ld_a: |
18607 | 10 | return MMA_LDST(8, m32n8k16_load_a_f16); |
18608 | 10 | case NVPTX::BI__hmma_m32n8k16_ld_b: |
18609 | 10 | return MMA_LDST(8, m32n8k16_load_b_f16); |
18610 | 10 | case NVPTX::BI__hmma_m32n8k16_ld_c_f16: |
18611 | 10 | return MMA_LDST(4, m32n8k16_load_c_f16); |
18612 | 10 | case NVPTX::BI__hmma_m32n8k16_ld_c_f32: |
18613 | 10 | return MMA_LDST(8, m32n8k16_load_c_f32); |
18614 | 10 | case NVPTX::BI__hmma_m8n32k16_ld_a: |
18615 | 10 | return MMA_LDST(8, m8n32k16_load_a_f16); |
18616 | 10 | case NVPTX::BI__hmma_m8n32k16_ld_b: |
18617 | 10 | return MMA_LDST(8, m8n32k16_load_b_f16); |
18618 | 10 | case NVPTX::BI__hmma_m8n32k16_ld_c_f16: |
18619 | 10 | return MMA_LDST(4, m8n32k16_load_c_f16); |
18620 | 10 | case NVPTX::BI__hmma_m8n32k16_ld_c_f32: |
18621 | 10 | return MMA_LDST(8, m8n32k16_load_c_f32); |
18622 | | |
18623 | | // Integer MMA loads |
18624 | 4 | case NVPTX::BI__imma_m16n16k16_ld_a_s8: |
18625 | 4 | return MMA_LDST(2, m16n16k16_load_a_s8); |
18626 | 4 | case NVPTX::BI__imma_m16n16k16_ld_a_u8: |
18627 | 4 | return MMA_LDST(2, m16n16k16_load_a_u8); |
18628 | 4 | case NVPTX::BI__imma_m16n16k16_ld_b_s8: |
18629 | 4 | return MMA_LDST(2, m16n16k16_load_b_s8); |
18630 | 4 | case NVPTX::BI__imma_m16n16k16_ld_b_u8: |
18631 | 4 | return MMA_LDST(2, m16n16k16_load_b_u8); |
18632 | 4 | case NVPTX::BI__imma_m16n16k16_ld_c: |
18633 | 4 | return MMA_LDST(8, m16n16k16_load_c_s32); |
18634 | 4 | case NVPTX::BI__imma_m32n8k16_ld_a_s8: |
18635 | 4 | return MMA_LDST(4, m32n8k16_load_a_s8); |
18636 | 4 | case NVPTX::BI__imma_m32n8k16_ld_a_u8: |
18637 | 4 | return MMA_LDST(4, m32n8k16_load_a_u8); |
18638 | 4 | case NVPTX::BI__imma_m32n8k16_ld_b_s8: |
18639 | 4 | return MMA_LDST(1, m32n8k16_load_b_s8); |
18640 | 4 | case NVPTX::BI__imma_m32n8k16_ld_b_u8: |
18641 | 4 | return MMA_LDST(1, m32n8k16_load_b_u8); |
18642 | 4 | case NVPTX::BI__imma_m32n8k16_ld_c: |
18643 | 4 | return MMA_LDST(8, m32n8k16_load_c_s32); |
18644 | 4 | case NVPTX::BI__imma_m8n32k16_ld_a_s8: |
18645 | 4 | return MMA_LDST(1, m8n32k16_load_a_s8); |
18646 | 4 | case NVPTX::BI__imma_m8n32k16_ld_a_u8: |
18647 | 4 | return MMA_LDST(1, m8n32k16_load_a_u8); |
18648 | 4 | case NVPTX::BI__imma_m8n32k16_ld_b_s8: |
18649 | 4 | return MMA_LDST(4, m8n32k16_load_b_s8); |
18650 | 4 | case NVPTX::BI__imma_m8n32k16_ld_b_u8: |
18651 | 4 | return MMA_LDST(4, m8n32k16_load_b_u8); |
18652 | 4 | case NVPTX::BI__imma_m8n32k16_ld_c: |
18653 | 4 | return MMA_LDST(8, m8n32k16_load_c_s32); |
18654 | | |
18655 | | // Sub-integer MMA loads. |
18656 | | // Only row/col layout is supported by A/B fragments. |
18657 | 2 | case NVPTX::BI__imma_m8n8k32_ld_a_s4: |
18658 | 2 | return {1, 0, MMA_INTR(m8n8k32_load_a_s4, row)}; |
18659 | 2 | case NVPTX::BI__imma_m8n8k32_ld_a_u4: |
18660 | 2 | return {1, 0, MMA_INTR(m8n8k32_load_a_u4, row)}; |
18661 | 2 | case NVPTX::BI__imma_m8n8k32_ld_b_s4: |
18662 | 2 | return {1, MMA_INTR(m8n8k32_load_b_s4, col), 0}; |
18663 | 2 | case NVPTX::BI__imma_m8n8k32_ld_b_u4: |
18664 | 2 | return {1, MMA_INTR(m8n8k32_load_b_u4, col), 0}; |
18665 | 4 | case NVPTX::BI__imma_m8n8k32_ld_c: |
18666 | 4 | return MMA_LDST(2, m8n8k32_load_c_s32); |
18667 | 2 | case NVPTX::BI__bmma_m8n8k128_ld_a_b1: |
18668 | 2 | return {1, 0, MMA_INTR(m8n8k128_load_a_b1, row)}; |
18669 | 2 | case NVPTX::BI__bmma_m8n8k128_ld_b_b1: |
18670 | 2 | return {1, MMA_INTR(m8n8k128_load_b_b1, col), 0}; |
18671 | 4 | case NVPTX::BI__bmma_m8n8k128_ld_c: |
18672 | 4 | return MMA_LDST(2, m8n8k128_load_c_s32); |
18673 | | |
18674 | | // Double MMA loads |
18675 | 4 | case NVPTX::BI__dmma_m8n8k4_ld_a: |
18676 | 4 | return MMA_LDST(1, m8n8k4_load_a_f64); |
18677 | 4 | case NVPTX::BI__dmma_m8n8k4_ld_b: |
18678 | 4 | return MMA_LDST(1, m8n8k4_load_b_f64); |
18679 | 4 | case NVPTX::BI__dmma_m8n8k4_ld_c: |
18680 | 4 | return MMA_LDST(2, m8n8k4_load_c_f64); |
18681 | | |
18682 | | // Alternate float MMA loads |
18683 | 4 | case NVPTX::BI__mma_bf16_m16n16k16_ld_a: |
18684 | 4 | return MMA_LDST(4, m16n16k16_load_a_bf16); |
18685 | 4 | case NVPTX::BI__mma_bf16_m16n16k16_ld_b: |
18686 | 4 | return MMA_LDST(4, m16n16k16_load_b_bf16); |
18687 | 4 | case NVPTX::BI__mma_bf16_m8n32k16_ld_a: |
18688 | 4 | return MMA_LDST(2, m8n32k16_load_a_bf16); |
18689 | 4 | case NVPTX::BI__mma_bf16_m8n32k16_ld_b: |
18690 | 4 | return MMA_LDST(8, m8n32k16_load_b_bf16); |
18691 | 4 | case NVPTX::BI__mma_bf16_m32n8k16_ld_a: |
18692 | 4 | return MMA_LDST(8, m32n8k16_load_a_bf16); |
18693 | 4 | case NVPTX::BI__mma_bf16_m32n8k16_ld_b: |
18694 | 4 | return MMA_LDST(2, m32n8k16_load_b_bf16); |
18695 | 4 | case NVPTX::BI__mma_tf32_m16n16k8_ld_a: |
18696 | 4 | return MMA_LDST(4, m16n16k8_load_a_tf32); |
18697 | 4 | case NVPTX::BI__mma_tf32_m16n16k8_ld_b: |
18698 | 4 | return MMA_LDST(4, m16n16k8_load_b_tf32); |
18699 | 4 | case NVPTX::BI__mma_tf32_m16n16k8_ld_c: |
18700 | 4 | return MMA_LDST(8, m16n16k8_load_c_f32); |
18701 | | |
18702 | | // NOTE: We need to follow the inconsistent naming scheme used by NVCC.
18703 | | // Unlike PTX and LLVM IR, where stores always use fragment D, NVCC builtins
18704 | | // always use fragment C for both loads and stores.
18705 | | // FP MMA stores. |
18706 | 14 | case NVPTX::BI__hmma_m16n16k16_st_c_f16: |
18707 | 14 | return MMA_LDST(4, m16n16k16_store_d_f16); |
18708 | 14 | case NVPTX::BI__hmma_m16n16k16_st_c_f32: |
18709 | 14 | return MMA_LDST(8, m16n16k16_store_d_f32); |
18710 | 10 | case NVPTX::BI__hmma_m32n8k16_st_c_f16: |
18711 | 10 | return MMA_LDST(4, m32n8k16_store_d_f16); |
18712 | 10 | case NVPTX::BI__hmma_m32n8k16_st_c_f32: |
18713 | 10 | return MMA_LDST(8, m32n8k16_store_d_f32); |
18714 | 10 | case NVPTX::BI__hmma_m8n32k16_st_c_f16: |
18715 | 10 | return MMA_LDST(4, m8n32k16_store_d_f16); |
18716 | 10 | case NVPTX::BI__hmma_m8n32k16_st_c_f32: |
18717 | 10 | return MMA_LDST(8, m8n32k16_store_d_f32); |
18718 | | |
18719 | | // Integer and sub-integer MMA stores. |
18720 | | // Another naming quirk. Unlike other MMA builtins that use PTX types in the |
18721 | | // name, integer loads/stores use LLVM's i32. |
18722 | 4 | case NVPTX::BI__imma_m16n16k16_st_c_i32: |
18723 | 4 | return MMA_LDST(8, m16n16k16_store_d_s32); |
18724 | 4 | case NVPTX::BI__imma_m32n8k16_st_c_i32: |
18725 | 4 | return MMA_LDST(8, m32n8k16_store_d_s32); |
18726 | 4 | case NVPTX::BI__imma_m8n32k16_st_c_i32: |
18727 | 4 | return MMA_LDST(8, m8n32k16_store_d_s32); |
18728 | 4 | case NVPTX::BI__imma_m8n8k32_st_c_i32: |
18729 | 4 | return MMA_LDST(2, m8n8k32_store_d_s32); |
18730 | 4 | case NVPTX::BI__bmma_m8n8k128_st_c_i32: |
18731 | 4 | return MMA_LDST(2, m8n8k128_store_d_s32); |
18732 | | |
18733 | | // Double MMA store |
18734 | 4 | case NVPTX::BI__dmma_m8n8k4_st_c_f64: |
18735 | 4 | return MMA_LDST(2, m8n8k4_store_d_f64); |
18736 | | |
18737 | | // Alternate float MMA store |
18738 | 4 | case NVPTX::BI__mma_m16n16k8_st_c_f32: |
18739 | 4 | return MMA_LDST(8, m16n16k8_store_d_f32); |
18740 | | |
18741 | 0 | default: |
18742 | 0 | llvm_unreachable("Unknown MMA builtin"); |
18743 | 360 | } |
18744 | 360 | } |
18745 | | #undef MMA_LDST |
18746 | | #undef MMA_INTR |
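      | | // As an expansion example, MMA_LDST(8, m16n16k16_load_a_f16) above yields
      | | //   { 8, Intrinsic::nvvm_wmma_m16n16k16_load_a_f16_col_stride,
      | | //        Intrinsic::nvvm_wmma_m16n16k16_load_a_f16_row_stride }
      | | // and the caller selects IID_row or IID_col based on the layout operand.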
18747 | | |
18748 | | |
18749 | | struct NVPTXMmaInfo { |
18750 | | unsigned NumEltsA; |
18751 | | unsigned NumEltsB; |
18752 | | unsigned NumEltsC; |
18753 | | unsigned NumEltsD; |
18754 | | |
18755 | | // Variants are ordered by layout-A/layout-B/satf, where 'row' has priority |
18756 | | // over 'col' for layout. The index of non-satf variants is expected to match |
18757 | | // the undocumented layout constants used by CUDA's mma.hpp. |
18758 | | std::array<unsigned, 8> Variants; |
18759 | | |
18760 | 692 | unsigned getMMAIntrinsic(int Layout, bool Satf) { |
18761 | 692 | unsigned Index = Layout + 4 * Satf; |
18762 | 692 | if (Index >= Variants.size()) |
18763 | 0 | return 0; |
18764 | 692 | return Variants[Index]; |
18765 | 692 | } |
18766 | | }; |
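A minimal runnable sketch of the indexing scheme getMMAIntrinsic applies to Variants, using placeholder IDs rather than the real Intrinsic:: values:

    #include <array>
    #include <cassert>

    int main() {
      // 101..104 stand in for the four non-satf layout variants
      // (row_row, row_col, col_row, col_col); 201..204 for their
      // .satfinite counterparts. Index = Layout + 4 * Satf.
      std::array<unsigned, 8> Variants = {101, 102, 103, 104,
                                          201, 202, 203, 204};
      auto get = [&](int Layout, bool Satf) -> unsigned {
        unsigned Index = Layout + 4 * Satf;
        return Index < Variants.size() ? Variants[Index] : 0;
      };
      assert(get(/*row_col=*/1, /*Satf=*/false) == 102);
      assert(get(/*row_col=*/1, /*Satf=*/true) == 202);
      return 0;
    }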
18767 | | |
18768 |  | // Returns the NVPTXMmaInfo for BuiltinID. Its getMMAIntrinsic() yields an
18769 |  | // intrinsic matching Layout and Satf for valid combinations, and 0 otherwise.
18770 | 692 | static NVPTXMmaInfo getNVPTXMmaInfo(unsigned BuiltinID) { |
18771 | | // clang-format off |
18772 | 692 | #define MMA_VARIANTS(geom, type) \ |
18773 | 692 | Intrinsic::nvvm_wmma_##geom##_mma_row_row_##type, \ |
18774 | 680 | Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type, \ |
18775 | 680 | Intrinsic::nvvm_wmma_##geom##_mma_col_row_##type, \ |
18776 | 680 | Intrinsic::nvvm_wmma_##geom##_mma_col_col_##type |
18777 | 692 | #define MMA_SATF_VARIANTS(geom, type) \ |
18778 | 692 |   MMA_VARIANTS(geom, type),                                \
18779 | 640 | Intrinsic::nvvm_wmma_##geom##_mma_row_row_##type##_satfinite, \ |
18780 | 640 | Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type##_satfinite, \ |
18781 | 640 | Intrinsic::nvvm_wmma_##geom##_mma_col_row_##type##_satfinite, \ |
18782 | 640 | Intrinsic::nvvm_wmma_##geom##_mma_col_col_##type##_satfinite |
18783 | | // Sub-integer MMA only supports row.col layout. |
18784 | 692 | #define MMA_VARIANTS_I4(geom, type) \ |
18785 | 692 | 0, \ |
18786 | 8 | Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type, \ |
18787 | 8 | 0, \ |
18788 | 8 | 0, \ |
18789 | 8 | 0, \ |
18790 | 8 | Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type##_satfinite, \ |
18791 | 8 | 0, \ |
18792 | 8 | 0 |
18793 | | // b1 MMA does not support .satfinite. |
18794 | 692 | #define MMA_VARIANTS_B1_XOR(geom, type) \ |
18795 | 692 | 0, \ |
18796 | 2 | Intrinsic::nvvm_wmma_##geom##_mma_xor_popc_row_col_##type, \ |
18797 | 2 | 0, \ |
18798 | 2 | 0, \ |
18799 | 2 | 0, \ |
18800 | 2 | 0, \ |
18801 | 2 | 0, \ |
18802 | 2 | 0 |
18803 | 692 | #define MMA_VARIANTS_B1_AND(geom, type) \ |
18804 | 692 | 0, \ |
18805 | 2 | Intrinsic::nvvm_wmma_##geom##_mma_and_popc_row_col_##type, \ |
18806 | 2 | 0, \ |
18807 | 2 | 0, \ |
18808 | 2 | 0, \ |
18809 | 2 | 0, \ |
18810 | 2 | 0, \ |
18811 | 2 | 0 |
18812 | | // clang-format on |
18813 | 692 | switch (BuiltinID) { |
18814 | | // FP MMA |
18815 | | // Note that 'type' argument of MMA_SATF_VARIANTS uses D_C notation, while |
18816 | | // NumEltsN of return value are ordered as A,B,C,D. |
18817 | 56 | case NVPTX::BI__hmma_m16n16k16_mma_f16f16: |
18818 | 56 | return {8, 8, 4, 4, {{MMA_SATF_VARIANTS(m16n16k16, f16_f16)}}}; |
18819 | 56 | case NVPTX::BI__hmma_m16n16k16_mma_f32f16: |
18820 | 56 | return {8, 8, 4, 8, {{MMA_SATF_VARIANTS(m16n16k16, f32_f16)}}}; |
18821 | 56 | case NVPTX::BI__hmma_m16n16k16_mma_f16f32: |
18822 | 56 | return {8, 8, 8, 4, {{MMA_SATF_VARIANTS(m16n16k16, f16_f32)}}}; |
18823 | 56 | case NVPTX::BI__hmma_m16n16k16_mma_f32f32: |
18824 | 56 | return {8, 8, 8, 8, {{MMA_SATF_VARIANTS(m16n16k16, f32_f32)}}}; |
18825 | 40 | case NVPTX::BI__hmma_m32n8k16_mma_f16f16: |
18826 | 40 | return {8, 8, 4, 4, {{MMA_SATF_VARIANTS(m32n8k16, f16_f16)}}}; |
18827 | 40 | case NVPTX::BI__hmma_m32n8k16_mma_f32f16: |
18828 | 40 | return {8, 8, 4, 8, {{MMA_SATF_VARIANTS(m32n8k16, f32_f16)}}}; |
18829 | 40 | case NVPTX::BI__hmma_m32n8k16_mma_f16f32: |
18830 | 40 | return {8, 8, 8, 4, {{MMA_SATF_VARIANTS(m32n8k16, f16_f32)}}}; |
18831 | 40 | case NVPTX::BI__hmma_m32n8k16_mma_f32f32: |
18832 | 40 | return {8, 8, 8, 8, {{MMA_SATF_VARIANTS(m32n8k16, f32_f32)}}}; |
18833 | 40 | case NVPTX::BI__hmma_m8n32k16_mma_f16f16: |
18834 | 40 | return {8, 8, 4, 4, {{MMA_SATF_VARIANTS(m8n32k16, f16_f16)}}}; |
18835 | 40 | case NVPTX::BI__hmma_m8n32k16_mma_f32f16: |
18836 | 40 | return {8, 8, 4, 8, {{MMA_SATF_VARIANTS(m8n32k16, f32_f16)}}}; |
18837 | 40 | case NVPTX::BI__hmma_m8n32k16_mma_f16f32: |
18838 | 40 | return {8, 8, 8, 4, {{MMA_SATF_VARIANTS(m8n32k16, f16_f32)}}}; |
18839 | 40 | case NVPTX::BI__hmma_m8n32k16_mma_f32f32: |
18840 | 40 | return {8, 8, 8, 8, {{MMA_SATF_VARIANTS(m8n32k16, f32_f32)}}}; |
18841 | | |
18842 | | // Integer MMA |
18843 | 16 | case NVPTX::BI__imma_m16n16k16_mma_s8: |
18844 | 16 | return {2, 2, 8, 8, {{MMA_SATF_VARIANTS(m16n16k16, s8)}}}; |
18845 | 16 | case NVPTX::BI__imma_m16n16k16_mma_u8: |
18846 | 16 | return {2, 2, 8, 8, {{MMA_SATF_VARIANTS(m16n16k16, u8)}}}; |
18847 | 16 | case NVPTX::BI__imma_m32n8k16_mma_s8: |
18848 | 16 | return {4, 1, 8, 8, {{MMA_SATF_VARIANTS(m32n8k16, s8)}}}; |
18849 | 16 | case NVPTX::BI__imma_m32n8k16_mma_u8: |
18850 | 16 | return {4, 1, 8, 8, {{MMA_SATF_VARIANTS(m32n8k16, u8)}}}; |
18851 | 16 | case NVPTX::BI__imma_m8n32k16_mma_s8: |
18852 | 16 | return {1, 4, 8, 8, {{MMA_SATF_VARIANTS(m8n32k16, s8)}}}; |
18853 | 16 | case NVPTX::BI__imma_m8n32k16_mma_u8: |
18854 | 16 | return {1, 4, 8, 8, {{MMA_SATF_VARIANTS(m8n32k16, u8)}}}; |
18855 | | |
18856 | | // Sub-integer MMA |
18857 | 4 | case NVPTX::BI__imma_m8n8k32_mma_s4: |
18858 | 4 | return {1, 1, 2, 2, {{MMA_VARIANTS_I4(m8n8k32, s4)}}}; |
18859 | 4 | case NVPTX::BI__imma_m8n8k32_mma_u4: |
18860 | 4 | return {1, 1, 2, 2, {{MMA_VARIANTS_I4(m8n8k32, u4)}}}; |
18861 | 2 | case NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1: |
18862 | 2 | return {1, 1, 2, 2, {{MMA_VARIANTS_B1_XOR(m8n8k128, b1)}}}; |
18863 | 2 | case NVPTX::BI__bmma_m8n8k128_mma_and_popc_b1: |
18864 | 2 | return {1, 1, 2, 2, {{MMA_VARIANTS_B1_AND(m8n8k128, b1)}}}; |
18865 | | |
18866 | | // Double MMA |
18867 | 8 | case NVPTX::BI__dmma_m8n8k4_mma_f64: |
18868 | 8 | return {1, 1, 2, 2, {{MMA_VARIANTS(m8n8k4, f64)}}}; |
18869 | | |
18870 | | // Alternate FP MMA |
18871 | 8 | case NVPTX::BI__mma_bf16_m16n16k16_mma_f32: |
18872 | 8 | return {4, 4, 8, 8, {{MMA_VARIANTS(m16n16k16, bf16)}}}; |
18873 | 8 | case NVPTX::BI__mma_bf16_m8n32k16_mma_f32: |
18874 | 8 | return {2, 8, 8, 8, {{MMA_VARIANTS(m8n32k16, bf16)}}}; |
18875 | 8 | case NVPTX::BI__mma_bf16_m32n8k16_mma_f32: |
18876 | 8 | return {8, 2, 8, 8, {{MMA_VARIANTS(m32n8k16, bf16)}}}; |
18877 | 8 | case NVPTX::BI__mma_tf32_m16n16k8_mma_f32: |
18878 | 8 | return {4, 4, 8, 8, {{MMA_VARIANTS(m16n16k8, tf32)}}}; |
18879 | 0 | default: |
18880 | 0 | llvm_unreachable("Unexpected builtin ID."); |
18881 | 692 | } |
18882 | 692 | #undef MMA_VARIANTS |
18883 | 692 | #undef MMA_SATF_VARIANTS |
18884 | 692 | #undef MMA_VARIANTS_I4 |
18885 | 692 | #undef MMA_VARIANTS_B1_AND |
18886 | 692 | #undef MMA_VARIANTS_B1_XOR |
18887 | 692 | } |
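For reference, the eight-entry initializer that MMA_SATF_VARIANTS(m16n16k16, f16_f16) expands to, reconstructed mechanically from the macro definitions above; the order matches the Layout + 4 * Satf indexing in getMMAIntrinsic:

    Intrinsic::nvvm_wmma_m16n16k16_mma_row_row_f16_f16,
    Intrinsic::nvvm_wmma_m16n16k16_mma_row_col_f16_f16,
    Intrinsic::nvvm_wmma_m16n16k16_mma_col_row_f16_f16,
    Intrinsic::nvvm_wmma_m16n16k16_mma_col_col_f16_f16,
    Intrinsic::nvvm_wmma_m16n16k16_mma_row_row_f16_f16_satfinite,
    Intrinsic::nvvm_wmma_m16n16k16_mma_row_col_f16_f16_satfinite,
    Intrinsic::nvvm_wmma_m16n16k16_mma_col_row_f16_f16_satfinite,
    Intrinsic::nvvm_wmma_m16n16k16_mma_col_col_f16_f16_satfinite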
18888 | | |
18889 | | static Value *MakeLdgLdu(unsigned IntrinsicID, CodeGenFunction &CGF, |
18890 | 0 | const CallExpr *E) { |
18891 | 0 | Value *Ptr = CGF.EmitScalarExpr(E->getArg(0)); |
18892 | 0 | QualType ArgType = E->getArg(0)->getType(); |
18893 | 0 | clang::CharUnits Align = CGF.CGM.getNaturalPointeeTypeAlignment(ArgType); |
18894 | 0 | llvm::Type *ElemTy = CGF.ConvertTypeForMem(ArgType->getPointeeType()); |
18895 | 0 | return CGF.Builder.CreateCall( |
18896 | 0 | CGF.CGM.getIntrinsic(IntrinsicID, {ElemTy, Ptr->getType()}), |
18897 | 0 | {Ptr, ConstantInt::get(CGF.Builder.getInt32Ty(), Align.getQuantity())}); |
18898 | 0 | } |
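A hedged device-code sketch (clang CUDA mode) of one builtin this helper lowers; the helper supplies the pointee's natural alignment, here 4 bytes for float, as the intrinsic's i32 operand:

    // Loads through the read-only (global) data cache; lowered by MakeLdgLdu
    // to the llvm.nvvm.ldg.global.f intrinsic with alignment operand 4.
    __device__ float load_via_readonly_cache(const float *p) {
      return __nvvm_ldg_f(p);
    }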
18899 | | |
18900 | | static Value *MakeScopedAtomic(unsigned IntrinsicID, CodeGenFunction &CGF, |
18901 | 0 | const CallExpr *E) { |
18902 | 0 | Value *Ptr = CGF.EmitScalarExpr(E->getArg(0)); |
18903 | 0 | llvm::Type *ElemTy = |
18904 | 0 | CGF.ConvertTypeForMem(E->getArg(0)->getType()->getPointeeType()); |
18905 | 0 | return CGF.Builder.CreateCall( |
18906 | 0 | CGF.CGM.getIntrinsic(IntrinsicID, {ElemTy, Ptr->getType()}), |
18907 | 0 | {Ptr, CGF.EmitScalarExpr(E->getArg(1))}); |
18908 | 0 | } |
18909 | | |
18910 | | static Value *MakeCpAsync(unsigned IntrinsicID, unsigned IntrinsicIDS, |
18911 | | CodeGenFunction &CGF, const CallExpr *E, |
18912 | 0 | int SrcSize) { |
18913 | 0 | return E->getNumArgs() == 3 |
18914 | 0 | ? CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IntrinsicIDS), |
18915 | 0 | {CGF.EmitScalarExpr(E->getArg(0)), |
18916 | 0 | CGF.EmitScalarExpr(E->getArg(1)), |
18917 | 0 | CGF.EmitScalarExpr(E->getArg(2))}) |
18918 | 0 | : CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IntrinsicID), |
18919 | 0 | {CGF.EmitScalarExpr(E->getArg(0)), |
18920 | 0 | CGF.EmitScalarExpr(E->getArg(1))}); |
18921 | 0 | } |
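A hedged sketch of the two call shapes this helper distinguishes; the real builtin prototypes require shared- and global-address-space pointers, elided here for brevity:

    // CUDA device-code sketch: two arguments pick the plain intrinsic (copy a
    // fixed 4 bytes); a third src_size argument picks the '_s' intrinsic,
    // which carries the source size explicitly.
    __nvvm_cp_async_ca_shared_global_4(dst, src);
    __nvvm_cp_async_ca_shared_global_4(dst, src, src_size);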
18922 | | |
18923 | | static Value *MakeHalfType(unsigned IntrinsicID, unsigned BuiltinID, |
18924 | 0 | const CallExpr *E, CodeGenFunction &CGF) { |
18925 | 0 | auto &C = CGF.CGM.getContext(); |
18926 | 0 | if (!(C.getLangOpts().NativeHalfType || |
18927 | 0 | !C.getTargetInfo().useFP16ConversionIntrinsics())) { |
18928 | 0 | CGF.CGM.Error(E->getExprLoc(), C.BuiltinInfo.getName(BuiltinID).str() + |
18929 | 0 | " requires native half type support."); |
18930 | 0 | return nullptr; |
18931 | 0 | } |
18932 | | |
18933 | 0 | if (IntrinsicID == Intrinsic::nvvm_ldg_global_f || |
18934 | 0 | IntrinsicID == Intrinsic::nvvm_ldu_global_f) |
18935 | 0 | return MakeLdgLdu(IntrinsicID, CGF, E); |
18936 | | |
18937 | 0 | SmallVector<Value *, 16> Args; |
18938 | 0 | auto *F = CGF.CGM.getIntrinsic(IntrinsicID); |
18939 | 0 | auto *FTy = F->getFunctionType(); |
18940 | 0 | unsigned ICEArguments = 0; |
18941 | 0 | ASTContext::GetBuiltinTypeError Error; |
18942 | 0 | C.GetBuiltinType(BuiltinID, Error, &ICEArguments); |
18943 | 0 | assert(Error == ASTContext::GE_None && "Should not codegen an error"); |
18944 | 0 | for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) { |
18945 | 0 | assert((ICEArguments & (1 << i)) == 0); |
18946 | 0 | auto *ArgValue = CGF.EmitScalarExpr(E->getArg(i)); |
18947 | 0 | auto *PTy = FTy->getParamType(i); |
18948 | 0 | if (PTy != ArgValue->getType()) |
18949 | 0 | ArgValue = CGF.Builder.CreateBitCast(ArgValue, PTy); |
18950 | 0 | Args.push_back(ArgValue); |
18951 | 0 | } |
18952 | | |
18953 | 0 | return CGF.Builder.CreateCall(F, Args); |
18954 | 0 | } |
18955 | | } // namespace |
18956 | | |
18957 | | Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, |
18958 | 1.09k | const CallExpr *E) { |
18959 | 1.09k | switch (BuiltinID) { |
18960 | 0 | case NVPTX::BI__nvvm_atom_add_gen_i: |
18961 | 0 | case NVPTX::BI__nvvm_atom_add_gen_l: |
18962 | 0 | case NVPTX::BI__nvvm_atom_add_gen_ll: |
18963 | 0 | return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Add, E); |
18964 | | |
18965 | 0 | case NVPTX::BI__nvvm_atom_sub_gen_i: |
18966 | 0 | case NVPTX::BI__nvvm_atom_sub_gen_l: |
18967 | 0 | case NVPTX::BI__nvvm_atom_sub_gen_ll: |
18968 | 0 | return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Sub, E); |
18969 | | |
18970 | 0 | case NVPTX::BI__nvvm_atom_and_gen_i: |
18971 | 0 | case NVPTX::BI__nvvm_atom_and_gen_l: |
18972 | 0 | case NVPTX::BI__nvvm_atom_and_gen_ll: |
18973 | 0 | return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::And, E); |
18974 | | |
18975 | 0 | case NVPTX::BI__nvvm_atom_or_gen_i: |
18976 | 0 | case NVPTX::BI__nvvm_atom_or_gen_l: |
18977 | 0 | case NVPTX::BI__nvvm_atom_or_gen_ll: |
18978 | 0 | return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Or, E); |
18979 | | |
18980 | 0 | case NVPTX::BI__nvvm_atom_xor_gen_i: |
18981 | 0 | case NVPTX::BI__nvvm_atom_xor_gen_l: |
18982 | 0 | case NVPTX::BI__nvvm_atom_xor_gen_ll: |
18983 | 0 | return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xor, E); |
18984 | | |
18985 | 0 | case NVPTX::BI__nvvm_atom_xchg_gen_i: |
18986 | 0 | case NVPTX::BI__nvvm_atom_xchg_gen_l: |
18987 | 0 | case NVPTX::BI__nvvm_atom_xchg_gen_ll: |
18988 | 0 | return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xchg, E); |
18989 | | |
18990 | 0 | case NVPTX::BI__nvvm_atom_max_gen_i: |
18991 | 0 | case NVPTX::BI__nvvm_atom_max_gen_l: |
18992 | 0 | case NVPTX::BI__nvvm_atom_max_gen_ll: |
18993 | 0 | return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Max, E); |
18994 | | |
18995 | 0 | case NVPTX::BI__nvvm_atom_max_gen_ui: |
18996 | 0 | case NVPTX::BI__nvvm_atom_max_gen_ul: |
18997 | 0 | case NVPTX::BI__nvvm_atom_max_gen_ull: |
18998 | 0 | return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMax, E); |
18999 | | |
19000 | 0 | case NVPTX::BI__nvvm_atom_min_gen_i: |
19001 | 0 | case NVPTX::BI__nvvm_atom_min_gen_l: |
19002 | 0 | case NVPTX::BI__nvvm_atom_min_gen_ll: |
19003 | 0 | return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Min, E); |
19004 | | |
19005 | 0 | case NVPTX::BI__nvvm_atom_min_gen_ui: |
19006 | 0 | case NVPTX::BI__nvvm_atom_min_gen_ul: |
19007 | 0 | case NVPTX::BI__nvvm_atom_min_gen_ull: |
19008 | 0 | return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMin, E); |
19009 | | |
19010 | 0 | case NVPTX::BI__nvvm_atom_cas_gen_i: |
19011 | 0 | case NVPTX::BI__nvvm_atom_cas_gen_l: |
19012 | 0 | case NVPTX::BI__nvvm_atom_cas_gen_ll: |
19013 | | // __nvvm_atom_cas_gen_* should return the old value rather than the |
19014 | | // success flag. |
19015 | 0 | return MakeAtomicCmpXchgValue(*this, E, /*ReturnBool=*/false); |
19016 | | |
19017 | 0 | case NVPTX::BI__nvvm_atom_add_gen_f: |
19018 | 2 | case NVPTX::BI__nvvm_atom_add_gen_d: { |
19019 | 2 | Value *Ptr = EmitScalarExpr(E->getArg(0)); |
19020 | 2 | Value *Val = EmitScalarExpr(E->getArg(1)); |
19021 | 2 | return Builder.CreateAtomicRMW(llvm::AtomicRMWInst::FAdd, Ptr, Val, |
19022 | 2 | AtomicOrdering::SequentiallyConsistent); |
19023 | 0 | } |
19024 | | |
19025 | 0 | case NVPTX::BI__nvvm_atom_inc_gen_ui: { |
19026 | 0 | Value *Ptr = EmitScalarExpr(E->getArg(0)); |
19027 | 0 | Value *Val = EmitScalarExpr(E->getArg(1)); |
19028 | 0 | Function *FnALI32 = |
19029 | 0 | CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_inc_32, Ptr->getType()); |
19030 | 0 | return Builder.CreateCall(FnALI32, {Ptr, Val}); |
19031 | 0 | } |
19032 | | |
19033 | 0 | case NVPTX::BI__nvvm_atom_dec_gen_ui: { |
19034 | 0 | Value *Ptr = EmitScalarExpr(E->getArg(0)); |
19035 | 0 | Value *Val = EmitScalarExpr(E->getArg(1)); |
19036 | 0 | Function *FnALD32 = |
19037 | 0 | CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_dec_32, Ptr->getType()); |
19038 | 0 | return Builder.CreateCall(FnALD32, {Ptr, Val}); |
19039 | 0 | } |
19040 | | |
19041 | 0 | case NVPTX::BI__nvvm_ldg_c: |
19042 | 0 | case NVPTX::BI__nvvm_ldg_sc: |
19043 | 0 | case NVPTX::BI__nvvm_ldg_c2: |
19044 | 0 | case NVPTX::BI__nvvm_ldg_sc2: |
19045 | 0 | case NVPTX::BI__nvvm_ldg_c4: |
19046 | 0 | case NVPTX::BI__nvvm_ldg_sc4: |
19047 | 0 | case NVPTX::BI__nvvm_ldg_s: |
19048 | 0 | case NVPTX::BI__nvvm_ldg_s2: |
19049 | 0 | case NVPTX::BI__nvvm_ldg_s4: |
19050 | 0 | case NVPTX::BI__nvvm_ldg_i: |
19051 | 0 | case NVPTX::BI__nvvm_ldg_i2: |
19052 | 0 | case NVPTX::BI__nvvm_ldg_i4: |
19053 | 0 | case NVPTX::BI__nvvm_ldg_l: |
19054 | 0 | case NVPTX::BI__nvvm_ldg_l2: |
19055 | 0 | case NVPTX::BI__nvvm_ldg_ll: |
19056 | 0 | case NVPTX::BI__nvvm_ldg_ll2: |
19057 | 0 | case NVPTX::BI__nvvm_ldg_uc: |
19058 | 0 | case NVPTX::BI__nvvm_ldg_uc2: |
19059 | 0 | case NVPTX::BI__nvvm_ldg_uc4: |
19060 | 0 | case NVPTX::BI__nvvm_ldg_us: |
19061 | 0 | case NVPTX::BI__nvvm_ldg_us2: |
19062 | 0 | case NVPTX::BI__nvvm_ldg_us4: |
19063 | 0 | case NVPTX::BI__nvvm_ldg_ui: |
19064 | 0 | case NVPTX::BI__nvvm_ldg_ui2: |
19065 | 0 | case NVPTX::BI__nvvm_ldg_ui4: |
19066 | 0 | case NVPTX::BI__nvvm_ldg_ul: |
19067 | 0 | case NVPTX::BI__nvvm_ldg_ul2: |
19068 | 0 | case NVPTX::BI__nvvm_ldg_ull: |
19069 | 0 | case NVPTX::BI__nvvm_ldg_ull2: |
19070 | | // PTX Interoperability section 2.2: "For a vector with an even number of |
19071 | | // elements, its alignment is set to number of elements times the alignment |
19072 | | // of its member: n*alignof(t)." |
19073 | 0 | return MakeLdgLdu(Intrinsic::nvvm_ldg_global_i, *this, E); |
19074 | 0 | case NVPTX::BI__nvvm_ldg_f: |
19075 | 0 | case NVPTX::BI__nvvm_ldg_f2: |
19076 | 0 | case NVPTX::BI__nvvm_ldg_f4: |
19077 | 0 | case NVPTX::BI__nvvm_ldg_d: |
19078 | 0 | case NVPTX::BI__nvvm_ldg_d2: |
19079 | 0 | return MakeLdgLdu(Intrinsic::nvvm_ldg_global_f, *this, E); |
19080 | | |
19081 | 0 | case NVPTX::BI__nvvm_ldu_c: |
19082 | 0 | case NVPTX::BI__nvvm_ldu_sc: |
19083 | 0 | case NVPTX::BI__nvvm_ldu_c2: |
19084 | 0 | case NVPTX::BI__nvvm_ldu_sc2: |
19085 | 0 | case NVPTX::BI__nvvm_ldu_c4: |
19086 | 0 | case NVPTX::BI__nvvm_ldu_sc4: |
19087 | 0 | case NVPTX::BI__nvvm_ldu_s: |
19088 | 0 | case NVPTX::BI__nvvm_ldu_s2: |
19089 | 0 | case NVPTX::BI__nvvm_ldu_s4: |
19090 | 0 | case NVPTX::BI__nvvm_ldu_i: |
19091 | 0 | case NVPTX::BI__nvvm_ldu_i2: |
19092 | 0 | case NVPTX::BI__nvvm_ldu_i4: |
19093 | 0 | case NVPTX::BI__nvvm_ldu_l: |
19094 | 0 | case NVPTX::BI__nvvm_ldu_l2: |
19095 | 0 | case NVPTX::BI__nvvm_ldu_ll: |
19096 | 0 | case NVPTX::BI__nvvm_ldu_ll2: |
19097 | 0 | case NVPTX::BI__nvvm_ldu_uc: |
19098 | 0 | case NVPTX::BI__nvvm_ldu_uc2: |
19099 | 0 | case NVPTX::BI__nvvm_ldu_uc4: |
19100 | 0 | case NVPTX::BI__nvvm_ldu_us: |
19101 | 0 | case NVPTX::BI__nvvm_ldu_us2: |
19102 | 0 | case NVPTX::BI__nvvm_ldu_us4: |
19103 | 0 | case NVPTX::BI__nvvm_ldu_ui: |
19104 | 0 | case NVPTX::BI__nvvm_ldu_ui2: |
19105 | 0 | case NVPTX::BI__nvvm_ldu_ui4: |
19106 | 0 | case NVPTX::BI__nvvm_ldu_ul: |
19107 | 0 | case NVPTX::BI__nvvm_ldu_ul2: |
19108 | 0 | case NVPTX::BI__nvvm_ldu_ull: |
19109 | 0 | case NVPTX::BI__nvvm_ldu_ull2: |
19110 | 0 | return MakeLdgLdu(Intrinsic::nvvm_ldu_global_i, *this, E); |
19111 | 0 | case NVPTX::BI__nvvm_ldu_f: |
19112 | 0 | case NVPTX::BI__nvvm_ldu_f2: |
19113 | 0 | case NVPTX::BI__nvvm_ldu_f4: |
19114 | 0 | case NVPTX::BI__nvvm_ldu_d: |
19115 | 0 | case NVPTX::BI__nvvm_ldu_d2: |
19116 | 0 | return MakeLdgLdu(Intrinsic::nvvm_ldu_global_f, *this, E); |
19117 | | |
19118 | 0 | case NVPTX::BI__nvvm_atom_cta_add_gen_i: |
19119 | 0 | case NVPTX::BI__nvvm_atom_cta_add_gen_l: |
19120 | 0 | case NVPTX::BI__nvvm_atom_cta_add_gen_ll: |
19121 | 0 | return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_cta, *this, E); |
19122 | 0 | case NVPTX::BI__nvvm_atom_sys_add_gen_i: |
19123 | 0 | case NVPTX::BI__nvvm_atom_sys_add_gen_l: |
19124 | 0 | case NVPTX::BI__nvvm_atom_sys_add_gen_ll: |
19125 | 0 | return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_sys, *this, E); |
19126 | 0 | case NVPTX::BI__nvvm_atom_cta_add_gen_f: |
19127 | 0 | case NVPTX::BI__nvvm_atom_cta_add_gen_d: |
19128 | 0 | return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_cta, *this, E); |
19129 | 0 | case NVPTX::BI__nvvm_atom_sys_add_gen_f: |
19130 | 0 | case NVPTX::BI__nvvm_atom_sys_add_gen_d: |
19131 | 0 | return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_sys, *this, E); |
19132 | 0 | case NVPTX::BI__nvvm_atom_cta_xchg_gen_i: |
19133 | 0 | case NVPTX::BI__nvvm_atom_cta_xchg_gen_l: |
19134 | 0 | case NVPTX::BI__nvvm_atom_cta_xchg_gen_ll: |
19135 | 0 | return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_cta, *this, E); |
19136 | 0 | case NVPTX::BI__nvvm_atom_sys_xchg_gen_i: |
19137 | 0 | case NVPTX::BI__nvvm_atom_sys_xchg_gen_l: |
19138 | 0 | case NVPTX::BI__nvvm_atom_sys_xchg_gen_ll: |
19139 | 0 | return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_sys, *this, E); |
19140 | 0 | case NVPTX::BI__nvvm_atom_cta_max_gen_i: |
19141 | 0 | case NVPTX::BI__nvvm_atom_cta_max_gen_ui: |
19142 | 0 | case NVPTX::BI__nvvm_atom_cta_max_gen_l: |
19143 | 0 | case NVPTX::BI__nvvm_atom_cta_max_gen_ul: |
19144 | 0 | case NVPTX::BI__nvvm_atom_cta_max_gen_ll: |
19145 | 0 | case NVPTX::BI__nvvm_atom_cta_max_gen_ull: |
19146 | 0 | return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_cta, *this, E); |
19147 | 0 | case NVPTX::BI__nvvm_atom_sys_max_gen_i: |
19148 | 0 | case NVPTX::BI__nvvm_atom_sys_max_gen_ui: |
19149 | 0 | case NVPTX::BI__nvvm_atom_sys_max_gen_l: |
19150 | 0 | case NVPTX::BI__nvvm_atom_sys_max_gen_ul: |
19151 | 0 | case NVPTX::BI__nvvm_atom_sys_max_gen_ll: |
19152 | 0 | case NVPTX::BI__nvvm_atom_sys_max_gen_ull: |
19153 | 0 | return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_sys, *this, E); |
19154 | 0 | case NVPTX::BI__nvvm_atom_cta_min_gen_i: |
19155 | 0 | case NVPTX::BI__nvvm_atom_cta_min_gen_ui: |
19156 | 0 | case NVPTX::BI__nvvm_atom_cta_min_gen_l: |
19157 | 0 | case NVPTX::BI__nvvm_atom_cta_min_gen_ul: |
19158 | 0 | case NVPTX::BI__nvvm_atom_cta_min_gen_ll: |
19159 | 0 | case NVPTX::BI__nvvm_atom_cta_min_gen_ull: |
19160 | 0 | return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_cta, *this, E); |
19161 | 0 | case NVPTX::BI__nvvm_atom_sys_min_gen_i: |
19162 | 0 | case NVPTX::BI__nvvm_atom_sys_min_gen_ui: |
19163 | 0 | case NVPTX::BI__nvvm_atom_sys_min_gen_l: |
19164 | 0 | case NVPTX::BI__nvvm_atom_sys_min_gen_ul: |
19165 | 0 | case NVPTX::BI__nvvm_atom_sys_min_gen_ll: |
19166 | 0 | case NVPTX::BI__nvvm_atom_sys_min_gen_ull: |
19167 | 0 | return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_sys, *this, E); |
19168 | 0 | case NVPTX::BI__nvvm_atom_cta_inc_gen_ui: |
19169 | 0 | return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_cta, *this, E); |
19170 | 0 | case NVPTX::BI__nvvm_atom_cta_dec_gen_ui: |
19171 | 0 | return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_cta, *this, E); |
19172 | 0 | case NVPTX::BI__nvvm_atom_sys_inc_gen_ui: |
19173 | 0 | return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_sys, *this, E); |
19174 | 0 | case NVPTX::BI__nvvm_atom_sys_dec_gen_ui: |
19175 | 0 | return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_sys, *this, E); |
19176 | 0 | case NVPTX::BI__nvvm_atom_cta_and_gen_i: |
19177 | 0 | case NVPTX::BI__nvvm_atom_cta_and_gen_l: |
19178 | 0 | case NVPTX::BI__nvvm_atom_cta_and_gen_ll: |
19179 | 0 | return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_cta, *this, E); |
19180 | 0 | case NVPTX::BI__nvvm_atom_sys_and_gen_i: |
19181 | 0 | case NVPTX::BI__nvvm_atom_sys_and_gen_l: |
19182 | 0 | case NVPTX::BI__nvvm_atom_sys_and_gen_ll: |
19183 | 0 | return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_sys, *this, E); |
19184 | 0 | case NVPTX::BI__nvvm_atom_cta_or_gen_i: |
19185 | 0 | case NVPTX::BI__nvvm_atom_cta_or_gen_l: |
19186 | 0 | case NVPTX::BI__nvvm_atom_cta_or_gen_ll: |
19187 | 0 | return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_cta, *this, E); |
19188 | 0 | case NVPTX::BI__nvvm_atom_sys_or_gen_i: |
19189 | 0 | case NVPTX::BI__nvvm_atom_sys_or_gen_l: |
19190 | 0 | case NVPTX::BI__nvvm_atom_sys_or_gen_ll: |
19191 | 0 | return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_sys, *this, E); |
19192 | 0 | case NVPTX::BI__nvvm_atom_cta_xor_gen_i: |
19193 | 0 | case NVPTX::BI__nvvm_atom_cta_xor_gen_l: |
19194 | 0 | case NVPTX::BI__nvvm_atom_cta_xor_gen_ll: |
19195 | 0 | return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_cta, *this, E); |
19196 | 0 | case NVPTX::BI__nvvm_atom_sys_xor_gen_i: |
19197 | 0 | case NVPTX::BI__nvvm_atom_sys_xor_gen_l: |
19198 | 0 | case NVPTX::BI__nvvm_atom_sys_xor_gen_ll: |
19199 | 0 | return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_sys, *this, E); |
19200 | 0 | case NVPTX::BI__nvvm_atom_cta_cas_gen_i: |
19201 | 0 | case NVPTX::BI__nvvm_atom_cta_cas_gen_l: |
19202 | 0 | case NVPTX::BI__nvvm_atom_cta_cas_gen_ll: { |
19203 | 0 | Value *Ptr = EmitScalarExpr(E->getArg(0)); |
19204 | 0 | llvm::Type *ElemTy = |
19205 | 0 | ConvertTypeForMem(E->getArg(0)->getType()->getPointeeType()); |
19206 | 0 | return Builder.CreateCall( |
19207 | 0 | CGM.getIntrinsic( |
19208 | 0 | Intrinsic::nvvm_atomic_cas_gen_i_cta, {ElemTy, Ptr->getType()}), |
19209 | 0 | {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))}); |
19210 | 0 | } |
19211 | 0 | case NVPTX::BI__nvvm_atom_sys_cas_gen_i: |
19212 | 0 | case NVPTX::BI__nvvm_atom_sys_cas_gen_l: |
19213 | 0 | case NVPTX::BI__nvvm_atom_sys_cas_gen_ll: { |
19214 | 0 | Value *Ptr = EmitScalarExpr(E->getArg(0)); |
19215 | 0 | llvm::Type *ElemTy = |
19216 | 0 | ConvertTypeForMem(E->getArg(0)->getType()->getPointeeType()); |
19217 | 0 | return Builder.CreateCall( |
19218 | 0 | CGM.getIntrinsic( |
19219 | 0 | Intrinsic::nvvm_atomic_cas_gen_i_sys, {ElemTy, Ptr->getType()}), |
19220 | 0 | {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))}); |
19221 | 0 | } |
19222 | 4 | case NVPTX::BI__nvvm_match_all_sync_i32p: |
19223 | 8 | case NVPTX::BI__nvvm_match_all_sync_i64p: { |
19224 | 8 | Value *Mask = EmitScalarExpr(E->getArg(0)); |
19225 | 8 | Value *Val = EmitScalarExpr(E->getArg(1)); |
19226 | 8 | Address PredOutPtr = EmitPointerWithAlignment(E->getArg(2)); |
19227 | 8 | Value *ResultPair = Builder.CreateCall( |
19228 | 8 | CGM.getIntrinsic(BuiltinID == NVPTX::BI__nvvm_match_all_sync_i32p |
19229 | 8 |                              ? Intrinsic::nvvm_match_all_sync_i32p
19230 | 8 |                              : Intrinsic::nvvm_match_all_sync_i64p),
19231 | 8 | {Mask, Val}); |
19232 | 8 | Value *Pred = Builder.CreateZExt(Builder.CreateExtractValue(ResultPair, 1), |
19233 | 8 | PredOutPtr.getElementType()); |
19234 | 8 | Builder.CreateStore(Pred, PredOutPtr); |
19235 | 8 | return Builder.CreateExtractValue(ResultPair, 0); |
19236 | 4 | } |
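A hedged CUDA device-code sketch of the builtin lowered here; the intrinsic yields a {value, predicate} pair, with the predicate zero-extended and stored through the third argument:

    __device__ unsigned match_all(unsigned mask, unsigned value) {
      int pred; // set non-zero iff all participating lanes passed equal values
      unsigned result = __nvvm_match_all_sync_i32p(mask, value, &pred);
      return pred ? result : 0u;
    }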
19237 | | |
19238 | | // FP MMA loads |
19239 | 14 | case NVPTX::BI__hmma_m16n16k16_ld_a: |
19240 | 28 | case NVPTX::BI__hmma_m16n16k16_ld_b: |
19241 | 42 | case NVPTX::BI__hmma_m16n16k16_ld_c_f16: |
19242 | 56 | case NVPTX::BI__hmma_m16n16k16_ld_c_f32: |
19243 | 66 | case NVPTX::BI__hmma_m32n8k16_ld_a: |
19244 | 76 | case NVPTX::BI__hmma_m32n8k16_ld_b: |
19245 | 86 | case NVPTX::BI__hmma_m32n8k16_ld_c_f16: |
19246 | 96 | case NVPTX::BI__hmma_m32n8k16_ld_c_f32: |
19247 | 106 | case NVPTX::BI__hmma_m8n32k16_ld_a: |
19248 | 116 | case NVPTX::BI__hmma_m8n32k16_ld_b: |
19249 | 126 | case NVPTX::BI__hmma_m8n32k16_ld_c_f16: |
19250 | 136 | case NVPTX::BI__hmma_m8n32k16_ld_c_f32: |
19251 | | // Integer MMA loads. |
19252 | 140 | case NVPTX::BI__imma_m16n16k16_ld_a_s8: |
19253 | 144 | case NVPTX::BI__imma_m16n16k16_ld_a_u8: |
19254 | 148 | case NVPTX::BI__imma_m16n16k16_ld_b_s8: |
19255 | 152 | case NVPTX::BI__imma_m16n16k16_ld_b_u8: |
19256 | 156 | case NVPTX::BI__imma_m16n16k16_ld_c: |
19257 | 160 | case NVPTX::BI__imma_m32n8k16_ld_a_s8: |
19258 | 164 | case NVPTX::BI__imma_m32n8k16_ld_a_u8: |
19259 | 168 | case NVPTX::BI__imma_m32n8k16_ld_b_s8: |
19260 | 172 | case NVPTX::BI__imma_m32n8k16_ld_b_u8: |
19261 | 176 | case NVPTX::BI__imma_m32n8k16_ld_c: |
19262 | 180 | case NVPTX::BI__imma_m8n32k16_ld_a_s8: |
19263 | 184 | case NVPTX::BI__imma_m8n32k16_ld_a_u8: |
19264 | 188 | case NVPTX::BI__imma_m8n32k16_ld_b_s8: |
19265 | 192 | case NVPTX::BI__imma_m8n32k16_ld_b_u8: |
19266 | 196 | case NVPTX::BI__imma_m8n32k16_ld_c: |
19267 | | // Sub-integer MMA loads. |
19268 | 198 | case NVPTX::BI__imma_m8n8k32_ld_a_s4: |
19269 | 200 | case NVPTX::BI__imma_m8n8k32_ld_a_u4: |
19270 | 202 | case NVPTX::BI__imma_m8n8k32_ld_b_s4: |
19271 | 204 | case NVPTX::BI__imma_m8n8k32_ld_b_u4: |
19272 | 208 | case NVPTX::BI__imma_m8n8k32_ld_c: |
19273 | 210 | case NVPTX::BI__bmma_m8n8k128_ld_a_b1: |
19274 | 212 | case NVPTX::BI__bmma_m8n8k128_ld_b_b1: |
19275 | 216 | case NVPTX::BI__bmma_m8n8k128_ld_c: |
19276 | | // Double MMA loads. |
19277 | 220 | case NVPTX::BI__dmma_m8n8k4_ld_a: |
19278 | 224 | case NVPTX::BI__dmma_m8n8k4_ld_b: |
19279 | 228 | case NVPTX::BI__dmma_m8n8k4_ld_c: |
19280 | | // Alternate float MMA loads. |
19281 | 232 | case NVPTX::BI__mma_bf16_m16n16k16_ld_a: |
19282 | 236 | case NVPTX::BI__mma_bf16_m16n16k16_ld_b: |
19283 | 240 | case NVPTX::BI__mma_bf16_m8n32k16_ld_a: |
19284 | 244 | case NVPTX::BI__mma_bf16_m8n32k16_ld_b: |
19285 | 248 | case NVPTX::BI__mma_bf16_m32n8k16_ld_a: |
19286 | 252 | case NVPTX::BI__mma_bf16_m32n8k16_ld_b: |
19287 | 256 | case NVPTX::BI__mma_tf32_m16n16k8_ld_a: |
19288 | 260 | case NVPTX::BI__mma_tf32_m16n16k8_ld_b: |
19289 | 264 | case NVPTX::BI__mma_tf32_m16n16k8_ld_c: { |
19290 | 264 | Address Dst = EmitPointerWithAlignment(E->getArg(0)); |
19291 | 264 | Value *Src = EmitScalarExpr(E->getArg(1)); |
19292 | 264 | Value *Ldm = EmitScalarExpr(E->getArg(2)); |
19293 | 264 | std::optional<llvm::APSInt> isColMajorArg = |
19294 | 264 | E->getArg(3)->getIntegerConstantExpr(getContext()); |
19295 | 264 | if (!isColMajorArg) |
19296 | 0 | return nullptr; |
19297 | 264 | bool isColMajor = isColMajorArg->getSExtValue(); |
19298 | 264 | NVPTXMmaLdstInfo II = getNVPTXMmaLdstInfo(BuiltinID); |
19299 | 264 |     unsigned IID = isColMajor ? II.IID_col : II.IID_row;
19300 | 264 | if (IID == 0) |
19301 | 0 | return nullptr; |
19302 | | |
19303 | 264 | Value *Result = |
19304 | 264 | Builder.CreateCall(CGM.getIntrinsic(IID, Src->getType()), {Src, Ldm}); |
19305 | | |
19306 | | // Save returned values. |
19307 | 264 | assert(II.NumResults); |
19308 | 264 | if (II.NumResults == 1) { |
19309 | 36 | Builder.CreateAlignedStore(Result, Dst.getPointer(), |
19310 | 36 | CharUnits::fromQuantity(4)); |
19311 | 228 | } else { |
19312 | 1.57k |       for (unsigned i = 0; i < II.NumResults; ++i) {
19313 | 1.34k | Builder.CreateAlignedStore( |
19314 | 1.34k | Builder.CreateBitCast(Builder.CreateExtractValue(Result, i), |
19315 | 1.34k | Dst.getElementType()), |
19316 | 1.34k | Builder.CreateGEP(Dst.getElementType(), Dst.getPointer(), |
19317 | 1.34k | llvm::ConstantInt::get(IntTy, i)), |
19318 | 1.34k | CharUnits::fromQuantity(4)); |
19319 | 1.34k | } |
19320 | 228 | } |
19321 | 264 | return Result; |
19322 | 264 | } |
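A hedged sketch of one builtin handled by this case; the prototypes use i32-based fragment pointers, and the layout argument must be an integer constant expression, as the getIntegerConstantExpr check above enforces:

    // CUDA device-code sketch: load the 'a' fragment of an m16n16k16 f16 WMMA
    // tile. 'frag' receives the fragment's packed values; 'ldm' is the leading
    // dimension; the final constant selects row-major (0) vs column-major (1).
    __device__ void load_a_fragment(int *frag, const int *src, unsigned ldm) {
      __hmma_m16n16k16_ld_a(frag, src, ldm, /*isColMajor=*/0);
    }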
19323 | | |
19324 | 14 | case NVPTX::BI__hmma_m16n16k16_st_c_f16: |
19325 | 28 | case NVPTX::BI__hmma_m16n16k16_st_c_f32: |
19326 | 38 | case NVPTX::BI__hmma_m32n8k16_st_c_f16: |
19327 | 48 | case NVPTX::BI__hmma_m32n8k16_st_c_f32: |
19328 | 58 | case NVPTX::BI__hmma_m8n32k16_st_c_f16: |
19329 | 68 | case NVPTX::BI__hmma_m8n32k16_st_c_f32: |
19330 | 72 | case NVPTX::BI__imma_m16n16k16_st_c_i32: |
19331 | 76 | case NVPTX::BI__imma_m32n8k16_st_c_i32: |
19332 | 80 | case NVPTX::BI__imma_m8n32k16_st_c_i32: |
19333 | 84 | case NVPTX::BI__imma_m8n8k32_st_c_i32: |
19334 | 88 | case NVPTX::BI__bmma_m8n8k128_st_c_i32: |
19335 | 92 | case NVPTX::BI__dmma_m8n8k4_st_c_f64: |
19336 | 96 | case NVPTX::BI__mma_m16n16k8_st_c_f32: { |
19337 | 96 | Value *Dst = EmitScalarExpr(E->getArg(0)); |
19338 | 96 | Address Src = EmitPointerWithAlignment(E->getArg(1)); |
19339 | 96 | Value *Ldm = EmitScalarExpr(E->getArg(2)); |
19340 | 96 | std::optional<llvm::APSInt> isColMajorArg = |
19341 | 96 | E->getArg(3)->getIntegerConstantExpr(getContext()); |
19342 | 96 | if (!isColMajorArg) |
19343 | 0 | return nullptr; |
19344 | 96 | bool isColMajor = isColMajorArg->getSExtValue(); |
19345 | 96 | NVPTXMmaLdstInfo II = getNVPTXMmaLdstInfo(BuiltinID); |
19346 | 96 |     unsigned IID = isColMajor ? II.IID_col : II.IID_row;
19347 | 96 | if (IID == 0) |
19348 | 0 | return nullptr; |
19349 | 96 | Function *Intrinsic = |
19350 | 96 | CGM.getIntrinsic(IID, Dst->getType()); |
19351 | 96 | llvm::Type *ParamType = Intrinsic->getFunctionType()->getParamType(1); |
19352 | 96 | SmallVector<Value *, 10> Values = {Dst}; |
19353 | 656 |     for (unsigned i = 0; i < II.NumResults; ++i) {
19354 | 560 | Value *V = Builder.CreateAlignedLoad( |
19355 | 560 | Src.getElementType(), |
19356 | 560 | Builder.CreateGEP(Src.getElementType(), Src.getPointer(), |
19357 | 560 | llvm::ConstantInt::get(IntTy, i)), |
19358 | 560 | CharUnits::fromQuantity(4)); |
19359 | 560 | Values.push_back(Builder.CreateBitCast(V, ParamType)); |
19360 | 560 | } |
19361 | 96 | Values.push_back(Ldm); |
19362 | 96 | Value *Result = Builder.CreateCall(Intrinsic, Values); |
19363 | 96 | return Result; |
19364 | 96 | } |
19365 | | |
19366 | | // BI__hmma_m16n16k16_mma_<Dtype><CType>(d, a, b, c, layout, satf) --> |
19367 | | // Intrinsic::nvvm_wmma_m16n16k16_mma_sync<layout A,B><DType><CType><Satf> |
19368 | 56 | case NVPTX::BI__hmma_m16n16k16_mma_f16f16: |
19369 | 112 | case NVPTX::BI__hmma_m16n16k16_mma_f32f16: |
19370 | 168 | case NVPTX::BI__hmma_m16n16k16_mma_f32f32: |
19371 | 224 | case NVPTX::BI__hmma_m16n16k16_mma_f16f32: |
19372 | 264 | case NVPTX::BI__hmma_m32n8k16_mma_f16f16: |
19373 | 304 | case NVPTX::BI__hmma_m32n8k16_mma_f32f16: |
19374 | 344 | case NVPTX::BI__hmma_m32n8k16_mma_f32f32: |
19375 | 384 | case NVPTX::BI__hmma_m32n8k16_mma_f16f32: |
19376 | 424 | case NVPTX::BI__hmma_m8n32k16_mma_f16f16: |
19377 | 464 | case NVPTX::BI__hmma_m8n32k16_mma_f32f16: |
19378 | 504 | case NVPTX::BI__hmma_m8n32k16_mma_f32f32: |
19379 | 544 | case NVPTX::BI__hmma_m8n32k16_mma_f16f32: |
19380 | 560 | case NVPTX::BI__imma_m16n16k16_mma_s8: |
19381 | 576 | case NVPTX::BI__imma_m16n16k16_mma_u8: |
19382 | 592 | case NVPTX::BI__imma_m32n8k16_mma_s8: |
19383 | 608 | case NVPTX::BI__imma_m32n8k16_mma_u8: |
19384 | 624 | case NVPTX::BI__imma_m8n32k16_mma_s8: |
19385 | 640 | case NVPTX::BI__imma_m8n32k16_mma_u8: |
19386 | 644 | case NVPTX::BI__imma_m8n8k32_mma_s4: |
19387 | 648 | case NVPTX::BI__imma_m8n8k32_mma_u4: |
19388 | 650 | case NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1: |
19389 | 652 | case NVPTX::BI__bmma_m8n8k128_mma_and_popc_b1: |
19390 | 660 | case NVPTX::BI__dmma_m8n8k4_mma_f64: |
19391 | 668 | case NVPTX::BI__mma_bf16_m16n16k16_mma_f32: |
19392 | 676 | case NVPTX::BI__mma_bf16_m8n32k16_mma_f32: |
19393 | 684 | case NVPTX::BI__mma_bf16_m32n8k16_mma_f32: |
19394 | 692 | case NVPTX::BI__mma_tf32_m16n16k8_mma_f32: { |
19395 | 692 | Address Dst = EmitPointerWithAlignment(E->getArg(0)); |
19396 | 692 | Address SrcA = EmitPointerWithAlignment(E->getArg(1)); |
19397 | 692 | Address SrcB = EmitPointerWithAlignment(E->getArg(2)); |
19398 | 692 | Address SrcC = EmitPointerWithAlignment(E->getArg(3)); |
19399 | 692 | std::optional<llvm::APSInt> LayoutArg = |
19400 | 692 | E->getArg(4)->getIntegerConstantExpr(getContext()); |
19401 | 692 | if (!LayoutArg) |
19402 | 0 | return nullptr; |
19403 | 692 | int Layout = LayoutArg->getSExtValue(); |
19404 | 692 | if (Layout < 0 || Layout > 3) |
19405 | 0 | return nullptr; |
19406 | 692 | llvm::APSInt SatfArg; |
19407 | 692 | if (BuiltinID == NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1 || |
19408 | 692 |         BuiltinID == NVPTX::BI__bmma_m8n8k128_mma_and_popc_b1)
19409 | 4 | SatfArg = 0; // .b1 does not have satf argument. |
19410 | 688 | else if (std::optional<llvm::APSInt> OptSatfArg = |
19411 | 688 | E->getArg(5)->getIntegerConstantExpr(getContext())) |
19412 | 688 | SatfArg = *OptSatfArg; |
19413 | 0 | else |
19414 | 0 | return nullptr; |
19415 | 692 | bool Satf = SatfArg.getSExtValue(); |
19416 | 692 | NVPTXMmaInfo MI = getNVPTXMmaInfo(BuiltinID); |
19417 | 692 | unsigned IID = MI.getMMAIntrinsic(Layout, Satf); |
19418 | 692 | if (IID == 0) // Unsupported combination of Layout/Satf. |
19419 | 0 | return nullptr; |
19420 | | |
19421 | 692 | SmallVector<Value *, 24> Values; |
19422 | 692 | Function *Intrinsic = CGM.getIntrinsic(IID); |
19423 | 692 | llvm::Type *AType = Intrinsic->getFunctionType()->getParamType(0); |
19424 | | // Load A |
19425 | 5.43k |     for (unsigned i = 0; i < MI.NumEltsA; ++i) {
19426 | 4.74k | Value *V = Builder.CreateAlignedLoad( |
19427 | 4.74k | SrcA.getElementType(), |
19428 | 4.74k | Builder.CreateGEP(SrcA.getElementType(), SrcA.getPointer(), |
19429 | 4.74k | llvm::ConstantInt::get(IntTy, i)), |
19430 | 4.74k | CharUnits::fromQuantity(4)); |
19431 | 4.74k | Values.push_back(Builder.CreateBitCast(V, AType)); |
19432 | 4.74k | } |
19433 | | // Load B |
19434 | 692 | llvm::Type *BType = Intrinsic->getFunctionType()->getParamType(MI.NumEltsA); |
19435 | 5.43k |     for (unsigned i = 0; i < MI.NumEltsB; ++i) {
19436 | 4.74k | Value *V = Builder.CreateAlignedLoad( |
19437 | 4.74k | SrcB.getElementType(), |
19438 | 4.74k | Builder.CreateGEP(SrcB.getElementType(), SrcB.getPointer(), |
19439 | 4.74k | llvm::ConstantInt::get(IntTy, i)), |
19440 | 4.74k | CharUnits::fromQuantity(4)); |
19441 | 4.74k | Values.push_back(Builder.CreateBitCast(V, BType)); |
19442 | 4.74k | } |
19443 | | // Load C |
19444 | 692 | llvm::Type *CType = |
19445 | 692 | Intrinsic->getFunctionType()->getParamType(MI.NumEltsA + MI.NumEltsB); |
19446 | 5.02k |     for (unsigned i = 0; i < MI.NumEltsC; ++i) {
19447 | 4.32k | Value *V = Builder.CreateAlignedLoad( |
19448 | 4.32k | SrcC.getElementType(), |
19449 | 4.32k | Builder.CreateGEP(SrcC.getElementType(), SrcC.getPointer(), |
19450 | 4.32k | llvm::ConstantInt::get(IntTy, i)), |
19451 | 4.32k | CharUnits::fromQuantity(4)); |
19452 | 4.32k | Values.push_back(Builder.CreateBitCast(V, CType)); |
19453 | 4.32k | } |
19454 | 692 | Value *Result = Builder.CreateCall(Intrinsic, Values); |
19455 | 692 | llvm::Type *DType = Dst.getElementType(); |
19456 | 5.02k |     for (unsigned i = 0; i < MI.NumEltsD; ++i)
19457 | 4.32k | Builder.CreateAlignedStore( |
19458 | 4.32k | Builder.CreateBitCast(Builder.CreateExtractValue(Result, i), DType), |
19459 | 4.32k | Builder.CreateGEP(Dst.getElementType(), Dst.getPointer(), |
19460 | 4.32k | llvm::ConstantInt::get(IntTy, i)), |
19461 | 4.32k | CharUnits::fromQuantity(4)); |
19462 | 692 | return Result; |
19463 | 692 | } |
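A hedged sketch of the mapping described in the comment before this case; constant layout and satf arguments jointly select one of the eight intrinsics tabulated by getNVPTXMmaInfo:

    // CUDA device-code sketch: d = a * b + c on an m16n16k16 tile with f32
    // accumulators. layout 0 with satf 0 selects
    // Intrinsic::nvvm_wmma_m16n16k16_mma_row_row_f32_f32.
    __device__ void mma_f32(float *d, const int *a, const int *b,
                            const float *c) {
      __hmma_m16n16k16_mma_f32f32(d, a, b, c, /*layout=*/0, /*satf=*/0);
    }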
19464 |  |   // The following builtins require native half type support.
19465 | 0 | case NVPTX::BI__nvvm_ex2_approx_f16: |
19466 | 0 | return MakeHalfType(Intrinsic::nvvm_ex2_approx_f16, BuiltinID, E, *this); |
19467 | 0 | case NVPTX::BI__nvvm_ex2_approx_f16x2: |
19468 | 0 | return MakeHalfType(Intrinsic::nvvm_ex2_approx_f16x2, BuiltinID, E, *this); |
19469 | 0 | case NVPTX::BI__nvvm_ff2f16x2_rn: |
19470 | 0 | return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rn, BuiltinID, E, *this); |
19471 | 0 | case NVPTX::BI__nvvm_ff2f16x2_rn_relu: |
19472 | 0 | return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rn_relu, BuiltinID, E, *this); |
19473 | 0 | case NVPTX::BI__nvvm_ff2f16x2_rz: |
19474 | 0 | return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rz, BuiltinID, E, *this); |
19475 | 0 | case NVPTX::BI__nvvm_ff2f16x2_rz_relu: |
19476 | 0 | return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rz_relu, BuiltinID, E, *this); |
19477 | 0 | case NVPTX::BI__nvvm_fma_rn_f16: |
19478 | 0 | return MakeHalfType(Intrinsic::nvvm_fma_rn_f16, BuiltinID, E, *this); |
19479 | 0 | case NVPTX::BI__nvvm_fma_rn_f16x2: |
19480 | 0 | return MakeHalfType(Intrinsic::nvvm_fma_rn_f16x2, BuiltinID, E, *this); |
19481 | 0 | case NVPTX::BI__nvvm_fma_rn_ftz_f16: |
19482 | 0 | return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_f16, BuiltinID, E, *this); |
19483 | 0 | case NVPTX::BI__nvvm_fma_rn_ftz_f16x2: |
19484 | 0 | return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_f16x2, BuiltinID, E, *this); |
19485 | 0 | case NVPTX::BI__nvvm_fma_rn_ftz_relu_f16: |
19486 | 0 | return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_relu_f16, BuiltinID, E, |
19487 | 0 | *this); |
19488 | 0 | case NVPTX::BI__nvvm_fma_rn_ftz_relu_f16x2: |
19489 | 0 | return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_relu_f16x2, BuiltinID, E, |
19490 | 0 | *this); |
19491 | 0 | case NVPTX::BI__nvvm_fma_rn_ftz_sat_f16: |
19492 | 0 | return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_sat_f16, BuiltinID, E, |
19493 | 0 | *this); |
19494 | 0 | case NVPTX::BI__nvvm_fma_rn_ftz_sat_f16x2: |
19495 | 0 | return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_sat_f16x2, BuiltinID, E, |
19496 | 0 | *this); |
19497 | 0 | case NVPTX::BI__nvvm_fma_rn_relu_f16: |
19498 | 0 | return MakeHalfType(Intrinsic::nvvm_fma_rn_relu_f16, BuiltinID, E, *this); |
19499 | 0 | case NVPTX::BI__nvvm_fma_rn_relu_f16x2: |
19500 | 0 | return MakeHalfType(Intrinsic::nvvm_fma_rn_relu_f16x2, BuiltinID, E, *this); |
19501 | 0 | case NVPTX::BI__nvvm_fma_rn_sat_f16: |
19502 | 0 | return MakeHalfType(Intrinsic::nvvm_fma_rn_sat_f16, BuiltinID, E, *this); |
19503 | 0 | case NVPTX::BI__nvvm_fma_rn_sat_f16x2: |
19504 | 0 | return MakeHalfType(Intrinsic::nvvm_fma_rn_sat_f16x2, BuiltinID, E, *this); |
19505 | 0 | case NVPTX::BI__nvvm_fmax_f16: |
19506 | 0 | return MakeHalfType(Intrinsic::nvvm_fmax_f16, BuiltinID, E, *this); |
19507 | 0 | case NVPTX::BI__nvvm_fmax_f16x2: |
19508 | 0 | return MakeHalfType(Intrinsic::nvvm_fmax_f16x2, BuiltinID, E, *this); |
19509 | 0 | case NVPTX::BI__nvvm_fmax_ftz_f16: |
19510 | 0 | return MakeHalfType(Intrinsic::nvvm_fmax_ftz_f16, BuiltinID, E, *this); |
19511 | 0 | case NVPTX::BI__nvvm_fmax_ftz_f16x2: |
19512 | 0 | return MakeHalfType(Intrinsic::nvvm_fmax_ftz_f16x2, BuiltinID, E, *this); |
19513 | 0 | case NVPTX::BI__nvvm_fmax_ftz_nan_f16: |
19514 | 0 | return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_f16, BuiltinID, E, *this); |
19515 | 0 | case NVPTX::BI__nvvm_fmax_ftz_nan_f16x2: |
19516 | 0 | return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_f16x2, BuiltinID, E, |
19517 | 0 | *this); |
19518 | 0 | case NVPTX::BI__nvvm_fmax_ftz_nan_xorsign_abs_f16: |
19519 | 0 | return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f16, BuiltinID, |
19520 | 0 | E, *this); |
19521 | 0 | case NVPTX::BI__nvvm_fmax_ftz_nan_xorsign_abs_f16x2: |
19522 | 0 | return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f16x2, |
19523 | 0 | BuiltinID, E, *this); |
19524 | 0 | case NVPTX::BI__nvvm_fmax_ftz_xorsign_abs_f16: |
19525 | 0 | return MakeHalfType(Intrinsic::nvvm_fmax_ftz_xorsign_abs_f16, BuiltinID, E, |
19526 | 0 | *this); |
19527 | 0 | case NVPTX::BI__nvvm_fmax_ftz_xorsign_abs_f16x2: |
19528 | 0 | return MakeHalfType(Intrinsic::nvvm_fmax_ftz_xorsign_abs_f16x2, BuiltinID, |
19529 | 0 | E, *this); |
19530 | 0 | case NVPTX::BI__nvvm_fmax_nan_f16: |
19531 | 0 | return MakeHalfType(Intrinsic::nvvm_fmax_nan_f16, BuiltinID, E, *this); |
19532 | 0 | case NVPTX::BI__nvvm_fmax_nan_f16x2: |
19533 | 0 | return MakeHalfType(Intrinsic::nvvm_fmax_nan_f16x2, BuiltinID, E, *this); |
19534 | 0 | case NVPTX::BI__nvvm_fmax_nan_xorsign_abs_f16: |
19535 | 0 | return MakeHalfType(Intrinsic::nvvm_fmax_nan_xorsign_abs_f16, BuiltinID, E, |
19536 | 0 | *this); |
19537 | 0 | case NVPTX::BI__nvvm_fmax_nan_xorsign_abs_f16x2: |
19538 | 0 | return MakeHalfType(Intrinsic::nvvm_fmax_nan_xorsign_abs_f16x2, BuiltinID, |
19539 | 0 | E, *this); |
19540 | 0 | case NVPTX::BI__nvvm_fmax_xorsign_abs_f16: |
19541 | 0 | return MakeHalfType(Intrinsic::nvvm_fmax_xorsign_abs_f16, BuiltinID, E, |
19542 | 0 | *this); |
19543 | 0 | case NVPTX::BI__nvvm_fmax_xorsign_abs_f16x2: |
19544 | 0 | return MakeHalfType(Intrinsic::nvvm_fmax_xorsign_abs_f16x2, BuiltinID, E, |
19545 | 0 | *this); |
19546 | 0 | case NVPTX::BI__nvvm_fmin_f16: |
19547 | 0 | return MakeHalfType(Intrinsic::nvvm_fmin_f16, BuiltinID, E, *this); |
19548 | 0 | case NVPTX::BI__nvvm_fmin_f16x2: |
19549 | 0 | return MakeHalfType(Intrinsic::nvvm_fmin_f16x2, BuiltinID, E, *this); |
19550 | 0 | case NVPTX::BI__nvvm_fmin_ftz_f16: |
19551 | 0 | return MakeHalfType(Intrinsic::nvvm_fmin_ftz_f16, BuiltinID, E, *this); |
19552 | 0 | case NVPTX::BI__nvvm_fmin_ftz_f16x2: |
19553 | 0 | return MakeHalfType(Intrinsic::nvvm_fmin_ftz_f16x2, BuiltinID, E, *this); |
19554 | 0 | case NVPTX::BI__nvvm_fmin_ftz_nan_f16: |
19555 | 0 | return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_f16, BuiltinID, E, *this); |
19556 | 0 | case NVPTX::BI__nvvm_fmin_ftz_nan_f16x2: |
19557 | 0 | return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_f16x2, BuiltinID, E, |
19558 | 0 | *this); |
19559 | 0 | case NVPTX::BI__nvvm_fmin_ftz_nan_xorsign_abs_f16: |
19560 | 0 | return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f16, BuiltinID, |
19561 | 0 | E, *this); |
19562 | 0 | case NVPTX::BI__nvvm_fmin_ftz_nan_xorsign_abs_f16x2: |
19563 | 0 | return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f16x2, |
19564 | 0 | BuiltinID, E, *this); |
19565 | 0 | case NVPTX::BI__nvvm_fmin_ftz_xorsign_abs_f16: |
19566 | 0 | return MakeHalfType(Intrinsic::nvvm_fmin_ftz_xorsign_abs_f16, BuiltinID, E, |
19567 | 0 | *this); |
19568 | 0 | case NVPTX::BI__nvvm_fmin_ftz_xorsign_abs_f16x2: |
19569 | 0 | return MakeHalfType(Intrinsic::nvvm_fmin_ftz_xorsign_abs_f16x2, BuiltinID, |
19570 | 0 | E, *this); |
19571 | 0 | case NVPTX::BI__nvvm_fmin_nan_f16: |
19572 | 0 | return MakeHalfType(Intrinsic::nvvm_fmin_nan_f16, BuiltinID, E, *this); |
19573 | 0 | case NVPTX::BI__nvvm_fmin_nan_f16x2: |
19574 | 0 | return MakeHalfType(Intrinsic::nvvm_fmin_nan_f16x2, BuiltinID, E, *this); |
19575 | 0 | case NVPTX::BI__nvvm_fmin_nan_xorsign_abs_f16: |
19576 | 0 | return MakeHalfType(Intrinsic::nvvm_fmin_nan_xorsign_abs_f16, BuiltinID, E, |
19577 | 0 | *this); |
19578 | 0 | case NVPTX::BI__nvvm_fmin_nan_xorsign_abs_f16x2: |
19579 | 0 | return MakeHalfType(Intrinsic::nvvm_fmin_nan_xorsign_abs_f16x2, BuiltinID, |
19580 | 0 | E, *this); |
19581 | 0 | case NVPTX::BI__nvvm_fmin_xorsign_abs_f16: |
19582 | 0 | return MakeHalfType(Intrinsic::nvvm_fmin_xorsign_abs_f16, BuiltinID, E, |
19583 | 0 | *this); |
19584 | 0 | case NVPTX::BI__nvvm_fmin_xorsign_abs_f16x2: |
19585 | 0 | return MakeHalfType(Intrinsic::nvvm_fmin_xorsign_abs_f16x2, BuiltinID, E, |
19586 | 0 | *this); |
19587 | 0 | case NVPTX::BI__nvvm_ldg_h: |
19588 | 0 | return MakeHalfType(Intrinsic::nvvm_ldg_global_f, BuiltinID, E, *this); |
19589 | 0 | case NVPTX::BI__nvvm_ldg_h2: |
19590 | 0 | return MakeHalfType(Intrinsic::nvvm_ldg_global_f, BuiltinID, E, *this); |
19591 | 0 | case NVPTX::BI__nvvm_ldu_h: |
19592 | 0 | return MakeHalfType(Intrinsic::nvvm_ldu_global_f, BuiltinID, E, *this); |
19593 | 0 | case NVPTX::BI__nvvm_ldu_h2: { |
19594 | 0 | return MakeHalfType(Intrinsic::nvvm_ldu_global_f, BuiltinID, E, *this); |
19595 | 692 | } |
19596 | 0 | case NVPTX::BI__nvvm_cp_async_ca_shared_global_4: |
19597 | 0 | return MakeCpAsync(Intrinsic::nvvm_cp_async_ca_shared_global_4, |
19598 | 0 | Intrinsic::nvvm_cp_async_ca_shared_global_4_s, *this, E, |
19599 | 0 | 4); |
19600 | 0 | case NVPTX::BI__nvvm_cp_async_ca_shared_global_8: |
19601 | 0 | return MakeCpAsync(Intrinsic::nvvm_cp_async_ca_shared_global_8, |
19602 | 0 | Intrinsic::nvvm_cp_async_ca_shared_global_8_s, *this, E, |
19603 | 0 | 8); |
19604 | 0 | case NVPTX::BI__nvvm_cp_async_ca_shared_global_16: |
19605 | 0 | return MakeCpAsync(Intrinsic::nvvm_cp_async_ca_shared_global_16, |
19606 | 0 | Intrinsic::nvvm_cp_async_ca_shared_global_16_s, *this, E, |
19607 | 0 | 16); |
19608 | 0 | case NVPTX::BI__nvvm_cp_async_cg_shared_global_16: |
19609 | 0 | return MakeCpAsync(Intrinsic::nvvm_cp_async_cg_shared_global_16, |
19610 | 0 | Intrinsic::nvvm_cp_async_cg_shared_global_16_s, *this, E, |
19611 | 0 | 16); |
19612 | 1 | case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_x: |
19613 | 1 | return Builder.CreateCall( |
19614 | 1 | CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_x)); |
19615 | 1 | case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_y: |
19616 | 1 | return Builder.CreateCall( |
19617 | 1 | CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_y)); |
19618 | 1 | case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_z: |
19619 | 1 | return Builder.CreateCall( |
19620 | 1 | CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_z)); |
19621 | 1 | case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_w: |
19622 | 1 | return Builder.CreateCall( |
19623 | 1 | CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_w)); |
19624 | 1 | case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_x: |
19625 | 1 | return Builder.CreateCall( |
19626 | 1 | CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_x)); |
19627 | 1 | case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_y: |
19628 | 1 | return Builder.CreateCall( |
19629 | 1 | CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_y)); |
19630 | 1 | case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_z: |
19631 | 1 | return Builder.CreateCall( |
19632 | 1 | CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_z)); |
19633 | 1 | case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_w: |
19634 | 1 | return Builder.CreateCall( |
19635 | 1 | CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_w)); |
19636 | 1 | case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_x: |
19637 | 1 | return Builder.CreateCall( |
19638 | 1 | CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_x)); |
19639 | 1 | case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_y: |
19640 | 1 | return Builder.CreateCall( |
19641 | 1 | CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_y)); |
19642 | 1 | case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_z: |
19643 | 1 | return Builder.CreateCall( |
19644 | 1 | CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_z)); |
19645 | 1 | case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_w: |
19646 | 1 | return Builder.CreateCall( |
19647 | 1 | CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_w)); |
19648 | 1 | case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_x: |
19649 | 1 | return Builder.CreateCall( |
19650 | 1 | CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_x)); |
19651 | 1 | case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_y: |
19652 | 1 | return Builder.CreateCall( |
19653 | 1 | CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_y)); |
19654 | 1 | case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_z: |
19655 | 1 | return Builder.CreateCall( |
19656 | 1 | CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_z)); |
19657 | 1 | case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_w: |
19658 | 1 | return Builder.CreateCall( |
19659 | 1 | CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_w)); |
19660 | 1 | case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctarank: |
19661 | 1 | return Builder.CreateCall( |
19662 | 1 | CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctarank)); |
19663 | 1 | case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctarank: |
19664 | 1 | return Builder.CreateCall( |
19665 | 1 | CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctarank)); |
19666 | 1 | case NVPTX::BI__nvvm_is_explicit_cluster: |
19667 | 1 | return Builder.CreateCall( |
19668 | 1 | CGM.getIntrinsic(Intrinsic::nvvm_is_explicit_cluster)); |
19669 | 1 | case NVPTX::BI__nvvm_isspacep_shared_cluster: |
19670 | 1 | return Builder.CreateCall( |
19671 | 1 | CGM.getIntrinsic(Intrinsic::nvvm_isspacep_shared_cluster), |
19672 | 1 | EmitScalarExpr(E->getArg(0))); |
19673 | 1 | case NVPTX::BI__nvvm_mapa: |
19674 | 1 | return Builder.CreateCall( |
19675 | 1 | CGM.getIntrinsic(Intrinsic::nvvm_mapa), |
19676 | 1 | {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1))}); |
19677 | 1 | case NVPTX::BI__nvvm_mapa_shared_cluster: |
19678 | 1 | return Builder.CreateCall( |
19679 | 1 | CGM.getIntrinsic(Intrinsic::nvvm_mapa_shared_cluster), |
19680 | 1 | {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1))}); |
19681 | 1 | case NVPTX::BI__nvvm_getctarank: |
19682 | 1 | return Builder.CreateCall( |
19683 | 1 | CGM.getIntrinsic(Intrinsic::nvvm_getctarank), |
19684 | 1 | EmitScalarExpr(E->getArg(0))); |
19685 | 1 | case NVPTX::BI__nvvm_getctarank_shared_cluster: |
19686 | 1 | return Builder.CreateCall( |
19687 | 1 | CGM.getIntrinsic(Intrinsic::nvvm_getctarank_shared_cluster), |
19688 | 1 | EmitScalarExpr(E->getArg(0))); |
19689 | 1 | case NVPTX::BI__nvvm_barrier_cluster_arrive: |
19690 | 1 | return Builder.CreateCall( |
19691 | 1 | CGM.getIntrinsic(Intrinsic::nvvm_barrier_cluster_arrive)); |
19692 | 1 | case NVPTX::BI__nvvm_barrier_cluster_arrive_relaxed: |
19693 | 1 | return Builder.CreateCall( |
19694 | 1 | CGM.getIntrinsic(Intrinsic::nvvm_barrier_cluster_arrive_relaxed)); |
19695 | 1 | case NVPTX::BI__nvvm_barrier_cluster_wait: |
19696 | 1 | return Builder.CreateCall( |
19697 | 1 | CGM.getIntrinsic(Intrinsic::nvvm_barrier_cluster_wait)); |
19698 | 1 | case NVPTX::BI__nvvm_fence_sc_cluster: |
19699 | 1 | return Builder.CreateCall( |
19700 | 1 | CGM.getIntrinsic(Intrinsic::nvvm_fence_sc_cluster)); |
19701 | 0 | default: |
19702 | 0 | return nullptr; |
19703 | 1.09k | } |
19704 | 1.09k | } |
19705 | | |
19706 | | namespace { |
19707 | | struct BuiltinAlignArgs { |
19708 | | llvm::Value *Src = nullptr; |
19709 | | llvm::Type *SrcType = nullptr; |
19710 | | llvm::Value *Alignment = nullptr; |
19711 | | llvm::Value *Mask = nullptr; |
19712 | | llvm::IntegerType *IntType = nullptr; |
19713 | | |
19714 | 18 | BuiltinAlignArgs(const CallExpr *E, CodeGenFunction &CGF) { |
19715 | 18 | QualType AstType = E->getArg(0)->getType(); |
19716 | 18 | if (AstType->isArrayType()) |
19717 | 0 | Src = CGF.EmitArrayToPointerDecay(E->getArg(0)).getPointer(); |
19718 | 18 | else |
19719 | 18 | Src = CGF.EmitScalarExpr(E->getArg(0)); |
19720 | 18 | SrcType = Src->getType(); |
19721 | 18 | if (SrcType->isPointerTy()) { |
19722 | 12 | IntType = IntegerType::get( |
19723 | 12 | CGF.getLLVMContext(), |
19724 | 12 | CGF.CGM.getDataLayout().getIndexTypeSizeInBits(SrcType)); |
19725 | 12 | } else { |
19726 | 6 | assert(SrcType->isIntegerTy()); |
19727 | 6 | IntType = cast<llvm::IntegerType>(SrcType); |
19728 | 6 | } |
19729 | 18 | Alignment = CGF.EmitScalarExpr(E->getArg(1)); |
19730 | 18 | Alignment = CGF.Builder.CreateZExtOrTrunc(Alignment, IntType, "alignment"); |
19731 | 18 | auto *One = llvm::ConstantInt::get(IntType, 1); |
19732 | 18 | Mask = CGF.Builder.CreateSub(Alignment, One, "mask"); |
19733 | 18 | } |
19734 | | }; |
19735 | | } // namespace |
19736 | | |
19737 | | /// Generate (x & (y-1)) == 0. |
19738 | 5 | RValue CodeGenFunction::EmitBuiltinIsAligned(const CallExpr *E) { |
19739 | 5 | BuiltinAlignArgs Args(E, *this); |
19740 | 5 | llvm::Value *SrcAddress = Args.Src; |
19741 | 5 | if (Args.SrcType->isPointerTy()) |
19742 | 3 | SrcAddress = |
19743 | 3 | Builder.CreateBitOrPointerCast(Args.Src, Args.IntType, "src_addr"); |
19744 | 5 | return RValue::get(Builder.CreateICmpEQ( |
19745 | 5 | Builder.CreateAnd(SrcAddress, Args.Mask, "set_bits"), |
19746 | 5 | llvm::Constant::getNullValue(Args.IntType), "is_aligned")); |
19747 | 5 | } |
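The generated check, written out in plain C++ for a power-of-two alignment:

    #include <cstdint>

    // Mirrors the emitted IR: mask = alignment - 1 (Args.Mask); the address is
    // aligned iff no low bits ('set_bits') survive the AND.
    bool is_aligned(std::uintptr_t addr, std::uintptr_t alignment) {
      std::uintptr_t mask = alignment - 1;
      return (addr & mask) == 0;
    }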
19748 | | |
19749 | | /// Generate (x & ~(y-1)) to align down or ((x+(y-1)) & ~(y-1)) to align up. |
19750 | | /// Note: For pointer types we can avoid ptrtoint/inttoptr pairs by using the |
19751 | | /// llvm.ptrmask intrinsic (with a GEP before in the align_up case). |
19752 | 13 | RValue CodeGenFunction::EmitBuiltinAlignTo(const CallExpr *E, bool AlignUp) { |
19753 | 13 | BuiltinAlignArgs Args(E, *this); |
19754 | 13 | llvm::Value *SrcForMask = Args.Src; |
19755 | 13 | if (AlignUp) { |
19756 | | // When aligning up we have to first add the mask to ensure we go over the |
19757 | | // next alignment value and then align down to the next valid multiple. |
19758 | | // By adding the mask, we ensure that align_up on an already aligned |
19759 | | // value will not change the value. |
19760 | 7 | if (Args.Src->getType()->isPointerTy()) { |
19761 | 5 | if (getLangOpts().isSignedOverflowDefined()) |
19762 | 0 | SrcForMask = |
19763 | 0 | Builder.CreateGEP(Int8Ty, SrcForMask, Args.Mask, "over_boundary"); |
19764 | 5 | else |
19765 | 5 | SrcForMask = EmitCheckedInBoundsGEP(Int8Ty, SrcForMask, Args.Mask, |
19766 | 5 | /*SignedIndices=*/true, |
19767 | 5 | /*isSubtraction=*/false, |
19768 | 5 | E->getExprLoc(), "over_boundary"); |
19769 | 5 | } else { |
19770 | 2 | SrcForMask = Builder.CreateAdd(SrcForMask, Args.Mask, "over_boundary"); |
19771 | 2 | } |
19772 | 7 | } |
19773 | | // Invert the mask to only clear the lower bits. |
19774 | 13 | llvm::Value *InvertedMask = Builder.CreateNot(Args.Mask, "inverted_mask"); |
19775 | 13 | llvm::Value *Result = nullptr; |
19776 | 13 | if (Args.Src->getType()->isPointerTy()) { |
19777 | 9 | Result = Builder.CreateIntrinsic( |
19778 | 9 | Intrinsic::ptrmask, {Args.SrcType, Args.IntType}, |
19779 | 9 | {SrcForMask, InvertedMask}, nullptr, "aligned_result"); |
19780 | 9 | } else { |
19781 | 4 | Result = Builder.CreateAnd(SrcForMask, InvertedMask, "aligned_result"); |
19782 | 4 | } |
19783 | 13 | assert(Result->getType() == Args.SrcType); |
19784 | 13 | return RValue::get(Result); |
19785 | 13 | } |
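The integer forms of both computations; for pointer arguments the same arithmetic is expressed with llvm.ptrmask, preceded by a GEP of Mask bytes in the align-up case:

    #include <cstdint>

    std::uintptr_t align_down(std::uintptr_t x, std::uintptr_t a) {
      return x & ~(a - 1); // clear the low bits
    }

    std::uintptr_t align_up(std::uintptr_t x, std::uintptr_t a) {
      // Adding the mask first leaves an already-aligned x unchanged.
      return (x + (a - 1)) & ~(a - 1);
    }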
19786 | | |
19787 | | Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, |
19788 | 283 | const CallExpr *E) { |
19789 | 283 | switch (BuiltinID) { |
19790 | 3 | case WebAssembly::BI__builtin_wasm_memory_size: { |
19791 | 3 | llvm::Type *ResultType = ConvertType(E->getType()); |
19792 | 3 | Value *I = EmitScalarExpr(E->getArg(0)); |
19793 | 3 | Function *Callee = |
19794 | 3 | CGM.getIntrinsic(Intrinsic::wasm_memory_size, ResultType); |
19795 | 3 | return Builder.CreateCall(Callee, I); |
19796 | 0 | } |
19797 | 3 | case WebAssembly::BI__builtin_wasm_memory_grow: { |
19798 | 3 | llvm::Type *ResultType = ConvertType(E->getType()); |
19799 | 3 | Value *Args[] = {EmitScalarExpr(E->getArg(0)), |
19800 | 3 | EmitScalarExpr(E->getArg(1))}; |
19801 | 3 | Function *Callee = |
19802 | 3 | CGM.getIntrinsic(Intrinsic::wasm_memory_grow, ResultType); |
19803 | 3 | return Builder.CreateCall(Callee, Args); |
19804 | 0 | } |
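A hedged source-level sketch of these two builtins for a wasm target; the first argument selects the memory index (0 is the default linear memory), and sizes are in 64KiB pages:

    unsigned long grow_one_page(void) {
      unsigned long pages = __builtin_wasm_memory_size(0);
      (void)pages; // current size, in 64KiB pages
      // Returns the previous size in pages, or -1 on failure.
      return __builtin_wasm_memory_grow(0, 1);
    }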
19805 | 3 | case WebAssembly::BI__builtin_wasm_tls_size: { |
19806 | 3 | llvm::Type *ResultType = ConvertType(E->getType()); |
19807 | 3 | Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_tls_size, ResultType); |
19808 | 3 | return Builder.CreateCall(Callee); |
19809 | 0 | } |
19810 | 3 | case WebAssembly::BI__builtin_wasm_tls_align: { |
19811 | 3 | llvm::Type *ResultType = ConvertType(E->getType()); |
19812 | 3 | Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_tls_align, ResultType); |
19813 | 3 | return Builder.CreateCall(Callee); |
19814 | 0 | } |
19815 | 3 | case WebAssembly::BI__builtin_wasm_tls_base: { |
19816 | 3 | Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_tls_base); |
19817 | 3 | return Builder.CreateCall(Callee); |
19818 | 0 | } |
19819 | 3 | case WebAssembly::BI__builtin_wasm_throw: { |
19820 | 3 | Value *Tag = EmitScalarExpr(E->getArg(0)); |
19821 | 3 | Value *Obj = EmitScalarExpr(E->getArg(1)); |
19822 | 3 | Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_throw); |
19823 | 3 | return Builder.CreateCall(Callee, {Tag, Obj}); |
19824 | 0 | } |
19825 | 3 | case WebAssembly::BI__builtin_wasm_rethrow: { |
19826 | 3 | Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_rethrow); |
19827 | 3 | return Builder.CreateCall(Callee); |
19828 | 0 | } |
19829 | 3 | case WebAssembly::BI__builtin_wasm_memory_atomic_wait32: { |
19830 | 3 | Value *Addr = EmitScalarExpr(E->getArg(0)); |
19831 | 3 | Value *Expected = EmitScalarExpr(E->getArg(1)); |
19832 | 3 | Value *Timeout = EmitScalarExpr(E->getArg(2)); |
19833 | 3 | Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_atomic_wait32); |
19834 | 3 | return Builder.CreateCall(Callee, {Addr, Expected, Timeout}); |
19835 | 0 | } |
19836 | 3 | case WebAssembly::BI__builtin_wasm_memory_atomic_wait64: { |
19837 | 3 | Value *Addr = EmitScalarExpr(E->getArg(0)); |
19838 | 3 | Value *Expected = EmitScalarExpr(E->getArg(1)); |
19839 | 3 | Value *Timeout = EmitScalarExpr(E->getArg(2)); |
19840 | 3 | Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_atomic_wait64); |
19841 | 3 | return Builder.CreateCall(Callee, {Addr, Expected, Timeout}); |
19842 | 0 | } |
19843 | 3 | case WebAssembly::BI__builtin_wasm_memory_atomic_notify: { |
19844 | 3 | Value *Addr = EmitScalarExpr(E->getArg(0)); |
19845 | 3 | Value *Count = EmitScalarExpr(E->getArg(1)); |
19846 | 3 | Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_atomic_notify); |
19847 | 3 | return Builder.CreateCall(Callee, {Addr, Count}); |
19848 | 0 | } |
19849 | 3 | case WebAssembly::BI__builtin_wasm_trunc_s_i32_f32: |
19850 | 6 | case WebAssembly::BI__builtin_wasm_trunc_s_i32_f64: |
19851 | 9 | case WebAssembly::BI__builtin_wasm_trunc_s_i64_f32: |
19852 | 12 | case WebAssembly::BI__builtin_wasm_trunc_s_i64_f64: { |
19853 | 12 | Value *Src = EmitScalarExpr(E->getArg(0)); |
19854 | 12 | llvm::Type *ResT = ConvertType(E->getType()); |
19855 | 12 | Function *Callee = |
19856 | 12 | CGM.getIntrinsic(Intrinsic::wasm_trunc_signed, {ResT, Src->getType()}); |
19857 | 12 | return Builder.CreateCall(Callee, {Src}); |
19858 | 9 | } |
19859 | 3 | case WebAssembly::BI__builtin_wasm_trunc_u_i32_f32: |
19860 | 6 | case WebAssembly::BI__builtin_wasm_trunc_u_i32_f64: |
19861 | 9 | case WebAssembly::BI__builtin_wasm_trunc_u_i64_f32: |
19862 | 12 | case WebAssembly::BI__builtin_wasm_trunc_u_i64_f64: { |
19863 | 12 | Value *Src = EmitScalarExpr(E->getArg(0)); |
19864 | 12 | llvm::Type *ResT = ConvertType(E->getType()); |
19865 | 12 | Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_trunc_unsigned, |
19866 | 12 | {ResT, Src->getType()}); |
19867 | 12 | return Builder.CreateCall(Callee, {Src}); |
19868 | 9 | } |
19869 | 3 | case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32_f32: |
19870 | 6 | case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32_f64: |
19871 | 9 | case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i64_f32: |
19872 | 12 | case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i64_f64: |
19873 | 14 | case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32x4_f32x4: { |
19874 | 14 | Value *Src = EmitScalarExpr(E->getArg(0)); |
19875 | 14 | llvm::Type *ResT = ConvertType(E->getType()); |
19876 | 14 | Function *Callee = |
19877 | 14 | CGM.getIntrinsic(Intrinsic::fptosi_sat, {ResT, Src->getType()}); |
19878 | 14 | return Builder.CreateCall(Callee, {Src}); |
19879 | 12 | } |
19880 | 3 | case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32_f32: |
19881 | 6 | case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32_f64: |
19882 | 9 | case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i64_f32: |
19883 | 12 | case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i64_f64: |
19884 | 14 | case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32x4_f32x4: { |
19885 | 14 | Value *Src = EmitScalarExpr(E->getArg(0)); |
19886 | 14 | llvm::Type *ResT = ConvertType(E->getType()); |
19887 | 14 | Function *Callee = |
19888 | 14 | CGM.getIntrinsic(Intrinsic::fptoui_sat, {ResT, Src->getType()}); |
19889 | 14 | return Builder.CreateCall(Callee, {Src}); |
19890 | 12 | } |
19891 | 3 | case WebAssembly::BI__builtin_wasm_min_f32: |
19892 | 6 | case WebAssembly::BI__builtin_wasm_min_f64: |
19893 | 8 | case WebAssembly::BI__builtin_wasm_min_f32x4: |
19894 | 10 | case WebAssembly::BI__builtin_wasm_min_f64x2: { |
19895 | 10 | Value *LHS = EmitScalarExpr(E->getArg(0)); |
19896 | 10 | Value *RHS = EmitScalarExpr(E->getArg(1)); |
19897 | 10 | Function *Callee = |
19898 | 10 | CGM.getIntrinsic(Intrinsic::minimum, ConvertType(E->getType())); |
19899 | 10 | return Builder.CreateCall(Callee, {LHS, RHS}); |
19900 | 8 | } |
19901 | 3 | case WebAssembly::BI__builtin_wasm_max_f32: |
19902 | 6 | case WebAssembly::BI__builtin_wasm_max_f64: |
19903 | 8 | case WebAssembly::BI__builtin_wasm_max_f32x4: |
19904 | 10 | case WebAssembly::BI__builtin_wasm_max_f64x2: { |
19905 | 10 | Value *LHS = EmitScalarExpr(E->getArg(0)); |
19906 | 10 | Value *RHS = EmitScalarExpr(E->getArg(1)); |
19907 | 10 | Function *Callee = |
19908 | 10 | CGM.getIntrinsic(Intrinsic::maximum, ConvertType(E->getType())); |
19909 | 10 | return Builder.CreateCall(Callee, {LHS, RHS}); |
19910 | 8 | } |
19911 | 2 | case WebAssembly::BI__builtin_wasm_pmin_f32x4: |
19912 | 4 | case WebAssembly::BI__builtin_wasm_pmin_f64x2: { |
19913 | 4 | Value *LHS = EmitScalarExpr(E->getArg(0)); |
19914 | 4 | Value *RHS = EmitScalarExpr(E->getArg(1)); |
19915 | 4 | Function *Callee = |
19916 | 4 | CGM.getIntrinsic(Intrinsic::wasm_pmin, ConvertType(E->getType())); |
19917 | 4 | return Builder.CreateCall(Callee, {LHS, RHS}); |
19918 | 2 | } |
19919 | 2 | case WebAssembly::BI__builtin_wasm_pmax_f32x4: |
19920 | 4 | case WebAssembly::BI__builtin_wasm_pmax_f64x2: { |
19921 | 4 | Value *LHS = EmitScalarExpr(E->getArg(0)); |
19922 | 4 | Value *RHS = EmitScalarExpr(E->getArg(1)); |
19923 | 4 | Function *Callee = |
19924 | 4 | CGM.getIntrinsic(Intrinsic::wasm_pmax, ConvertType(E->getType())); |
19925 | 4 | return Builder.CreateCall(Callee, {LHS, RHS}); |
19926 | 2 | } |
19927 | 2 | case WebAssembly::BI__builtin_wasm_ceil_f32x4: |
19928 | 4 | case WebAssembly::BI__builtin_wasm_floor_f32x4: |
19929 | 6 | case WebAssembly::BI__builtin_wasm_trunc_f32x4: |
19930 | 8 | case WebAssembly::BI__builtin_wasm_nearest_f32x4: |
19931 | 10 | case WebAssembly::BI__builtin_wasm_ceil_f64x2: |
19932 | 12 | case WebAssembly::BI__builtin_wasm_floor_f64x2: |
19933 | 14 | case WebAssembly::BI__builtin_wasm_trunc_f64x2: |
19934 | 16 | case WebAssembly::BI__builtin_wasm_nearest_f64x2: { |
19935 | 16 | unsigned IntNo; |
19936 | 16 | switch (BuiltinID) { |
19937 | 2 | case WebAssembly::BI__builtin_wasm_ceil_f32x4: |
19938 | 4 | case WebAssembly::BI__builtin_wasm_ceil_f64x2: |
19939 | 4 | IntNo = Intrinsic::ceil; |
19940 | 4 | break; |
19941 | 2 | case WebAssembly::BI__builtin_wasm_floor_f32x4: |
19942 | 4 | case WebAssembly::BI__builtin_wasm_floor_f64x2: |
19943 | 4 | IntNo = Intrinsic::floor; |
19944 | 4 | break; |
19945 | 2 | case WebAssembly::BI__builtin_wasm_trunc_f32x4: |
19946 | 4 | case WebAssembly::BI__builtin_wasm_trunc_f64x2: |
19947 | 4 | IntNo = Intrinsic::trunc; |
19948 | 4 | break; |
19949 | 2 | case WebAssembly::BI__builtin_wasm_nearest_f32x4: |
19950 | 4 | case WebAssembly::BI__builtin_wasm_nearest_f64x2: |
19951 | 4 | IntNo = Intrinsic::nearbyint; |
19952 | 4 | break; |
19953 | 0 | default: |
19954 | 0 | llvm_unreachable("unexpected builtin ID"); |
19955 | 16 | } |
19956 | 16 | Value *Value = EmitScalarExpr(E->getArg(0)); |
19957 | 16 | Function *Callee = CGM.getIntrinsic(IntNo, ConvertType(E->getType())); |
19958 | 16 | return Builder.CreateCall(Callee, Value); |
19959 | 16 | } |
19960 | 2 | case WebAssembly::BI__builtin_wasm_ref_null_extern: { |
19961 | 2 | Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_ref_null_extern); |
19962 | 2 | return Builder.CreateCall(Callee); |
19963 | 16 | } |
19964 | 2 | case WebAssembly::BI__builtin_wasm_ref_null_func: { |
19965 | 2 | Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_ref_null_func); |
19966 | 2 | return Builder.CreateCall(Callee); |
19967 | 16 | } |
19968 | 2 | case WebAssembly::BI__builtin_wasm_swizzle_i8x16: { |
19969 | 2 | Value *Src = EmitScalarExpr(E->getArg(0)); |
19970 | 2 | Value *Indices = EmitScalarExpr(E->getArg(1)); |
19971 | 2 | Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_swizzle); |
19972 | 2 | return Builder.CreateCall(Callee, {Src, Indices}); |
19973 | 16 | } |
19974 | 3 | case WebAssembly::BI__builtin_wasm_add_sat_s_i8x16: |
19975 | 5 | case WebAssembly::BI__builtin_wasm_add_sat_u_i8x16: |
19976 | 7 | case WebAssembly::BI__builtin_wasm_add_sat_s_i16x8: |
19977 | 9 | case WebAssembly::BI__builtin_wasm_add_sat_u_i16x8: |
19978 | 11 | case WebAssembly::BI__builtin_wasm_sub_sat_s_i8x16: |
19979 | 13 | case WebAssembly::BI__builtin_wasm_sub_sat_u_i8x16: |
19980 | 15 | case WebAssembly::BI__builtin_wasm_sub_sat_s_i16x8: |
19981 | 17 | case WebAssembly::BI__builtin_wasm_sub_sat_u_i16x8: { |
19982 | 17 | unsigned IntNo; |
19983 | 17 | switch (BuiltinID) { |
19984 | 3 | case WebAssembly::BI__builtin_wasm_add_sat_s_i8x16: |
19985 | 5 | case WebAssembly::BI__builtin_wasm_add_sat_s_i16x8: |
19986 | 5 | IntNo = Intrinsic::sadd_sat; |
19987 | 5 | break; |
19988 | 2 | case WebAssembly::BI__builtin_wasm_add_sat_u_i8x16: |
19989 | 4 | case WebAssembly::BI__builtin_wasm_add_sat_u_i16x8: |
19990 | 4 | IntNo = Intrinsic::uadd_sat; |
19991 | 4 | break; |
19992 | 2 | case WebAssembly::BI__builtin_wasm_sub_sat_s_i8x16: |
19993 | 4 | case WebAssembly::BI__builtin_wasm_sub_sat_s_i16x8: |
19994 | 4 | IntNo = Intrinsic::wasm_sub_sat_signed; |
19995 | 4 | break; |
19996 | 2 | case WebAssembly::BI__builtin_wasm_sub_sat_u_i8x16: |
19997 | 4 | case WebAssembly::BI__builtin_wasm_sub_sat_u_i16x8: |
19998 | 4 | IntNo = Intrinsic::wasm_sub_sat_unsigned; |
19999 | 4 | break; |
20000 | 0 | default: |
20001 | 0 | llvm_unreachable("unexpected builtin ID"); |
20002 | 17 | } |
20003 | 17 | Value *LHS = EmitScalarExpr(E->getArg(0)); |
20004 | 17 | Value *RHS = EmitScalarExpr(E->getArg(1)); |
20005 | 17 | Function *Callee = CGM.getIntrinsic(IntNo, ConvertType(E->getType())); |
20006 | 17 | return Builder.CreateCall(Callee, {LHS, RHS}); |
20007 | 17 | } |
20008 | 2 | case WebAssembly::BI__builtin_wasm_abs_i8x16: |
20009 | 4 | case WebAssembly::BI__builtin_wasm_abs_i16x8: |
20010 | 6 | case WebAssembly::BI__builtin_wasm_abs_i32x4: |
20011 | 8 | case WebAssembly::BI__builtin_wasm_abs_i64x2: { |
20012 | 8 | Value *Vec = EmitScalarExpr(E->getArg(0)); |
20013 | 8 | Value *Neg = Builder.CreateNeg(Vec, "neg"); |
20014 | 8 | Constant *Zero = llvm::Constant::getNullValue(Vec->getType()); |
20015 | 8 | Value *ICmp = Builder.CreateICmpSLT(Vec, Zero, "abscond"); |
20016 | 8 | return Builder.CreateSelect(ICmp, Neg, Vec, "abs"); |
20017 | 6 | } |
20018 | 2 | case WebAssembly::BI__builtin_wasm_min_s_i8x16: |
20019 | 4 | case WebAssembly::BI__builtin_wasm_min_u_i8x16: |
20020 | 6 | case WebAssembly::BI__builtin_wasm_max_s_i8x16: |
20021 | 8 | case WebAssembly::BI__builtin_wasm_max_u_i8x16: |
20022 | 10 | case WebAssembly::BI__builtin_wasm_min_s_i16x8: |
20023 | 12 | case WebAssembly::BI__builtin_wasm_min_u_i16x8: |
20024 | 14 | case WebAssembly::BI__builtin_wasm_max_s_i16x8: |
20025 | 16 | case WebAssembly::BI__builtin_wasm_max_u_i16x8: |
20026 | 18 | case WebAssembly::BI__builtin_wasm_min_s_i32x4: |
20027 | 20 | case WebAssembly::BI__builtin_wasm_min_u_i32x4: |
20028 | 22 | case WebAssembly::BI__builtin_wasm_max_s_i32x4: |
20029 | 24 | case WebAssembly::BI__builtin_wasm_max_u_i32x4: { |
20030 | 24 | Value *LHS = EmitScalarExpr(E->getArg(0)); |
20031 | 24 | Value *RHS = EmitScalarExpr(E->getArg(1)); |
20032 | 24 | Value *ICmp; |
20033 | 24 | switch (BuiltinID) { |
20034 | 2 | case WebAssembly::BI__builtin_wasm_min_s_i8x16: |
20035 | 4 | case WebAssembly::BI__builtin_wasm_min_s_i16x8: |
20036 | 6 | case WebAssembly::BI__builtin_wasm_min_s_i32x4: |
20037 | 6 | ICmp = Builder.CreateICmpSLT(LHS, RHS); |
20038 | 6 | break; |
20039 | 2 | case WebAssembly::BI__builtin_wasm_min_u_i8x16: |
20040 | 4 | case WebAssembly::BI__builtin_wasm_min_u_i16x8: |
20041 | 6 | case WebAssembly::BI__builtin_wasm_min_u_i32x4: |
20042 | 6 | ICmp = Builder.CreateICmpULT(LHS, RHS); |
20043 | 6 | break; |
20044 | 2 | case WebAssembly::BI__builtin_wasm_max_s_i8x16: |
20045 | 4 | case WebAssembly::BI__builtin_wasm_max_s_i16x8: |
20046 | 6 | case WebAssembly::BI__builtin_wasm_max_s_i32x4: |
20047 | 6 | ICmp = Builder.CreateICmpSGT(LHS, RHS); |
20048 | 6 | break; |
20049 | 2 | case WebAssembly::BI__builtin_wasm_max_u_i8x16: |
20050 | 4 | case WebAssembly::BI__builtin_wasm_max_u_i16x8: |
20051 | 6 | case WebAssembly::BI__builtin_wasm_max_u_i32x4: |
20052 | 6 | ICmp = Builder.CreateICmpUGT(LHS, RHS); |
20053 | 6 | break; |
20054 | 0 | default: |
20055 | 0 | llvm_unreachable("unexpected builtin ID"); |
20056 | 24 | } |
20057 | 24 | return Builder.CreateSelect(ICmp, LHS, RHS); |
20058 | 24 | } |
20059 | 2 | case WebAssembly::BI__builtin_wasm_avgr_u_i8x16: |
20060 | 4 | case WebAssembly::BI__builtin_wasm_avgr_u_i16x8: { |
20061 | 4 | Value *LHS = EmitScalarExpr(E->getArg(0)); |
20062 | 4 | Value *RHS = EmitScalarExpr(E->getArg(1)); |
20063 | 4 | Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_avgr_unsigned, |
20064 | 4 | ConvertType(E->getType())); |
20065 | 4 | return Builder.CreateCall(Callee, {LHS, RHS}); |
20066 | 2 | } |
20067 | 2 | case WebAssembly::BI__builtin_wasm_q15mulr_sat_s_i16x8: { |
20068 | 2 | Value *LHS = EmitScalarExpr(E->getArg(0)); |
20069 | 2 | Value *RHS = EmitScalarExpr(E->getArg(1)); |
20070 | 2 | Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_q15mulr_sat_signed); |
20071 | 2 | return Builder.CreateCall(Callee, {LHS, RHS}); |
20072 | 2 | } |
20073 | 2 | case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_s_i16x8: |
20074 | 4 | case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_u_i16x8: |
20075 | 6 | case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_s_i32x4: |
20076 | 8 | case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_u_i32x4: { |
20077 | 8 | Value *Vec = EmitScalarExpr(E->getArg(0)); |
20078 | 8 | unsigned IntNo; |
20079 | 8 | switch (BuiltinID) { |
20080 | 2 | case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_s_i16x8: |
20081 | 4 | case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_s_i32x4: |
20082 | 4 | IntNo = Intrinsic::wasm_extadd_pairwise_signed; |
20083 | 4 | break; |
20084 | 2 | case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_u_i16x8: |
20085 | 4 | case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_u_i32x4: |
20086 | 4 | IntNo = Intrinsic::wasm_extadd_pairwise_unsigned; |
20087 | 4 | break; |
20088 | 0 | default: |
20089 | 0 | llvm_unreachable("unexpected builtin ID"); |
20090 | 8 | } |
20091 | | |
20092 | 8 | Function *Callee = CGM.getIntrinsic(IntNo, ConvertType(E->getType())); |
20093 | 8 | return Builder.CreateCall(Callee, Vec); |
20094 | 8 | } |
20095 | 2 | case WebAssembly::BI__builtin_wasm_bitselect: { |
20096 | 2 | Value *V1 = EmitScalarExpr(E->getArg(0)); |
20097 | 2 | Value *V2 = EmitScalarExpr(E->getArg(1)); |
20098 | 2 | Value *C = EmitScalarExpr(E->getArg(2)); |
20099 | 2 | Function *Callee = |
20100 | 2 | CGM.getIntrinsic(Intrinsic::wasm_bitselect, ConvertType(E->getType())); |
20101 | 2 | return Builder.CreateCall(Callee, {V1, V2, C}); |
20102 | 8 | } |
20103 | 2 | case WebAssembly::BI__builtin_wasm_dot_s_i32x4_i16x8: { |
20104 | 2 | Value *LHS = EmitScalarExpr(E->getArg(0)); |
20105 | 2 | Value *RHS = EmitScalarExpr(E->getArg(1)); |
20106 | 2 | Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_dot); |
20107 | 2 | return Builder.CreateCall(Callee, {LHS, RHS}); |
20108 | 8 | } |
20109 | 2 | case WebAssembly::BI__builtin_wasm_popcnt_i8x16: { |
20110 | 2 | Value *Vec = EmitScalarExpr(E->getArg(0)); |
20111 | 2 | Function *Callee = |
20112 | 2 | CGM.getIntrinsic(Intrinsic::ctpop, ConvertType(E->getType())); |
20113 | 2 | return Builder.CreateCall(Callee, {Vec}); |
20114 | 8 | } |
20115 | 2 | case WebAssembly::BI__builtin_wasm_any_true_v128: |
20116 | 4 | case WebAssembly::BI__builtin_wasm_all_true_i8x16: |
20117 | 6 | case WebAssembly::BI__builtin_wasm_all_true_i16x8: |
20118 | 8 | case WebAssembly::BI__builtin_wasm_all_true_i32x4: |
20119 | 10 | case WebAssembly::BI__builtin_wasm_all_true_i64x2: { |
20120 | 10 | unsigned IntNo; |
20121 | 10 | switch (BuiltinID) { |
20122 | 2 | case WebAssembly::BI__builtin_wasm_any_true_v128: |
20123 | 2 | IntNo = Intrinsic::wasm_anytrue; |
20124 | 2 | break; |
20125 | 2 | case WebAssembly::BI__builtin_wasm_all_true_i8x16: |
20126 | 4 | case WebAssembly::BI__builtin_wasm_all_true_i16x8: |
20127 | 6 | case WebAssembly::BI__builtin_wasm_all_true_i32x4: |
20128 | 8 | case WebAssembly::BI__builtin_wasm_all_true_i64x2: |
20129 | 8 | IntNo = Intrinsic::wasm_alltrue; |
20130 | 8 | break; |
20131 | 0 | default: |
20132 | 0 | llvm_unreachable("unexpected builtin ID"); |
20133 | 10 | } |
20134 | 10 | Value *Vec = EmitScalarExpr(E->getArg(0)); |
20135 | 10 | Function *Callee = CGM.getIntrinsic(IntNo, Vec->getType()); |
20136 | 10 | return Builder.CreateCall(Callee, {Vec}); |
20137 | 10 | } |
20138 | 2 | case WebAssembly::BI__builtin_wasm_bitmask_i8x16: |
20139 | 4 | case WebAssembly::BI__builtin_wasm_bitmask_i16x8: |
20140 | 6 | case WebAssembly::BI__builtin_wasm_bitmask_i32x4: |
20141 | 8 | case WebAssembly::BI__builtin_wasm_bitmask_i64x2: { |
20142 | 8 | Value *Vec = EmitScalarExpr(E->getArg(0)); |
20143 | 8 | Function *Callee = |
20144 | 8 | CGM.getIntrinsic(Intrinsic::wasm_bitmask, Vec->getType()); |
20145 | 8 | return Builder.CreateCall(Callee, {Vec}); |
20146 | 6 | } |
20147 | 2 | case WebAssembly::BI__builtin_wasm_abs_f32x4: |
20148 | 4 | case WebAssembly::BI__builtin_wasm_abs_f64x2: { |
20149 | 4 | Value *Vec = EmitScalarExpr(E->getArg(0)); |
20150 | 4 | Function *Callee = CGM.getIntrinsic(Intrinsic::fabs, Vec->getType()); |
20151 | 4 | return Builder.CreateCall(Callee, {Vec}); |
20152 | 2 | } |
20153 | 2 | case WebAssembly::BI__builtin_wasm_sqrt_f32x4: |
20154 | 4 | case WebAssembly::BI__builtin_wasm_sqrt_f64x2: { |
20155 | 4 | Value *Vec = EmitScalarExpr(E->getArg(0)); |
20156 | 4 | Function *Callee = CGM.getIntrinsic(Intrinsic::sqrt, Vec->getType()); |
20157 | 4 | return Builder.CreateCall(Callee, {Vec}); |
20158 | 2 | } |
20159 | 2 | case WebAssembly::BI__builtin_wasm_narrow_s_i8x16_i16x8: |
20160 | 4 | case WebAssembly::BI__builtin_wasm_narrow_u_i8x16_i16x8: |
20161 | 6 | case WebAssembly::BI__builtin_wasm_narrow_s_i16x8_i32x4: |
20162 | 8 | case WebAssembly::BI__builtin_wasm_narrow_u_i16x8_i32x4: { |
20163 | 8 | Value *Low = EmitScalarExpr(E->getArg(0)); |
20164 | 8 | Value *High = EmitScalarExpr(E->getArg(1)); |
20165 | 8 | unsigned IntNo; |
20166 | 8 | switch (BuiltinID) { |
20167 | 2 | case WebAssembly::BI__builtin_wasm_narrow_s_i8x16_i16x8: |
20168 | 4 | case WebAssembly::BI__builtin_wasm_narrow_s_i16x8_i32x4: |
20169 | 4 | IntNo = Intrinsic::wasm_narrow_signed; |
20170 | 4 | break; |
20171 | 2 | case WebAssembly::BI__builtin_wasm_narrow_u_i8x16_i16x8: |
20172 | 4 | case WebAssembly::BI__builtin_wasm_narrow_u_i16x8_i32x4: |
20173 | 4 | IntNo = Intrinsic::wasm_narrow_unsigned; |
20174 | 4 | break; |
20175 | 0 | default: |
20176 | 0 | llvm_unreachable("unexpected builtin ID"); |
20177 | 8 | } |
20178 | 8 | Function *Callee = |
20179 | 8 | CGM.getIntrinsic(IntNo, {ConvertType(E->getType()), Low->getType()}); |
20180 | 8 | return Builder.CreateCall(Callee, {Low, High}); |
20181 | 8 | } |
20182 | 2 | case WebAssembly::BI__builtin_wasm_trunc_sat_s_zero_f64x2_i32x4: |
20183 | 4 | case WebAssembly::BI__builtin_wasm_trunc_sat_u_zero_f64x2_i32x4: { |
20184 | 4 | Value *Vec = EmitScalarExpr(E->getArg(0)); |
20185 | 4 | unsigned IntNo; |
20186 | 4 | switch (BuiltinID) { |
20187 | 2 | case WebAssembly::BI__builtin_wasm_trunc_sat_s_zero_f64x2_i32x4: |
20188 | 2 | IntNo = Intrinsic::fptosi_sat; |
20189 | 2 | break; |
20190 | 2 | case WebAssembly::BI__builtin_wasm_trunc_sat_u_zero_f64x2_i32x4: |
20191 | 2 | IntNo = Intrinsic::fptoui_sat; |
20192 | 2 | break; |
20193 | 0 | default: |
20194 | 0 | llvm_unreachable("unexpected builtin ID"); |
20195 | 4 | } |
20196 | 4 | llvm::Type *SrcT = Vec->getType(); |
20197 | 4 | llvm::Type *TruncT = SrcT->getWithNewType(Builder.getInt32Ty()); |
20198 | 4 | Function *Callee = CGM.getIntrinsic(IntNo, {TruncT, SrcT}); |
20199 | 4 | Value *Trunc = Builder.CreateCall(Callee, Vec); |
20200 | 4 | Value *Splat = Constant::getNullValue(TruncT); |
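// (The shuffle below widens the truncated <2 x i32> result to <4 x i32> by
// concatenating two zero lanes taken from the null-value splat.)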
20201 | 4 | return Builder.CreateShuffleVector(Trunc, Splat, ArrayRef<int>{0, 1, 2, 3}); |
20202 | 4 | } |
20203 | 2 | case WebAssembly::BI__builtin_wasm_shuffle_i8x16: { |
20204 | 2 | Value *Ops[18]; |
20205 | 2 | size_t OpIdx = 0; |
20206 | 2 | Ops[OpIdx++] = EmitScalarExpr(E->getArg(0)); |
20207 | 2 | Ops[OpIdx++] = EmitScalarExpr(E->getArg(1)); |
20208 | 34 | while (OpIdx < 18) { |
20209 | 32 | std::optional<llvm::APSInt> LaneConst = |
20210 | 32 | E->getArg(OpIdx)->getIntegerConstantExpr(getContext()); |
20211 | 32 | assert(LaneConst && "Constant arg isn't actually constant?"); |
20212 | 32 | Ops[OpIdx++] = llvm::ConstantInt::get(getLLVMContext(), *LaneConst); |
20213 | 32 | } |
20214 | 2 | Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_shuffle); |
20215 | 2 | return Builder.CreateCall(Callee, Ops); |
20216 | 2 | } |
20217 | 2 | case WebAssembly::BI__builtin_wasm_relaxed_madd_f32x4: |
20218 | 4 | case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f32x4: |
20219 | 6 | case WebAssembly::BI__builtin_wasm_relaxed_madd_f64x2: |
20220 | 8 | case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f64x2: { |
20221 | 8 | Value *A = EmitScalarExpr(E->getArg(0)); |
20222 | 8 | Value *B = EmitScalarExpr(E->getArg(1)); |
20223 | 8 | Value *C = EmitScalarExpr(E->getArg(2)); |
20224 | 8 | unsigned IntNo; |
20225 | 8 | switch (BuiltinID) { |
20226 | 2 | case WebAssembly::BI__builtin_wasm_relaxed_madd_f32x4: |
20227 | 4 | case WebAssembly::BI__builtin_wasm_relaxed_madd_f64x2: |
20228 | 4 | IntNo = Intrinsic::wasm_relaxed_madd; |
20229 | 4 | break; |
20230 | 2 | case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f32x4: |
20231 | 4 | case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f64x2: |
20232 | 4 | IntNo = Intrinsic::wasm_relaxed_nmadd; |
20233 | 4 | break; |
20234 | 0 | default: |
20235 | 0 | llvm_unreachable("unexpected builtin ID"); |
20236 | 8 | } |
20237 | 8 | Function *Callee = CGM.getIntrinsic(IntNo, A->getType()); |
20238 | 8 | return Builder.CreateCall(Callee, {A, B, C}); |
20239 | 8 | } |
20240 | 2 | case WebAssembly::BI__builtin_wasm_relaxed_laneselect_i8x16: |
20241 | 4 | case WebAssembly::BI__builtin_wasm_relaxed_laneselect_i16x8: |
20242 | 6 | case WebAssembly::BI__builtin_wasm_relaxed_laneselect_i32x4: |
20243 | 8 | case WebAssembly::BI__builtin_wasm_relaxed_laneselect_i64x2: { |
20244 | 8 | Value *A = EmitScalarExpr(E->getArg(0)); |
20245 | 8 | Value *B = EmitScalarExpr(E->getArg(1)); |
20246 | 8 | Value *C = EmitScalarExpr(E->getArg(2)); |
20247 | 8 | Function *Callee = |
20248 | 8 | CGM.getIntrinsic(Intrinsic::wasm_relaxed_laneselect, A->getType()); |
20249 | 8 | return Builder.CreateCall(Callee, {A, B, C}); |
20250 | 6 | } |
20251 | 2 | case WebAssembly::BI__builtin_wasm_relaxed_swizzle_i8x16: { |
20252 | 2 | Value *Src = EmitScalarExpr(E->getArg(0)); |
20253 | 2 | Value *Indices = EmitScalarExpr(E->getArg(1)); |
20254 | 2 | Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_relaxed_swizzle); |
20255 | 2 | return Builder.CreateCall(Callee, {Src, Indices}); |
20256 | 6 | } |
20257 | 2 | case WebAssembly::BI__builtin_wasm_relaxed_min_f32x4: |
20258 | 4 | case WebAssembly::BI__builtin_wasm_relaxed_max_f32x4: |
20259 | 6 | case WebAssembly::BI__builtin_wasm_relaxed_min_f64x2: |
20260 | 8 | case WebAssembly::BI__builtin_wasm_relaxed_max_f64x2: { |
20261 | 8 | Value *LHS = EmitScalarExpr(E->getArg(0)); |
20262 | 8 | Value *RHS = EmitScalarExpr(E->getArg(1)); |
20263 | 8 | unsigned IntNo; |
20264 | 8 | switch (BuiltinID) { |
20265 | 2 | case WebAssembly::BI__builtin_wasm_relaxed_min_f32x4: |
20266 | 4 | case WebAssembly::BI__builtin_wasm_relaxed_min_f64x2: |
20267 | 4 | IntNo = Intrinsic::wasm_relaxed_min; |
20268 | 4 | break; |
20269 | 2 | case WebAssembly::BI__builtin_wasm_relaxed_max_f32x4: |
20270 | 4 | case WebAssembly::BI__builtin_wasm_relaxed_max_f64x2: |
20271 | 4 | IntNo = Intrinsic::wasm_relaxed_max; |
20272 | 4 | break; |
20273 | 0 | default: |
20274 | 0 | llvm_unreachable("unexpected builtin ID"); |
20275 | 8 | } |
20276 | 8 | Function *Callee = CGM.getIntrinsic(IntNo, LHS->getType()); |
20277 | 8 | return Builder.CreateCall(Callee, {LHS, RHS}); |
20278 | 8 | } |
20279 | 2 | case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_i32x4_f32x4: |
20280 | 4 | case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_i32x4_f32x4: |
20281 | 6 | case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_zero_i32x4_f64x2: |
20282 | 8 | case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_zero_i32x4_f64x2: { |
20283 | 8 | Value *Vec = EmitScalarExpr(E->getArg(0)); |
20284 | 8 | unsigned IntNo; |
20285 | 8 | switch (BuiltinID) { |
20286 | 2 | case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_i32x4_f32x4: |
20287 | 2 | IntNo = Intrinsic::wasm_relaxed_trunc_signed; |
20288 | 2 | break; |
20289 | 2 | case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_i32x4_f32x4: |
20290 | 2 | IntNo = Intrinsic::wasm_relaxed_trunc_unsigned; |
20291 | 2 | break; |
20292 | 2 | case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_zero_i32x4_f64x2: |
20293 | 2 | IntNo = Intrinsic::wasm_relaxed_trunc_signed_zero; |
20294 | 2 | break; |
20295 | 2 | case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_zero_i32x4_f64x2: |
20296 | 2 | IntNo = Intrinsic::wasm_relaxed_trunc_unsigned_zero; |
20297 | 2 | break; |
20298 | 0 | default: |
20299 | 0 | llvm_unreachable("unexpected builtin ID"); |
20300 | 8 | } |
20301 | 8 | Function *Callee = CGM.getIntrinsic(IntNo); |
20302 | 8 | return Builder.CreateCall(Callee, {Vec}); |
20303 | 8 | } |
20304 | 2 | case WebAssembly::BI__builtin_wasm_relaxed_q15mulr_s_i16x8: { |
20305 | 2 | Value *LHS = EmitScalarExpr(E->getArg(0)); |
20306 | 2 | Value *RHS = EmitScalarExpr(E->getArg(1)); |
20307 | 2 | Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_relaxed_q15mulr_signed); |
20308 | 2 | return Builder.CreateCall(Callee, {LHS, RHS}); |
20309 | 8 | } |
20310 | 2 | case WebAssembly::BI__builtin_wasm_relaxed_dot_i8x16_i7x16_s_i16x8: { |
20311 | 2 | Value *LHS = EmitScalarExpr(E->getArg(0)); |
20312 | 2 | Value *RHS = EmitScalarExpr(E->getArg(1)); |
20313 | 2 | Function *Callee = |
20314 | 2 | CGM.getIntrinsic(Intrinsic::wasm_relaxed_dot_i8x16_i7x16_signed); |
20315 | 2 | return Builder.CreateCall(Callee, {LHS, RHS}); |
20316 | 8 | } |
20317 | 2 | case WebAssembly::BI__builtin_wasm_relaxed_dot_i8x16_i7x16_add_s_i32x4: { |
20318 | 2 | Value *LHS = EmitScalarExpr(E->getArg(0)); |
20319 | 2 | Value *RHS = EmitScalarExpr(E->getArg(1)); |
20320 | 2 | Value *Acc = EmitScalarExpr(E->getArg(2)); |
20321 | 2 | Function *Callee = |
20322 | 2 | CGM.getIntrinsic(Intrinsic::wasm_relaxed_dot_i8x16_i7x16_add_signed); |
20323 | 2 | return Builder.CreateCall(Callee, {LHS, RHS, Acc}); |
20324 | 8 | } |
20325 | 2 | case WebAssembly::BI__builtin_wasm_relaxed_dot_bf16x8_add_f32_f32x4: { |
20326 | 2 | Value *LHS = EmitScalarExpr(E->getArg(0)); |
20327 | 2 | Value *RHS = EmitScalarExpr(E->getArg(1)); |
20328 | 2 | Value *Acc = EmitScalarExpr(E->getArg(2)); |
20329 | 2 | Function *Callee = |
20330 | 2 | CGM.getIntrinsic(Intrinsic::wasm_relaxed_dot_bf16x8_add_f32); |
20331 | 2 | return Builder.CreateCall(Callee, {LHS, RHS, Acc}); |
20332 | 8 | } |
20333 | 0 | case WebAssembly::BI__builtin_wasm_table_get: { |
20334 | 0 | assert(E->getArg(0)->getType()->isArrayType()); |
20335 | 0 | Value *Table = EmitArrayToPointerDecay(E->getArg(0)).getPointer(); |
20336 | 0 | Value *Index = EmitScalarExpr(E->getArg(1)); |
20337 | 0 | Function *Callee; |
20338 | 0 | if (E->getType().isWebAssemblyExternrefType()) |
20339 | 0 | Callee = CGM.getIntrinsic(Intrinsic::wasm_table_get_externref); |
20340 | 0 | else if (E->getType().isWebAssemblyFuncrefType()) |
20341 | 0 | Callee = CGM.getIntrinsic(Intrinsic::wasm_table_get_funcref); |
20342 | 0 | else |
20343 | 0 | llvm_unreachable( |
20344 | 0 | "Unexpected reference type for __builtin_wasm_table_get"); |
20345 | 0 | return Builder.CreateCall(Callee, {Table, Index}); |
20346 | 0 | } |
20347 | 0 | case WebAssembly::BI__builtin_wasm_table_set: { |
20348 | 0 | assert(E->getArg(0)->getType()->isArrayType()); |
20349 | 0 | Value *Table = EmitArrayToPointerDecay(E->getArg(0)).getPointer(); |
20350 | 0 | Value *Index = EmitScalarExpr(E->getArg(1)); |
20351 | 0 | Value *Val = EmitScalarExpr(E->getArg(2)); |
20352 | 0 | Function *Callee; |
20353 | 0 | if (E->getArg(2)->getType().isWebAssemblyExternrefType()) |
20354 | 0 | Callee = CGM.getIntrinsic(Intrinsic::wasm_table_set_externref); |
20355 | 0 | else if (E->getArg(2)->getType().isWebAssemblyFuncrefType()) |
20356 | 0 | Callee = CGM.getIntrinsic(Intrinsic::wasm_table_set_funcref); |
20357 | 0 | else |
20358 | 0 | llvm_unreachable( |
20359 | 0 | "Unexpected reference type for __builtin_wasm_table_set"); |
20360 | 0 | return Builder.CreateCall(Callee, {Table, Index, Val}); |
20361 | 0 | } |
20362 | 0 | case WebAssembly::BI__builtin_wasm_table_size: { |
20363 | 0 | assert(E->getArg(0)->getType()->isArrayType()); |
20364 | 0 | Value *Value = EmitArrayToPointerDecay(E->getArg(0)).getPointer(); |
20365 | 0 | Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_table_size); |
20366 | 0 | return Builder.CreateCall(Callee, Value); |
20367 | 0 | } |
20368 | 0 | case WebAssembly::BI__builtin_wasm_table_grow: { |
20369 | 0 | assert(E->getArg(0)->getType()->isArrayType()); |
20370 | 0 | Value *Table = EmitArrayToPointerDecay(E->getArg(0)).getPointer(); |
20371 | 0 | Value *Val = EmitScalarExpr(E->getArg(1)); |
20372 | 0 | Value *NElems = EmitScalarExpr(E->getArg(2)); |
20373 | |
20374 | 0 | Function *Callee; |
20375 | 0 | if (E->getArg(1)->getType().isWebAssemblyExternrefType()) |
20376 | 0 | Callee = CGM.getIntrinsic(Intrinsic::wasm_table_grow_externref); |
20377 | 0 | else if (E->getArg(1)->getType().isWebAssemblyFuncrefType())
20378 | 0 | Callee = CGM.getIntrinsic(Intrinsic::wasm_table_fill_funcref); |
20379 | 0 | else |
20380 | 0 | llvm_unreachable( |
20381 | 0 | "Unexpected reference type for __builtin_wasm_table_grow"); |
20382 | |
20383 | 0 | return Builder.CreateCall(Callee, {Table, Val, NElems}); |
20384 | 0 | } |
20385 | 0 | case WebAssembly::BI__builtin_wasm_table_fill: { |
20386 | 0 | assert(E->getArg(0)->getType()->isArrayType()); |
20387 | 0 | Value *Table = EmitArrayToPointerDecay(E->getArg(0)).getPointer(); |
20388 | 0 | Value *Index = EmitScalarExpr(E->getArg(1)); |
20389 | 0 | Value *Val = EmitScalarExpr(E->getArg(2)); |
20390 | 0 | Value *NElems = EmitScalarExpr(E->getArg(3)); |
20391 | |
20392 | 0 | Function *Callee; |
20393 | 0 | if (E->getArg(2)->getType().isWebAssemblyExternrefType()) |
20394 | 0 | Callee = CGM.getIntrinsic(Intrinsic::wasm_table_fill_externref); |
20395 | 0 | else if (E->getArg(2)->getType().isWebAssemblyFuncrefType()) |
20396 | 0 | Callee = CGM.getIntrinsic(Intrinsic::wasm_table_fill_funcref); |
20397 | 0 | else |
20398 | 0 | llvm_unreachable( |
20399 | 0 | "Unexpected reference type for __builtin_wasm_table_fill"); |
20400 | |
20401 | 0 | return Builder.CreateCall(Callee, {Table, Index, Val, NElems}); |
20402 | 0 | } |
20403 | 0 | case WebAssembly::BI__builtin_wasm_table_copy: { |
20404 | 0 | assert(E->getArg(0)->getType()->isArrayType()); |
20405 | 0 | Value *TableX = EmitArrayToPointerDecay(E->getArg(0)).getPointer(); |
20406 | 0 | Value *TableY = EmitArrayToPointerDecay(E->getArg(1)).getPointer(); |
20407 | 0 | Value *DstIdx = EmitScalarExpr(E->getArg(2)); |
20408 | 0 | Value *SrcIdx = EmitScalarExpr(E->getArg(3)); |
20409 | 0 | Value *NElems = EmitScalarExpr(E->getArg(4)); |
20410 | |
20411 | 0 | Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_table_copy); |
20412 | |
20413 | 0 | return Builder.CreateCall(Callee, {TableX, TableY, SrcIdx, DstIdx, NElems}); |
20414 | 0 | } |
20415 | 0 | default: |
20416 | 0 | return nullptr; |
20417 | 283 | } |
20418 | 283 | } |
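
Most of the cases above share one shape: emit the scalar arguments, fetch the
(possibly type-overloaded) intrinsic from CGM.getIntrinsic, and emit a single
call. A small source-level sketch for two of the builtins; the IR in the
comments is illustrative and assumes a wasm32 target, where __SIZE_TYPE__ is
i32:

    extern "C" __SIZE_TYPE__ CurrentPages(void) {
      // Lowers to: call @llvm.wasm.memory.size.i32(i32 0)
      return __builtin_wasm_memory_size(0);
    }

    extern "C" __SIZE_TYPE__ GrowBy(__SIZE_TYPE__ Delta) {
      // Lowers to: call @llvm.wasm.memory.grow.i32(i32 0, i32 %Delta)
      return __builtin_wasm_memory_grow(0, Delta);
    }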
20419 | | |
20420 | | static std::pair<Intrinsic::ID, unsigned> |
20421 | 0 | getIntrinsicForHexagonNonClangBuiltin(unsigned BuiltinID) { |
20422 | 0 | struct Info { |
20423 | 0 | unsigned BuiltinID; |
20424 | 0 | Intrinsic::ID IntrinsicID; |
20425 | 0 | unsigned VecLen; |
20426 | 0 | }; |
20427 | 0 | static Info Infos[] = { |
20428 | 0 | #define CUSTOM_BUILTIN_MAPPING(x,s) \ |
20429 | 0 | { Hexagon::BI__builtin_HEXAGON_##x, Intrinsic::hexagon_##x, s }, |
20430 | 0 | CUSTOM_BUILTIN_MAPPING(L2_loadrub_pci, 0) |
20431 | 0 | CUSTOM_BUILTIN_MAPPING(L2_loadrb_pci, 0) |
20432 | 0 | CUSTOM_BUILTIN_MAPPING(L2_loadruh_pci, 0) |
20433 | 0 | CUSTOM_BUILTIN_MAPPING(L2_loadrh_pci, 0) |
20434 | 0 | CUSTOM_BUILTIN_MAPPING(L2_loadri_pci, 0) |
20435 | 0 | CUSTOM_BUILTIN_MAPPING(L2_loadrd_pci, 0) |
20436 | 0 | CUSTOM_BUILTIN_MAPPING(L2_loadrub_pcr, 0) |
20437 | 0 | CUSTOM_BUILTIN_MAPPING(L2_loadrb_pcr, 0) |
20438 | 0 | CUSTOM_BUILTIN_MAPPING(L2_loadruh_pcr, 0) |
20439 | 0 | CUSTOM_BUILTIN_MAPPING(L2_loadrh_pcr, 0) |
20440 | 0 | CUSTOM_BUILTIN_MAPPING(L2_loadri_pcr, 0) |
20441 | 0 | CUSTOM_BUILTIN_MAPPING(L2_loadrd_pcr, 0) |
20442 | 0 | CUSTOM_BUILTIN_MAPPING(S2_storerb_pci, 0) |
20443 | 0 | CUSTOM_BUILTIN_MAPPING(S2_storerh_pci, 0) |
20444 | 0 | CUSTOM_BUILTIN_MAPPING(S2_storerf_pci, 0) |
20445 | 0 | CUSTOM_BUILTIN_MAPPING(S2_storeri_pci, 0) |
20446 | 0 | CUSTOM_BUILTIN_MAPPING(S2_storerd_pci, 0) |
20447 | 0 | CUSTOM_BUILTIN_MAPPING(S2_storerb_pcr, 0) |
20448 | 0 | CUSTOM_BUILTIN_MAPPING(S2_storerh_pcr, 0) |
20449 | 0 | CUSTOM_BUILTIN_MAPPING(S2_storerf_pcr, 0) |
20450 | 0 | CUSTOM_BUILTIN_MAPPING(S2_storeri_pcr, 0) |
20451 | 0 | CUSTOM_BUILTIN_MAPPING(S2_storerd_pcr, 0) |
20452 | | // Legacy builtins that take a vector in place of a vector predicate. |
20453 | 0 | CUSTOM_BUILTIN_MAPPING(V6_vmaskedstoreq, 64) |
20454 | 0 | CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorenq, 64) |
20455 | 0 | CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentq, 64) |
20456 | 0 | CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentnq, 64) |
20457 | 0 | CUSTOM_BUILTIN_MAPPING(V6_vmaskedstoreq_128B, 128) |
20458 | 0 | CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorenq_128B, 128) |
20459 | 0 | CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentq_128B, 128) |
20460 | 0 | CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentnq_128B, 128) |
20461 | 0 | #include "clang/Basic/BuiltinsHexagonMapCustomDep.def" |
20462 | 0 | #undef CUSTOM_BUILTIN_MAPPING |
20463 | 0 | }; |
20464 | |
20465 | 0 | auto CmpInfo = [] (Info A, Info B) { return A.BuiltinID < B.BuiltinID; }; |
20466 | 0 | static const bool SortOnce = (llvm::sort(Infos, CmpInfo), true); |
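// (SortOnce is a function-local static, so the table is sorted exactly once,
// on first use; the comma operator discards llvm::sort's void result.)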
20467 | 0 | (void)SortOnce; |
20468 | |
20469 | 0 | const Info *F = llvm::lower_bound(Infos, Info{BuiltinID, 0, 0}, CmpInfo); |
20470 | 0 | if (F == std::end(Infos) || F->BuiltinID != BuiltinID) |
20471 | 0 | return {Intrinsic::not_intrinsic, 0}; |
20472 | | |
20473 | 0 | return {F->IntrinsicID, F->VecLen}; |
20474 | 0 | } |
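
The lookup above sorts a static table once (through the initializer of a
function-local static) and then binary-searches it with lower_bound. The same
idiom in a self-contained sketch, with hypothetical Entry/Table/Lookup names
standing in for Info/Infos:

    #include <algorithm>
    #include <iterator>

    struct Entry { unsigned Key; int Payload; };
    static Entry Table[] = {{3, 30}, {1, 10}, {2, 20}};

    static int Lookup(unsigned Key) {
      auto Cmp = [](const Entry &A, const Entry &B) { return A.Key < B.Key; };
      // Sorted exactly once, on first call, in a thread-safe way.
      static const bool SortOnce =
          (std::sort(std::begin(Table), std::end(Table), Cmp), true);
      (void)SortOnce;
      const Entry *F = std::lower_bound(std::begin(Table), std::end(Table),
                                        Entry{Key, 0}, Cmp);
      if (F == std::end(Table) || F->Key != Key)
        return -1; // analogous to {Intrinsic::not_intrinsic, 0}
      return F->Payload;
    }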
20475 | | |
20476 | | Value *CodeGenFunction::EmitHexagonBuiltinExpr(unsigned BuiltinID, |
20477 | 0 | const CallExpr *E) { |
20478 | 0 | Intrinsic::ID ID; |
20479 | 0 | unsigned VecLen; |
20480 | 0 | std::tie(ID, VecLen) = getIntrinsicForHexagonNonClangBuiltin(BuiltinID); |
20481 | |
20482 | 0 | auto MakeCircOp = [this, E](unsigned IntID, bool IsLoad) { |
20483 | | // The base pointer is passed by address, so it needs to be loaded. |
20484 | 0 | Address A = EmitPointerWithAlignment(E->getArg(0)); |
20485 | 0 | Address BP = Address(A.getPointer(), Int8PtrTy, A.getAlignment()); |
20486 | 0 | llvm::Value *Base = Builder.CreateLoad(BP); |
20487 | | // The treatment of both loads and stores is the same: the arguments for |
20488 | | // the builtin are the same as the arguments for the intrinsic. |
20489 | | // Load: |
20490 | | // builtin(Base, Inc, Mod, Start) -> intr(Base, Inc, Mod, Start) |
20491 | | // builtin(Base, Mod, Start) -> intr(Base, Mod, Start) |
20492 | | // Store: |
20493 | | // builtin(Base, Inc, Mod, Val, Start) -> intr(Base, Inc, Mod, Val, Start) |
20494 | | // builtin(Base, Mod, Val, Start) -> intr(Base, Mod, Val, Start) |
20495 | 0 | SmallVector<llvm::Value*,5> Ops = { Base }; |
20496 | 0 | for (unsigned i = 1, e = E->getNumArgs(); i != e; ++i) |
20497 | 0 | Ops.push_back(EmitScalarExpr(E->getArg(i))); |
20498 | |
20499 | 0 | llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(IntID), Ops); |
20500 | | // The load intrinsics generate two results (Value, NewBase), stores |
20501 | | // generate one (NewBase). The new base address needs to be stored. |
20502 | 0 | llvm::Value *NewBase = IsLoad ? Builder.CreateExtractValue(Result, 1) |
20503 | 0 | : Result; |
20504 | 0 | llvm::Value *LV = EmitScalarExpr(E->getArg(0)); |
20505 | 0 | Address Dest = EmitPointerWithAlignment(E->getArg(0)); |
20506 | 0 | llvm::Value *RetVal = |
20507 | 0 | Builder.CreateAlignedStore(NewBase, LV, Dest.getAlignment()); |
20508 | 0 | if (IsLoad) |
20509 | 0 | RetVal = Builder.CreateExtractValue(Result, 0); |
20510 | 0 | return RetVal; |
20511 | 0 | }; |
20512 | | |
20513 | | // Handle the conversion of bit-reverse load builtins to LLVM IR.
20514 | | // The emitted intrinsic call only reads from memory; the write to
20515 | | // memory is handled by the store instruction below.
20516 | 0 | auto MakeBrevLd = [this, E](unsigned IntID, llvm::Type *DestTy) { |
20517 | | // The intrinsic generates one result, which is the new value for the base
20518 | | // pointer. It needs to be returned. The loaded value is returned to the
20519 | | // caller by address (the second builtin argument), so it must be stored.
20520 | 0 | llvm::Value *BaseAddress = EmitScalarExpr(E->getArg(0)); |
20521 | | |
20522 | | // Expressions like &(*pt++) have side effects each time they are evaluated.
20523 | | // EmitPointerWithAlignment and EmitScalarExpr each evaluate the expression
20524 | | // once per call, so the argument must be evaluated only once.
20525 | 0 | Address DestAddr = EmitPointerWithAlignment(E->getArg(1)); |
20526 | 0 | DestAddr = Address(DestAddr.getPointer(), Int8Ty, DestAddr.getAlignment()); |
20527 | 0 | llvm::Value *DestAddress = DestAddr.getPointer(); |
20528 | | |
20529 | | // Operands are Base, Dest, Modifier. |
20530 | | // The intrinsic format in LLVM IR is defined as |
20531 | | // { ValueType, i8* } (i8*, i32). |
20532 | 0 | llvm::Value *Result = Builder.CreateCall( |
20533 | 0 | CGM.getIntrinsic(IntID), {BaseAddress, EmitScalarExpr(E->getArg(2))}); |
20534 | | |
20535 | | // The value needs to be stored as the variable is passed by reference. |
20536 | 0 | llvm::Value *DestVal = Builder.CreateExtractValue(Result, 0); |
20537 | | |
20538 | | // The stored value needs to be truncated to fit the destination type.
20539 | | // While i32 and i64 are natively supported on Hexagon, i8 and i16 need
20540 | | // to be handled with stores of the respective destination type.
20541 | 0 | DestVal = Builder.CreateTrunc(DestVal, DestTy); |
20542 | |
20543 | 0 | Builder.CreateAlignedStore(DestVal, DestAddress, DestAddr.getAlignment()); |
20544 | | // The updated value of the base pointer is returned. |
20545 | 0 | return Builder.CreateExtractValue(Result, 1); |
20546 | 0 | }; |
20547 | |
20548 | 0 | auto V2Q = [this, VecLen] (llvm::Value *Vec) { |
20549 | 0 | Intrinsic::ID ID = VecLen == 128 ? Intrinsic::hexagon_V6_vandvrt_128B |
20550 | 0 | : Intrinsic::hexagon_V6_vandvrt; |
20551 | 0 | return Builder.CreateCall(CGM.getIntrinsic(ID), |
20552 | 0 | {Vec, Builder.getInt32(-1)}); |
20553 | 0 | }; |
20554 | 0 | auto Q2V = [this, VecLen] (llvm::Value *Pred) { |
20555 | 0 | Intrinsic::ID ID = VecLen == 128 ? Intrinsic::hexagon_V6_vandqrt_128B |
20556 | 0 | : Intrinsic::hexagon_V6_vandqrt; |
20557 | 0 | return Builder.CreateCall(CGM.getIntrinsic(ID), |
20558 | 0 | {Pred, Builder.getInt32(-1)}); |
20559 | 0 | }; |
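// (V2Q and Q2V bridge between the C-level vector type and the HVX predicate
// type, using vandvrt/vandqrt with an all-ones scalar mask.)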
20560 | |
20561 | 0 | switch (BuiltinID) { |
20562 | | // These intrinsics return a tuple {Vector, VectorPred} in LLVM IR, |
20563 | | // and the corresponding C/C++ builtins use loads/stores to update |
20564 | | // the predicate. |
20565 | 0 | case Hexagon::BI__builtin_HEXAGON_V6_vaddcarry: |
20566 | 0 | case Hexagon::BI__builtin_HEXAGON_V6_vaddcarry_128B: |
20567 | 0 | case Hexagon::BI__builtin_HEXAGON_V6_vsubcarry: |
20568 | 0 | case Hexagon::BI__builtin_HEXAGON_V6_vsubcarry_128B: { |
20569 | | // Get the type from the 0-th argument. |
20570 | 0 | llvm::Type *VecType = ConvertType(E->getArg(0)->getType()); |
20571 | 0 | Address PredAddr = |
20572 | 0 | EmitPointerWithAlignment(E->getArg(2)).withElementType(VecType); |
20573 | 0 | llvm::Value *PredIn = V2Q(Builder.CreateLoad(PredAddr)); |
20574 | 0 | llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(ID), |
20575 | 0 | {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)), PredIn}); |
20576 | |
20577 | 0 | llvm::Value *PredOut = Builder.CreateExtractValue(Result, 1); |
20578 | 0 | Builder.CreateAlignedStore(Q2V(PredOut), PredAddr.getPointer(), |
20579 | 0 | PredAddr.getAlignment()); |
20580 | 0 | return Builder.CreateExtractValue(Result, 0); |
20581 | 0 | } |
20582 | | // These are identical to the builtins above, except they don't consume |
20583 | | // input carry, only generate carry-out. Since they still produce two |
20584 | | // outputs, generate the store of the predicate, but no load. |
20585 | 0 | case Hexagon::BI__builtin_HEXAGON_V6_vaddcarryo: |
20586 | 0 | case Hexagon::BI__builtin_HEXAGON_V6_vaddcarryo_128B: |
20587 | 0 | case Hexagon::BI__builtin_HEXAGON_V6_vsubcarryo: |
20588 | 0 | case Hexagon::BI__builtin_HEXAGON_V6_vsubcarryo_128B: { |
20589 | | // Get the type from the 0-th argument. |
20590 | 0 | llvm::Type *VecType = ConvertType(E->getArg(0)->getType()); |
20591 | 0 | Address PredAddr = |
20592 | 0 | EmitPointerWithAlignment(E->getArg(2)).withElementType(VecType); |
20593 | 0 | llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(ID), |
20594 | 0 | {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1))}); |
20595 | |
20596 | 0 | llvm::Value *PredOut = Builder.CreateExtractValue(Result, 1); |
20597 | 0 | Builder.CreateAlignedStore(Q2V(PredOut), PredAddr.getPointer(), |
20598 | 0 | PredAddr.getAlignment()); |
20599 | 0 | return Builder.CreateExtractValue(Result, 0); |
20600 | 0 | } |
20601 | | |
20602 | 0 | case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstoreq: |
20603 | 0 | case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorenq: |
20604 | 0 | case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorentq: |
20605 | 0 | case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorentnq: |
20606 | 0 | case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstoreq_128B: |
20607 | 0 | case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorenq_128B: |
20608 | 0 | case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorentq_128B: |
20609 | 0 | case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorentnq_128B: { |
20610 | 0 | SmallVector<llvm::Value*,4> Ops; |
20611 | 0 | const Expr *PredOp = E->getArg(0); |
20612 | | // There will be an implicit cast to a boolean vector. Strip it. |
20613 | 0 | if (auto *Cast = dyn_cast<ImplicitCastExpr>(PredOp)) { |
20614 | 0 | if (Cast->getCastKind() == CK_BitCast) |
20615 | 0 | PredOp = Cast->getSubExpr(); |
20616 | 0 | Ops.push_back(V2Q(EmitScalarExpr(PredOp))); |
20617 | 0 | } |
20618 | 0 | for (int i = 1, e = E->getNumArgs(); i != e; ++i) |
20619 | 0 | Ops.push_back(EmitScalarExpr(E->getArg(i))); |
20620 | 0 | return Builder.CreateCall(CGM.getIntrinsic(ID), Ops); |
20621 | 0 | } |
20622 | | |
20623 | 0 | case Hexagon::BI__builtin_HEXAGON_L2_loadrub_pci: |
20624 | 0 | case Hexagon::BI__builtin_HEXAGON_L2_loadrb_pci: |
20625 | 0 | case Hexagon::BI__builtin_HEXAGON_L2_loadruh_pci: |
20626 | 0 | case Hexagon::BI__builtin_HEXAGON_L2_loadrh_pci: |
20627 | 0 | case Hexagon::BI__builtin_HEXAGON_L2_loadri_pci: |
20628 | 0 | case Hexagon::BI__builtin_HEXAGON_L2_loadrd_pci: |
20629 | 0 | case Hexagon::BI__builtin_HEXAGON_L2_loadrub_pcr: |
20630 | 0 | case Hexagon::BI__builtin_HEXAGON_L2_loadrb_pcr: |
20631 | 0 | case Hexagon::BI__builtin_HEXAGON_L2_loadruh_pcr: |
20632 | 0 | case Hexagon::BI__builtin_HEXAGON_L2_loadrh_pcr: |
20633 | 0 | case Hexagon::BI__builtin_HEXAGON_L2_loadri_pcr: |
20634 | 0 | case Hexagon::BI__builtin_HEXAGON_L2_loadrd_pcr: |
20635 | 0 | return MakeCircOp(ID, /*IsLoad=*/true); |
20636 | 0 | case Hexagon::BI__builtin_HEXAGON_S2_storerb_pci: |
20637 | 0 | case Hexagon::BI__builtin_HEXAGON_S2_storerh_pci: |
20638 | 0 | case Hexagon::BI__builtin_HEXAGON_S2_storerf_pci: |
20639 | 0 | case Hexagon::BI__builtin_HEXAGON_S2_storeri_pci: |
20640 | 0 | case Hexagon::BI__builtin_HEXAGON_S2_storerd_pci: |
20641 | 0 | case Hexagon::BI__builtin_HEXAGON_S2_storerb_pcr: |
20642 | 0 | case Hexagon::BI__builtin_HEXAGON_S2_storerh_pcr: |
20643 | 0 | case Hexagon::BI__builtin_HEXAGON_S2_storerf_pcr: |
20644 | 0 | case Hexagon::BI__builtin_HEXAGON_S2_storeri_pcr: |
20645 | 0 | case Hexagon::BI__builtin_HEXAGON_S2_storerd_pcr: |
20646 | 0 | return MakeCircOp(ID, /*IsLoad=*/false); |
20647 | 0 | case Hexagon::BI__builtin_brev_ldub: |
20648 | 0 | return MakeBrevLd(Intrinsic::hexagon_L2_loadrub_pbr, Int8Ty); |
20649 | 0 | case Hexagon::BI__builtin_brev_ldb: |
20650 | 0 | return MakeBrevLd(Intrinsic::hexagon_L2_loadrb_pbr, Int8Ty); |
20651 | 0 | case Hexagon::BI__builtin_brev_lduh: |
20652 | 0 | return MakeBrevLd(Intrinsic::hexagon_L2_loadruh_pbr, Int16Ty); |
20653 | 0 | case Hexagon::BI__builtin_brev_ldh: |
20654 | 0 | return MakeBrevLd(Intrinsic::hexagon_L2_loadrh_pbr, Int16Ty); |
20655 | 0 | case Hexagon::BI__builtin_brev_ldw: |
20656 | 0 | return MakeBrevLd(Intrinsic::hexagon_L2_loadri_pbr, Int32Ty); |
20657 | 0 | case Hexagon::BI__builtin_brev_ldd: |
20658 | 0 | return MakeBrevLd(Intrinsic::hexagon_L2_loadrd_pbr, Int64Ty); |
20659 | 0 | } // switch |
20660 | | |
20661 | 0 | return nullptr; |
20662 | 0 | } |
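
MakeCircOp and MakeBrevLd both bridge the same calling-convention mismatch:
the C builtin takes the base pointer by address, while the LLVM intrinsic
returns the post-increment base as an extra result that must be stored back.
A minimal sketch of that pattern with hypothetical names (not the actual
Hexagon intrinsic signatures):

    #include <cstdint>
    #include <utility>

    // Stand-in for an intrinsic returning {LoadedValue, NewBase}.
    static std::pair<int32_t, int32_t *> LoadPostInc(int32_t *Base, int Inc) {
      return {*Base, Base + Inc};
    }

    // What the helper does for a load: call, store the new base back through
    // the by-address parameter, and return the loaded value (extractvalue 0).
    static int32_t EmitCircLoad(int32_t **BaseByAddr, int Inc) {
      auto [Value, NewBase] = LoadPostInc(*BaseByAddr, Inc);
      *BaseByAddr = NewBase;
      return Value;
    }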
20663 | | |
20664 | | Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID, |
20665 | | const CallExpr *E, |
20666 | 52 | ReturnValueSlot ReturnValue) { |
20667 | 52 | SmallVector<Value *, 4> Ops; |
20668 | 52 | llvm::Type *ResultType = ConvertType(E->getType()); |
20669 | | |
20670 | | // Find out if any arguments are required to be integer constant expressions. |
20671 | 52 | unsigned ICEArguments = 0; |
20672 | 52 | ASTContext::GetBuiltinTypeError Error; |
20673 | 52 | getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments); |
20674 | 52 | if (Error == ASTContext::GE_Missing_type) { |
20675 | | // Vector intrinsics don't have a type string. |
20676 | 0 | assert(BuiltinID >= clang::RISCV::FirstRVVBuiltin && |
20677 | 0 | BuiltinID <= clang::RISCV::LastRVVBuiltin); |
20678 | 0 | ICEArguments = 0; |
20679 | 0 | if (BuiltinID == RISCVVector::BI__builtin_rvv_vget_v || |
20680 | 0 | BuiltinID == RISCVVector::BI__builtin_rvv_vset_v) |
20681 | 0 | ICEArguments = 1 << 1; |
20682 | 52 | } else { |
20683 | 52 | assert(Error == ASTContext::GE_None && "Unexpected error"); |
20684 | 52 | } |
20685 | | |
20686 | 52 | if (BuiltinID == RISCV::BI__builtin_riscv_ntl_load) |
20687 | 0 | ICEArguments |= (1 << 1); |
20688 | 52 | if (BuiltinID == RISCV::BI__builtin_riscv_ntl_store) |
20689 | 0 | ICEArguments |= (1 << 2); |
20690 | | |
20691 | 128 | for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
20692 | | // Handle aggregate arguments, namely RVV tuple types in segment loads/stores.
20693 | 76 | if (hasAggregateEvaluationKind(E->getArg(i)->getType())) { |
20694 | 0 | LValue L = EmitAggExprToLValue(E->getArg(i)); |
20695 | 0 | llvm::Value *AggValue = Builder.CreateLoad(L.getAddress(*this)); |
20696 | 0 | Ops.push_back(AggValue); |
20697 | 0 | continue; |
20698 | 0 | } |
20699 | 76 | Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E)); |
20700 | 76 | } |
20701 | | |
20702 | 52 | Intrinsic::ID ID = Intrinsic::not_intrinsic; |
20703 | 52 | unsigned NF = 1; |
20704 | | // The 0th bit simulates the `vta` of RVV |
20705 | | // The 1st bit simulates the `vma` of RVV |
20706 | 52 | constexpr unsigned RVV_VTA = 0x1; |
20707 | 52 | constexpr unsigned RVV_VMA = 0x2; |
20708 | 52 | int PolicyAttrs = 0; |
20709 | 52 | bool IsMasked = false; |
20710 | | |
20711 | | // Required for overloaded intrinsics. |
20712 | 52 | llvm::SmallVector<llvm::Type *, 2> IntrinsicTypes; |
20713 | 52 | switch (BuiltinID) { |
20714 | 0 | default: llvm_unreachable("unexpected builtin ID"); |
20715 | 2 | case RISCV::BI__builtin_riscv_orc_b_32: |
20716 | 3 | case RISCV::BI__builtin_riscv_orc_b_64: |
20717 | 9 | case RISCV::BI__builtin_riscv_clz_32: |
20718 | 12 | case RISCV::BI__builtin_riscv_clz_64: |
20719 | 14 | case RISCV::BI__builtin_riscv_ctz_32: |
20720 | 15 | case RISCV::BI__builtin_riscv_ctz_64: |
20721 | 19 | case RISCV::BI__builtin_riscv_clmul_32: |
20722 | 21 | case RISCV::BI__builtin_riscv_clmul_64: |
20723 | 23 | case RISCV::BI__builtin_riscv_clmulh_32: |
20724 | 25 | case RISCV::BI__builtin_riscv_clmulh_64: |
20725 | 26 | case RISCV::BI__builtin_riscv_clmulr_32: |
20726 | 27 | case RISCV::BI__builtin_riscv_clmulr_64: |
20727 | 28 | case RISCV::BI__builtin_riscv_xperm4_32: |
20728 | 29 | case RISCV::BI__builtin_riscv_xperm4_64: |
20729 | 30 | case RISCV::BI__builtin_riscv_xperm8_32: |
20730 | 31 | case RISCV::BI__builtin_riscv_xperm8_64: |
20731 | 33 | case RISCV::BI__builtin_riscv_brev8_32: |
20732 | 34 | case RISCV::BI__builtin_riscv_brev8_64: |
20733 | 35 | case RISCV::BI__builtin_riscv_zip_32: |
20734 | 36 | case RISCV::BI__builtin_riscv_unzip_32: { |
20735 | 36 | switch (BuiltinID) { |
20736 | 0 | default: llvm_unreachable("unexpected builtin ID"); |
20737 | | // Zbb |
20738 | 2 | case RISCV::BI__builtin_riscv_orc_b_32: |
20739 | 3 | case RISCV::BI__builtin_riscv_orc_b_64: |
20740 | 3 | ID = Intrinsic::riscv_orc_b; |
20741 | 3 | break; |
20742 | 6 | case RISCV::BI__builtin_riscv_clz_32: |
20743 | 9 | case RISCV::BI__builtin_riscv_clz_64: { |
20744 | 9 | Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType()); |
20745 | 9 | Value *Result = Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)}); |
20746 | 9 | if (Result->getType() != ResultType) |
20747 | 3 | Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, |
20748 | 3 | "cast"); |
20749 | 9 | return Result; |
20750 | 6 | } |
20751 | 2 | case RISCV::BI__builtin_riscv_ctz_32: |
20752 | 3 | case RISCV::BI__builtin_riscv_ctz_64: { |
20753 | 3 | Function *F = CGM.getIntrinsic(Intrinsic::cttz, Ops[0]->getType()); |
20754 | 3 | Value *Result = Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)}); |
20755 | 3 | if (Result->getType() != ResultType) |
20756 | 1 | Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, |
20757 | 1 | "cast"); |
20758 | 3 | return Result; |
20759 | 2 | } |
20760 | | |
20761 | | // Zbc |
20762 | 4 | case RISCV::BI__builtin_riscv_clmul_32: |
20763 | 6 | case RISCV::BI__builtin_riscv_clmul_64: |
20764 | 6 | ID = Intrinsic::riscv_clmul; |
20765 | 6 | break; |
20766 | 2 | case RISCV::BI__builtin_riscv_clmulh_32: |
20767 | 4 | case RISCV::BI__builtin_riscv_clmulh_64: |
20768 | 4 | ID = Intrinsic::riscv_clmulh; |
20769 | 4 | break; |
20770 | 1 | case RISCV::BI__builtin_riscv_clmulr_32: |
20771 | 2 | case RISCV::BI__builtin_riscv_clmulr_64: |
20772 | 2 | ID = Intrinsic::riscv_clmulr; |
20773 | 2 | break; |
20774 | | |
20775 | | // Zbkx |
20776 | 1 | case RISCV::BI__builtin_riscv_xperm8_32: |
20777 | 2 | case RISCV::BI__builtin_riscv_xperm8_64: |
20778 | 2 | ID = Intrinsic::riscv_xperm8; |
20779 | 2 | break; |
20780 | 1 | case RISCV::BI__builtin_riscv_xperm4_32: |
20781 | 2 | case RISCV::BI__builtin_riscv_xperm4_64: |
20782 | 2 | ID = Intrinsic::riscv_xperm4; |
20783 | 2 | break; |
20784 | | |
20785 | | // Zbkb |
20786 | 2 | case RISCV::BI__builtin_riscv_brev8_32: |
20787 | 3 | case RISCV::BI__builtin_riscv_brev8_64: |
20788 | 3 | ID = Intrinsic::riscv_brev8; |
20789 | 3 | break; |
20790 | 1 | case RISCV::BI__builtin_riscv_zip_32: |
20791 | 1 | ID = Intrinsic::riscv_zip; |
20792 | 1 | break; |
20793 | 1 | case RISCV::BI__builtin_riscv_unzip_32: |
20794 | 1 | ID = Intrinsic::riscv_unzip; |
20795 | 1 | break; |
20796 | 36 | } |
20797 | | |
20798 | 24 | IntrinsicTypes = {ResultType}; |
20799 | 24 | break; |
20800 | 36 | } |
20801 | | |
20802 | | // Zk builtins |
20803 | | |
20804 | | // Zknh |
20805 | 2 | case RISCV::BI__builtin_riscv_sha256sig0: |
20806 | 2 | ID = Intrinsic::riscv_sha256sig0; |
20807 | 2 | break; |
20808 | 2 | case RISCV::BI__builtin_riscv_sha256sig1: |
20809 | 2 | ID = Intrinsic::riscv_sha256sig1; |
20810 | 2 | break; |
20811 | 2 | case RISCV::BI__builtin_riscv_sha256sum0: |
20812 | 2 | ID = Intrinsic::riscv_sha256sum0; |
20813 | 2 | break; |
20814 | 2 | case RISCV::BI__builtin_riscv_sha256sum1: |
20815 | 2 | ID = Intrinsic::riscv_sha256sum1; |
20816 | 2 | break; |
20817 | | |
20818 | | // Zksed |
20819 | 2 | case RISCV::BI__builtin_riscv_sm4ks: |
20820 | 2 | ID = Intrinsic::riscv_sm4ks; |
20821 | 2 | break; |
20822 | 2 | case RISCV::BI__builtin_riscv_sm4ed: |
20823 | 2 | ID = Intrinsic::riscv_sm4ed; |
20824 | 2 | break; |
20825 | | |
20826 | | // Zksh |
20827 | 2 | case RISCV::BI__builtin_riscv_sm3p0: |
20828 | 2 | ID = Intrinsic::riscv_sm3p0; |
20829 | 2 | break; |
20830 | 2 | case RISCV::BI__builtin_riscv_sm3p1: |
20831 | 2 | ID = Intrinsic::riscv_sm3p1; |
20832 | 2 | break; |
20833 | | |
20834 | | // Zihintntl |
20835 | 0 | case RISCV::BI__builtin_riscv_ntl_load: { |
20836 | 0 | llvm::Type *ResTy = ConvertType(E->getType()); |
20837 | 0 | unsigned DomainVal = 5; // Default __RISCV_NTLH_ALL |
20838 | 0 | if (Ops.size() == 2) |
20839 | 0 | DomainVal = cast<ConstantInt>(Ops[1])->getZExtValue(); |
20840 | |
20841 | 0 | llvm::MDNode *RISCVDomainNode = llvm::MDNode::get( |
20842 | 0 | getLLVMContext(), |
20843 | 0 | llvm::ConstantAsMetadata::get(Builder.getInt32(DomainVal))); |
20844 | 0 | llvm::MDNode *NontemporalNode = llvm::MDNode::get( |
20845 | 0 | getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1))); |
20846 | |
20847 | 0 | int Width; |
20848 | 0 | if (ResTy->isScalableTy()) {
20849 | 0 | const ScalableVectorType *SVTy = cast<ScalableVectorType>(ResTy); |
20850 | 0 | llvm::Type *ScalarTy = ResTy->getScalarType(); |
20851 | 0 | Width = ScalarTy->getPrimitiveSizeInBits() * |
20852 | 0 | SVTy->getElementCount().getKnownMinValue(); |
20853 | 0 | } else |
20854 | 0 | Width = ResTy->getPrimitiveSizeInBits(); |
20855 | 0 | LoadInst *Load = Builder.CreateLoad( |
20856 | 0 | Address(Ops[0], ResTy, CharUnits::fromQuantity(Width / 8))); |
20857 | |
20858 | 0 | Load->setMetadata(llvm::LLVMContext::MD_nontemporal, NontemporalNode); |
20859 | 0 | Load->setMetadata(CGM.getModule().getMDKindID("riscv-nontemporal-domain"), |
20860 | 0 | RISCVDomainNode); |
20861 | |
20862 | 0 | return Load; |
20863 | 36 | } |
20864 | 0 | case RISCV::BI__builtin_riscv_ntl_store: { |
20865 | 0 | unsigned DomainVal = 5; // Default __RISCV_NTLH_ALL |
20866 | 0 | if (Ops.size() == 3) |
20867 | 0 | DomainVal = cast<ConstantInt>(Ops[2])->getZExtValue(); |
20868 | |
20869 | 0 | llvm::MDNode *RISCVDomainNode = llvm::MDNode::get( |
20870 | 0 | getLLVMContext(), |
20871 | 0 | llvm::ConstantAsMetadata::get(Builder.getInt32(DomainVal))); |
20872 | 0 | llvm::MDNode *NontemporalNode = llvm::MDNode::get( |
20873 | 0 | getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1))); |
20874 | |
20875 | 0 | StoreInst *Store = Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); |
20876 | 0 | Store->setMetadata(llvm::LLVMContext::MD_nontemporal, NontemporalNode); |
20877 | 0 | Store->setMetadata(CGM.getModule().getMDKindID("riscv-nontemporal-domain"), |
20878 | 0 | RISCVDomainNode); |
20879 | |
20880 | 0 | return Store; |
20881 | 36 | } |
20882 | | |
20883 | | // Vector builtins are handled from here. |
20884 | 0 | #include "clang/Basic/riscv_vector_builtin_cg.inc" |
20885 | | // SiFive Vector builtins are handled from here. |
20886 | 52 | #include "clang/Basic/riscv_sifive_vector_builtin_cg.inc"
20887 | 52 | } |
20888 | | |
20889 | 40 | assert(ID != Intrinsic::not_intrinsic); |
20890 | | |
20891 | 40 | llvm::Function *F = CGM.getIntrinsic(ID, IntrinsicTypes); |
20892 | 40 | return Builder.CreateCall(F, Ops, ""); |
20893 | 40 | } |
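
The scalar bit-manipulation cases above mostly reduce to generic LLVM
intrinsics; clz and ctz, for example, become llvm.ctlz/llvm.cttz with the
is-zero-poison flag set to false, followed by a sign-extending cast whenever
the intrinsic's width differs from the builtin's return type. A reference
sketch, in portable C++, of the semantics the clz_32 lowering implements:

    #include <cstdint>

    // Matches llvm.ctlz(X, /*is_zero_poison=*/false): defined for X == 0,
    // where it yields the bit width (32).
    static int64_t Clz32(uint32_t X) {
      int32_t N = 0;
      while (N < 32 && !(X & (UINT32_C(1) << (31 - N))))
        ++N;
      return static_cast<int64_t>(N); // the IntCast emitted when types differ
    }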