Coverage Report

Created: 2023-11-11 10:31

/Users/buildslave/jenkins/workspace/coverage/llvm-project/clang/lib/CodeGen/CGBuiltin.cpp
Line
Count
Source
1
//===---- CGBuiltin.cpp - Emit LLVM Code for builtins ---------------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This contains code to emit Builtin calls as LLVM code.
10
//
11
//===----------------------------------------------------------------------===//
12
13
#include "ABIInfo.h"
14
#include "CGCUDARuntime.h"
15
#include "CGCXXABI.h"
16
#include "CGObjCRuntime.h"
17
#include "CGOpenCLRuntime.h"
18
#include "CGRecordLayout.h"
19
#include "CodeGenFunction.h"
20
#include "CodeGenModule.h"
21
#include "ConstantEmitter.h"
22
#include "PatternInit.h"
23
#include "TargetInfo.h"
24
#include "clang/AST/ASTContext.h"
25
#include "clang/AST/Attr.h"
26
#include "clang/AST/Decl.h"
27
#include "clang/AST/OSLog.h"
28
#include "clang/AST/OperationKinds.h"
29
#include "clang/Basic/TargetBuiltins.h"
30
#include "clang/Basic/TargetInfo.h"
31
#include "clang/Basic/TargetOptions.h"
32
#include "clang/CodeGen/CGFunctionInfo.h"
33
#include "clang/Frontend/FrontendDiagnostic.h"
34
#include "llvm/ADT/APFloat.h"
35
#include "llvm/ADT/APInt.h"
36
#include "llvm/ADT/FloatingPointMode.h"
37
#include "llvm/ADT/SmallPtrSet.h"
38
#include "llvm/ADT/StringExtras.h"
39
#include "llvm/Analysis/ValueTracking.h"
40
#include "llvm/IR/DataLayout.h"
41
#include "llvm/IR/InlineAsm.h"
42
#include "llvm/IR/Intrinsics.h"
43
#include "llvm/IR/IntrinsicsAArch64.h"
44
#include "llvm/IR/IntrinsicsAMDGPU.h"
45
#include "llvm/IR/IntrinsicsARM.h"
46
#include "llvm/IR/IntrinsicsBPF.h"
47
#include "llvm/IR/IntrinsicsHexagon.h"
48
#include "llvm/IR/IntrinsicsNVPTX.h"
49
#include "llvm/IR/IntrinsicsPowerPC.h"
50
#include "llvm/IR/IntrinsicsR600.h"
51
#include "llvm/IR/IntrinsicsRISCV.h"
52
#include "llvm/IR/IntrinsicsS390.h"
53
#include "llvm/IR/IntrinsicsVE.h"
54
#include "llvm/IR/IntrinsicsWebAssembly.h"
55
#include "llvm/IR/IntrinsicsX86.h"
56
#include "llvm/IR/MDBuilder.h"
57
#include "llvm/IR/MatrixBuilder.h"
58
#include "llvm/Support/ConvertUTF.h"
59
#include "llvm/Support/MathExtras.h"
60
#include "llvm/Support/ScopedPrinter.h"
61
#include "llvm/TargetParser/AArch64TargetParser.h"
62
#include "llvm/TargetParser/X86TargetParser.h"
63
#include <optional>
64
#include <sstream>
65
66
using namespace clang;
67
using namespace CodeGen;
68
using namespace llvm;
69
70
static llvm::cl::opt<bool> ClSanitizeAlignmentBuiltin(
71
    "sanitize-alignment-builtin", llvm::cl::Hidden,
72
    llvm::cl::desc("Instrument builtin functions for -fsanitize=alignment"),
73
    llvm::cl::init(true));
74
75
static void initializeAlloca(CodeGenFunction &CGF, AllocaInst *AI, Value *Size,
76
33
                             Align AlignmentInBytes) {
77
33
  ConstantInt *Byte;
78
33
  switch (CGF.getLangOpts().getTrivialAutoVarInit()) {
79
19
  case LangOptions::TrivialAutoVarInitKind::Uninitialized:
80
    // Nothing to initialize.
81
19
    return;
82
7
  case LangOptions::TrivialAutoVarInitKind::Zero:
83
7
    Byte = CGF.Builder.getInt8(0x00);
84
7
    break;
85
7
  case LangOptions::TrivialAutoVarInitKind::Pattern: {
86
7
    llvm::Type *Int8 = llvm::IntegerType::getInt8Ty(CGF.CGM.getLLVMContext());
87
7
    Byte = llvm::dyn_cast<llvm::ConstantInt>(
88
7
        initializationPatternFor(CGF.CGM, Int8));
89
7
    break;
90
0
  }
91
33
  }
92
14
  if (CGF.CGM.stopAutoInit())
93
8
    return;
94
6
  auto *I = CGF.Builder.CreateMemSet(AI, Byte, Size, AlignmentInBytes);
95
6
  I->addAnnotationMetadata("auto-init");
96
6
}
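
A minimal sketch of what this helper produces (illustrative, not taken from the report): with -ftrivial-auto-var-init=zero, a variable-length __builtin_alloca is followed by a memset of the whole block, and that memset carries "auto-init" annotation metadata. The function name below is made up.

    void *grab(unsigned n) {      // 'grab' is a hypothetical example
      return __builtin_alloca(n); // emitted as an alloca plus, roughly,
                                  // llvm.memset(ptr, i8 0, i64 %n) annotated "auto-init"
    }
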
97
98
/// getBuiltinLibFunction - Given a builtin id for a function like
99
/// "__builtin_fabsf", return a Function* for "fabsf".
100
llvm::Constant *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD,
101
1.81k
                                                     unsigned BuiltinID) {
102
1.81k
  assert(Context.BuiltinInfo.isLibFunction(BuiltinID));
103
104
  // Get the name, skip over the __builtin_ prefix (if necessary).
105
1.81k
  StringRef Name;
106
1.81k
  GlobalDecl D(FD);
107
108
  // TODO: This list should be expanded or refactored after all GCC-compatible
109
  // std libcall builtins are implemented.
110
1.81k
  static SmallDenseMap<unsigned, StringRef, 64> F128Builtins{
111
1.81k
      {Builtin::BI__builtin___fprintf_chk, "__fprintf_chkieee128"},
112
1.81k
      {Builtin::BI__builtin___printf_chk, "__printf_chkieee128"},
113
1.81k
      {Builtin::BI__builtin___snprintf_chk, "__snprintf_chkieee128"},
114
1.81k
      {Builtin::BI__builtin___sprintf_chk, "__sprintf_chkieee128"},
115
1.81k
      {Builtin::BI__builtin___vfprintf_chk, "__vfprintf_chkieee128"},
116
1.81k
      {Builtin::BI__builtin___vprintf_chk, "__vprintf_chkieee128"},
117
1.81k
      {Builtin::BI__builtin___vsnprintf_chk, "__vsnprintf_chkieee128"},
118
1.81k
      {Builtin::BI__builtin___vsprintf_chk, "__vsprintf_chkieee128"},
119
1.81k
      {Builtin::BI__builtin_fprintf, "__fprintfieee128"},
120
1.81k
      {Builtin::BI__builtin_printf, "__printfieee128"},
121
1.81k
      {Builtin::BI__builtin_snprintf, "__snprintfieee128"},
122
1.81k
      {Builtin::BI__builtin_sprintf, "__sprintfieee128"},
123
1.81k
      {Builtin::BI__builtin_vfprintf, "__vfprintfieee128"},
124
1.81k
      {Builtin::BI__builtin_vprintf, "__vprintfieee128"},
125
1.81k
      {Builtin::BI__builtin_vsnprintf, "__vsnprintfieee128"},
126
1.81k
      {Builtin::BI__builtin_vsprintf, "__vsprintfieee128"},
127
1.81k
      {Builtin::BI__builtin_fscanf, "__fscanfieee128"},
128
1.81k
      {Builtin::BI__builtin_scanf, "__scanfieee128"},
129
1.81k
      {Builtin::BI__builtin_sscanf, "__sscanfieee128"},
130
1.81k
      {Builtin::BI__builtin_vfscanf, "__vfscanfieee128"},
131
1.81k
      {Builtin::BI__builtin_vscanf, "__vscanfieee128"},
132
1.81k
      {Builtin::BI__builtin_vsscanf, "__vsscanfieee128"},
133
1.81k
      {Builtin::BI__builtin_nexttowardf128, "__nexttowardieee128"},
134
1.81k
  };
135
136
  // The AIX library functions frexpl, ldexpl, and modfl are for 128-bit
137
  // IBM 'long double' (i.e. __ibm128). Map to the 'double' versions
138
  // if it is 64-bit 'long double' mode.
139
1.81k
  static SmallDenseMap<unsigned, StringRef, 4> AIXLongDouble64Builtins{
140
1.81k
      {Builtin::BI__builtin_frexpl, "frexp"},
141
1.81k
      {Builtin::BI__builtin_ldexpl, "ldexp"},
142
1.81k
      {Builtin::BI__builtin_modfl, "modf"},
143
1.81k
  };
144
145
  // If the builtin has been declared explicitly with an assembler label,
146
  // use the mangled name. This differs from the plain label on platforms
147
  // that prefix labels.
148
1.81k
  if (FD->hasAttr<AsmLabelAttr>())
149
0
    Name = getMangledName(D);
150
1.81k
  else {
151
    // TODO: This mutation should also be applied to targets other than
152
    // PPC, after the backend supports IEEE 128-bit style libcalls.
153
1.81k
    if (getTriple().isPPC64() &&
154
1.81k
        &getTarget().getLongDoubleFormat() == &llvm::APFloat::IEEEquad() &&
155
1.81k
        F128Builtins.contains(BuiltinID))
156
13
      Name = F128Builtins[BuiltinID];
157
1.80k
    else if (getTriple().isOSAIX() &&
158
1.80k
             &getTarget().getLongDoubleFormat() ==
159
2
                 &llvm::APFloat::IEEEdouble() &&
160
1.80k
             AIXLongDouble64Builtins.contains(BuiltinID))
161
2
      Name = AIXLongDouble64Builtins[BuiltinID];
162
1.80k
    else
163
1.80k
      Name = Context.BuiltinInfo.getName(BuiltinID).substr(10);
164
1.81k
  }
165
166
1.81k
  llvm::FunctionType *Ty =
167
1.81k
    cast<llvm::FunctionType>(getTypes().ConvertType(FD->getType()));
168
169
1.81k
  return GetOrCreateLLVMFunction(Name, Ty, D, /*ForVTable=*/false);
170
1.81k
}
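
A rough usage illustration of the name mapping above (not from the report): the __builtin_ prefix is stripped for ordinary library builtins, and on PPC64 with 128-bit IEEE long double the printf/scanf family is redirected to the *ieee128 entry points.

    float g(float x) { return __builtin_fabsf(x); }  // lowers to a call to fabsf
    // On powerpc64le with IEEE-quad long double, __builtin_printf("%Lf\n", ld)
    // is emitted as a call to __printfieee128 instead of printf.
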
171
172
/// Emit the conversions required to turn the given value into an
173
/// integer of the given size.
174
static Value *EmitToInt(CodeGenFunction &CGF, llvm::Value *V,
175
626
                        QualType T, llvm::IntegerType *IntType) {
176
626
  V = CGF.EmitToMemory(V, T);
177
178
626
  if (V->getType()->isPointerTy())
179
16
    return CGF.Builder.CreatePtrToInt(V, IntType);
180
181
610
  assert(V->getType() == IntType);
182
610
  return V;
183
610
}
184
185
static Value *EmitFromInt(CodeGenFunction &CGF, llvm::Value *V,
186
583
                          QualType T, llvm::Type *ResultType) {
187
583
  V = CGF.EmitFromMemory(V, T);
188
189
583
  if (ResultType->isPointerTy())
190
10
    return CGF.Builder.CreateIntToPtr(V, ResultType);
191
192
573
  assert(V->getType() == ResultType);
193
573
  return V;
194
573
}
195
196
static llvm::Value *CheckAtomicAlignment(CodeGenFunction &CGF,
197
610
                                         const CallExpr *E) {
198
610
  ASTContext &Ctx = CGF.getContext();
199
610
  Address Ptr = CGF.EmitPointerWithAlignment(E->getArg(0));
200
610
  unsigned Bytes = Ptr.getElementType()->isPointerTy()
201
610
                       ? Ctx.getTypeSizeInChars(Ctx.VoidPtrTy).getQuantity()
202
610
                       : Ptr.getElementType()->getScalarSizeInBits() / 8;
203
610
  unsigned Align = Ptr.getAlignment().getQuantity();
204
610
  if (Align % Bytes != 0) {
205
5
    DiagnosticsEngine &Diags = CGF.CGM.getDiags();
206
5
    Diags.Report(E->getBeginLoc(), diag::warn_sync_op_misaligned);
207
5
  }
208
610
  return Ptr.getPointer();
209
610
}
210
211
/// Utility to insert an atomic instruction based on Intrinsic::ID
212
/// and the expression node.
213
static Value *MakeBinaryAtomicValue(
214
    CodeGenFunction &CGF, llvm::AtomicRMWInst::BinOp Kind, const CallExpr *E,
215
504
    AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) {
216
217
504
  QualType T = E->getType();
218
504
  assert(E->getArg(0)->getType()->isPointerType());
219
504
  assert(CGF.getContext().hasSameUnqualifiedType(T,
220
504
                                  E->getArg(0)->getType()->getPointeeType()));
221
504
  assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));
222
223
504
  llvm::Value *DestPtr = CheckAtomicAlignment(CGF, E);
224
225
504
  llvm::IntegerType *IntType = llvm::IntegerType::get(
226
504
      CGF.getLLVMContext(), CGF.getContext().getTypeSize(T));
227
228
504
  llvm::Value *Args[2];
229
504
  Args[0] = DestPtr;
230
504
  Args[1] = CGF.EmitScalarExpr(E->getArg(1));
231
504
  llvm::Type *ValueType = Args[1]->getType();
232
504
  Args[1] = EmitToInt(CGF, Args[1], T, IntType);
233
234
504
  llvm::Value *Result = CGF.Builder.CreateAtomicRMW(
235
504
      Kind, Args[0], Args[1], Ordering);
236
504
  return EmitFromInt(CGF, Result, T, ValueType);
237
504
}
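
As a sketch of this lowering (simplified IR, hypothetical names): a __sync_fetch_and_* builtin becomes a single sequentially consistent atomicrmw whose result is the value held before the operation.

    static int counter;
    int bump(void) {
      return __sync_fetch_and_add(&counter, 5);
      // roughly: %old = atomicrmw add ptr @counter, i32 5 seq_cst ; %old is returned
    }
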
238
239
84
static Value *EmitNontemporalStore(CodeGenFunction &CGF, const CallExpr *E) {
240
84
  Value *Val = CGF.EmitScalarExpr(E->getArg(0));
241
84
  Value *Address = CGF.EmitScalarExpr(E->getArg(1));
242
243
84
  Val = CGF.EmitToMemory(Val, E->getArg(0)->getType());
244
84
  LValue LV = CGF.MakeNaturalAlignAddrLValue(Address, E->getArg(0)->getType());
245
84
  LV.setNontemporal(true);
246
84
  CGF.EmitStoreOfScalar(Val, LV, false);
247
84
  return nullptr;
248
84
}
249
250
35
static Value *EmitNontemporalLoad(CodeGenFunction &CGF, const CallExpr *E) {
251
35
  Value *Address = CGF.EmitScalarExpr(E->getArg(0));
252
253
35
  LValue LV = CGF.MakeNaturalAlignAddrLValue(Address, E->getType());
254
35
  LV.setNontemporal(true);
255
35
  return CGF.EmitLoadOfScalar(LV, E->getExprLoc());
256
35
}
257
258
static RValue EmitBinaryAtomic(CodeGenFunction &CGF,
259
                               llvm::AtomicRMWInst::BinOp Kind,
260
134
                               const CallExpr *E) {
261
134
  return RValue::get(MakeBinaryAtomicValue(CGF, Kind, E));
262
134
}
263
264
/// Utility to insert an atomic instruction based on Intrinsic::ID and
265
/// the expression node, where the return value is the result of the
266
/// operation.
267
static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF,
268
                                   llvm::AtomicRMWInst::BinOp Kind,
269
                                   const CallExpr *E,
270
                                   Instruction::BinaryOps Op,
271
64
                                   bool Invert = false) {
272
64
  QualType T = E->getType();
273
64
  assert(E->getArg(0)->getType()->isPointerType());
274
64
  assert(CGF.getContext().hasSameUnqualifiedType(T,
275
64
                                  E->getArg(0)->getType()->getPointeeType()));
276
64
  assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));
277
278
64
  llvm::Value *DestPtr = CheckAtomicAlignment(CGF, E);
279
280
64
  llvm::IntegerType *IntType = llvm::IntegerType::get(
281
64
      CGF.getLLVMContext(), CGF.getContext().getTypeSize(T));
282
283
64
  llvm::Value *Args[2];
284
64
  Args[1] = CGF.EmitScalarExpr(E->getArg(1));
285
64
  llvm::Type *ValueType = Args[1]->getType();
286
64
  Args[1] = EmitToInt(CGF, Args[1], T, IntType);
287
64
  Args[0] = DestPtr;
288
289
64
  llvm::Value *Result = CGF.Builder.CreateAtomicRMW(
290
64
      Kind, Args[0], Args[1], llvm::AtomicOrdering::SequentiallyConsistent);
291
64
  Result = CGF.Builder.CreateBinOp(Op, Result, Args[1]);
292
64
  if (Invert)
293
12
    Result =
294
12
        CGF.Builder.CreateBinOp(llvm::Instruction::Xor, Result,
295
12
                                llvm::ConstantInt::getAllOnesValue(IntType));
296
64
  Result = EmitFromInt(CGF, Result, T, ValueType);
297
64
  return RValue::get(Result);
298
64
}
299
300
/// Utility to insert an atomic cmpxchg instruction.
301
///
302
/// @param CGF The current codegen function.
303
/// @param E   Builtin call expression to convert to cmpxchg.
304
///            arg0 - address to operate on
305
///            arg1 - value to compare with
306
///            arg2 - new value
307
/// @param ReturnBool Specifies whether to return success flag of
308
///                   cmpxchg result or the old value.
309
///
310
/// @returns result of cmpxchg, according to ReturnBool
311
///
312
/// Note: In order to lower Microsoft's _InterlockedCompareExchange* intrinsics,
313
/// invoke the function EmitAtomicCmpXchgForMSIntrin.
314
static Value *MakeAtomicCmpXchgValue(CodeGenFunction &CGF, const CallExpr *E,
315
29
                                     bool ReturnBool) {
316
29
  QualType T = ReturnBool ? E->getArg(1)->getType() : E->getType();
317
29
  llvm::Value *DestPtr = CheckAtomicAlignment(CGF, E);
318
319
29
  llvm::IntegerType *IntType = llvm::IntegerType::get(
320
29
      CGF.getLLVMContext(), CGF.getContext().getTypeSize(T));
321
322
29
  Value *Args[3];
323
29
  Args[0] = DestPtr;
324
29
  Args[1] = CGF.EmitScalarExpr(E->getArg(1));
325
29
  llvm::Type *ValueType = Args[1]->getType();
326
29
  Args[1] = EmitToInt(CGF, Args[1], T, IntType);
327
29
  Args[2] = EmitToInt(CGF, CGF.EmitScalarExpr(E->getArg(2)), T, IntType);
328
329
29
  Value *Pair = CGF.Builder.CreateAtomicCmpXchg(
330
29
      Args[0], Args[1], Args[2], llvm::AtomicOrdering::SequentiallyConsistent,
331
29
      llvm::AtomicOrdering::SequentiallyConsistent);
332
29
  if (ReturnBool)
333
    // Extract boolean success flag and zext it to int.
334
14
    return CGF.Builder.CreateZExt(CGF.Builder.CreateExtractValue(Pair, 1),
335
14
                                  CGF.ConvertType(E->getType()));
336
15
  else
337
    // Extract old value and emit it using the same type as compare value.
338
15
    return EmitFromInt(CGF, CGF.Builder.CreateExtractValue(Pair, 0), T,
339
15
                       ValueType);
340
29
}
341
342
/// This function should be invoked to emit atomic cmpxchg for Microsoft's
343
/// _InterlockedCompareExchange* intrinsics which have the following signature:
344
/// T _InterlockedCompareExchange(T volatile *Destination,
345
///                               T Exchange,
346
///                               T Comparand);
347
///
348
/// Whereas the llvm 'cmpxchg' instruction has the following syntax:
349
/// cmpxchg *Destination, Comparand, Exchange.
350
/// So we need to swap Comparand and Exchange when invoking
351
/// CreateAtomicCmpXchg. That is the reason we could not use the above utility
352
/// function MakeAtomicCmpXchgValue since it expects the arguments to be
353
/// already swapped.
354
355
static
356
Value *EmitAtomicCmpXchgForMSIntrin(CodeGenFunction &CGF, const CallExpr *E,
357
68
    AtomicOrdering SuccessOrdering = AtomicOrdering::SequentiallyConsistent) {
358
68
  assert(E->getArg(0)->getType()->isPointerType());
359
68
  assert(CGF.getContext().hasSameUnqualifiedType(
360
68
      E->getType(), E->getArg(0)->getType()->getPointeeType()));
361
68
  assert(CGF.getContext().hasSameUnqualifiedType(E->getType(),
362
68
                                                 E->getArg(1)->getType()));
363
68
  assert(CGF.getContext().hasSameUnqualifiedType(E->getType(),
364
68
                                                 E->getArg(2)->getType()));
365
366
68
  auto *Destination = CGF.EmitScalarExpr(E->getArg(0));
367
68
  auto *Comparand = CGF.EmitScalarExpr(E->getArg(2));
368
68
  auto *Exchange = CGF.EmitScalarExpr(E->getArg(1));
369
370
  // For Release ordering, the failure ordering should be Monotonic.
371
68
  auto FailureOrdering = SuccessOrdering == AtomicOrdering::Release ?
372
14
                         AtomicOrdering::Monotonic :
373
68
                         SuccessOrdering;
374
375
  // The atomic instruction is marked volatile for consistency with MSVC. This
376
  // blocks the few atomics optimizations that LLVM has. If we want to optimize
377
  // _Interlocked* operations in the future, we will have to remove the volatile
378
  // marker.
379
68
  auto *Result = CGF.Builder.CreateAtomicCmpXchg(
380
68
                   Destination, Comparand, Exchange,
381
68
                   SuccessOrdering, FailureOrdering);
382
68
  Result->setVolatile(true);
383
68
  return CGF.Builder.CreateExtractValue(Result, 0);
384
68
}
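
A simplified illustration of the operand swap described above (variable names and IR are placeholders, not report output):

    long cas(volatile long *Dest, long Exch, long Comp) {
      return _InterlockedCompareExchange(Dest, Exch, Comp);
      // roughly: %pair = cmpxchg volatile ptr %Dest, i32 %Comp, i32 %Exch seq_cst seq_cst
      //          return extractvalue { i32, i1 } %pair, 0   ; the old value, per MSVC semantics
    }
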
385
386
// 64-bit Microsoft platforms support 128 bit cmpxchg operations. They are
387
// prototyped like this:
388
//
389
// unsigned char _InterlockedCompareExchange128...(
390
//     __int64 volatile * _Destination,
391
//     __int64 _ExchangeHigh,
392
//     __int64 _ExchangeLow,
393
//     __int64 * _ComparandResult);
394
static Value *EmitAtomicCmpXchg128ForMSIntrin(CodeGenFunction &CGF,
395
                                              const CallExpr *E,
396
5
                                              AtomicOrdering SuccessOrdering) {
397
5
  assert(E->getNumArgs() == 4);
398
5
  llvm::Value *Destination = CGF.EmitScalarExpr(E->getArg(0));
399
5
  llvm::Value *ExchangeHigh = CGF.EmitScalarExpr(E->getArg(1));
400
5
  llvm::Value *ExchangeLow = CGF.EmitScalarExpr(E->getArg(2));
401
5
  llvm::Value *ComparandPtr = CGF.EmitScalarExpr(E->getArg(3));
402
403
5
  assert(Destination->getType()->isPointerTy());
404
5
  assert(!ExchangeHigh->getType()->isPointerTy());
405
5
  assert(!ExchangeLow->getType()->isPointerTy());
406
5
  assert(ComparandPtr->getType()->isPointerTy());
407
408
  // For Release ordering, the failure ordering should be Monotonic.
409
5
  auto FailureOrdering = SuccessOrdering == AtomicOrdering::Release
410
5
                             ? AtomicOrdering::Monotonic
411
5
                             : SuccessOrdering;
412
413
  // Convert to i128 pointers and values.
414
5
  llvm::Type *Int128Ty = llvm::IntegerType::get(CGF.getLLVMContext(), 128);
415
5
  Address ComparandResult(ComparandPtr, Int128Ty,
416
5
                          CGF.getContext().toCharUnitsFromBits(128));
417
418
  // (((i128)hi) << 64) | ((i128)lo)
419
5
  ExchangeHigh = CGF.Builder.CreateZExt(ExchangeHigh, Int128Ty);
420
5
  ExchangeLow = CGF.Builder.CreateZExt(ExchangeLow, Int128Ty);
421
5
  ExchangeHigh =
422
5
      CGF.Builder.CreateShl(ExchangeHigh, llvm::ConstantInt::get(Int128Ty, 64));
423
5
  llvm::Value *Exchange = CGF.Builder.CreateOr(ExchangeHigh, ExchangeLow);
424
425
  // Load the comparand for the instruction.
426
5
  llvm::Value *Comparand = CGF.Builder.CreateLoad(ComparandResult);
427
428
5
  auto *CXI = CGF.Builder.CreateAtomicCmpXchg(Destination, Comparand, Exchange,
429
5
                                              SuccessOrdering, FailureOrdering);
430
431
  // The atomic instruction is marked volatile for consistency with MSVC. This
432
  // blocks the few atomics optimizations that LLVM has. If we want to optimize
433
  // _Interlocked* operations in the future, we will have to remove the volatile
434
  // marker.
435
5
  CXI->setVolatile(true);
436
437
  // Store the result as an outparameter.
438
5
  CGF.Builder.CreateStore(CGF.Builder.CreateExtractValue(CXI, 0),
439
5
                          ComparandResult);
440
441
  // Get the success boolean and zero extend it to i8.
442
5
  Value *Success = CGF.Builder.CreateExtractValue(CXI, 1);
443
5
  return CGF.Builder.CreateZExt(Success, CGF.Int8Ty);
444
5
}
445
446
static Value *EmitAtomicIncrementValue(CodeGenFunction &CGF, const CallExpr *E,
447
58
    AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) {
448
58
  assert(E->getArg(0)->getType()->isPointerType());
449
450
58
  auto *IntTy = CGF.ConvertType(E->getType());
451
58
  auto *Result = CGF.Builder.CreateAtomicRMW(
452
58
                   AtomicRMWInst::Add,
453
58
                   CGF.EmitScalarExpr(E->getArg(0)),
454
58
                   ConstantInt::get(IntTy, 1),
455
58
                   Ordering);
456
58
  return CGF.Builder.CreateAdd(Result, ConstantInt::get(IntTy, 1));
457
58
}
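
Sketch of the increment lowering above (hypothetical names, simplified IR): the builtin returns the new value, so one is added back to the atomicrmw result.

    long bump_ref(volatile long *refcount) {
      return _InterlockedIncrement(refcount);
      // roughly: %old = atomicrmw add ptr %refcount, i32 1 seq_cst
      //          return add i32 %old, 1
    }
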
458
459
static Value *EmitAtomicDecrementValue(CodeGenFunction &CGF, const CallExpr *E,
460
58
    AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) {
461
58
  assert(E->getArg(0)->getType()->isPointerType());
462
463
58
  auto *IntTy = CGF.ConvertType(E->getType());
464
58
  auto *Result = CGF.Builder.CreateAtomicRMW(
465
58
                   AtomicRMWInst::Sub,
466
58
                   CGF.EmitScalarExpr(E->getArg(0)),
467
58
                   ConstantInt::get(IntTy, 1),
468
58
                   Ordering);
469
58
  return CGF.Builder.CreateSub(Result, ConstantInt::get(IntTy, 1));
470
58
}
471
472
// Build a plain volatile load.
473
16
static Value *EmitISOVolatileLoad(CodeGenFunction &CGF, const CallExpr *E) {
474
16
  Value *Ptr = CGF.EmitScalarExpr(E->getArg(0));
475
16
  QualType ElTy = E->getArg(0)->getType()->getPointeeType();
476
16
  CharUnits LoadSize = CGF.getContext().getTypeSizeInChars(ElTy);
477
16
  llvm::Type *ITy =
478
16
      llvm::IntegerType::get(CGF.getLLVMContext(), LoadSize.getQuantity() * 8);
479
16
  llvm::LoadInst *Load = CGF.Builder.CreateAlignedLoad(ITy, Ptr, LoadSize);
480
16
  Load->setVolatile(true);
481
16
  return Load;
482
16
}
483
484
// Build a plain volatile store.
485
16
static Value *EmitISOVolatileStore(CodeGenFunction &CGF, const CallExpr *E) {
486
16
  Value *Ptr = CGF.EmitScalarExpr(E->getArg(0));
487
16
  Value *Value = CGF.EmitScalarExpr(E->getArg(1));
488
16
  QualType ElTy = E->getArg(0)->getType()->getPointeeType();
489
16
  CharUnits StoreSize = CGF.getContext().getTypeSizeInChars(ElTy);
490
16
  llvm::StoreInst *Store =
491
16
      CGF.Builder.CreateAlignedStore(Value, Ptr, StoreSize);
492
16
  Store->setVolatile(true);
493
16
  return Store;
494
16
}
495
496
// Emit a simple mangled intrinsic that has 1 argument and a return type
497
// matching the argument type. Depending on mode, this may be a constrained
498
// floating-point intrinsic.
499
static Value *emitUnaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF,
500
                                const CallExpr *E, unsigned IntrinsicID,
501
810
                                unsigned ConstrainedIntrinsicID) {
502
810
  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
503
504
810
  CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
505
810
  if (CGF.Builder.getIsFPConstrained()) {
506
101
    Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType());
507
101
    return CGF.Builder.CreateConstrainedFPCall(F, { Src0 });
508
709
  } else {
509
709
    Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
510
709
    return CGF.Builder.CreateCall(F, Src0);
511
709
  }
512
810
}
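
For example (a sketch that assumes __builtin_ceil is routed through this helper): default FP mode selects the plain intrinsic, while strict exception behavior selects the constrained variant.

    double up(double x) { return __builtin_ceil(x); }
    // default:                        call double @llvm.ceil.f64(double %x)
    // -ffp-exception-behavior=strict, roughly:
    //   call double @llvm.experimental.constrained.ceil.f64(double %x, metadata !"fpexcept.strict")
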
513
514
// Emit an intrinsic that has 2 operands of the same type as its result.
515
// Depending on mode, this may be a constrained floating-point intrinsic.
516
static Value *emitBinaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF,
517
                                const CallExpr *E, unsigned IntrinsicID,
518
188
                                unsigned ConstrainedIntrinsicID) {
519
188
  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
520
188
  llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
521
522
188
  if (CGF.Builder.getIsFPConstrained()) {
523
21
    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
524
21
    Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType());
525
21
    return CGF.Builder.CreateConstrainedFPCall(F, { Src0, Src1 });
526
167
  } else {
527
167
    Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
528
167
    return CGF.Builder.CreateCall(F, { Src0, Src1 });
529
167
  }
530
188
}
531
532
// Has second type mangled argument.
533
static Value *emitBinaryExpMaybeConstrainedFPBuiltin(
534
    CodeGenFunction &CGF, const CallExpr *E, llvm::Intrinsic::ID IntrinsicID,
535
18
    llvm::Intrinsic::ID ConstrainedIntrinsicID) {
536
18
  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
537
18
  llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
538
539
18
  if (CGF.Builder.getIsFPConstrained()) {
540
4
    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
541
4
    Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID,
542
4
                                       {Src0->getType(), Src1->getType()});
543
4
    return CGF.Builder.CreateConstrainedFPCall(F, {Src0, Src1});
544
4
  }
545
546
14
  Function *F =
547
14
      CGF.CGM.getIntrinsic(IntrinsicID, {Src0->getType(), Src1->getType()});
548
14
  return CGF.Builder.CreateCall(F, {Src0, Src1});
549
18
}
550
551
// Emit an intrinsic that has 3 operands of the same type as its result.
552
// Depending on mode, this may be a constrained floating-point intrinsic.
553
static Value *emitTernaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF,
554
                                 const CallExpr *E, unsigned IntrinsicID,
555
79
                                 unsigned ConstrainedIntrinsicID) {
556
79
  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
557
79
  llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
558
79
  llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2));
559
560
79
  if (CGF.Builder.getIsFPConstrained()) {
561
7
    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
562
7
    Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType());
563
7
    return CGF.Builder.CreateConstrainedFPCall(F, { Src0, Src1, Src2 });
564
72
  } else {
565
72
    Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
566
72
    return CGF.Builder.CreateCall(F, { Src0, Src1, Src2 });
567
72
  }
568
79
}
569
570
// Emit an intrinsic where all operands are of the same type as the result.
571
// Depending on mode, this may be a constrained floating-point intrinsic.
572
static Value *emitCallMaybeConstrainedFPBuiltin(CodeGenFunction &CGF,
573
                                                unsigned IntrinsicID,
574
                                                unsigned ConstrainedIntrinsicID,
575
                                                llvm::Type *Ty,
576
124
                                                ArrayRef<Value *> Args) {
577
124
  Function *F;
578
124
  if (CGF.Builder.getIsFPConstrained())
579
38
    F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Ty);
580
86
  else
581
86
    F = CGF.CGM.getIntrinsic(IntrinsicID, Ty);
582
583
124
  if (CGF.Builder.getIsFPConstrained())
584
38
    return CGF.Builder.CreateConstrainedFPCall(F, Args);
585
86
  else
586
86
    return CGF.Builder.CreateCall(F, Args);
587
124
}
588
589
// Emit a simple mangled intrinsic that has 1 argument and a return type
590
// matching the argument type.
591
static Value *emitUnaryBuiltin(CodeGenFunction &CGF, const CallExpr *E,
592
                               unsigned IntrinsicID,
593
713
                               llvm::StringRef Name = "") {
594
713
  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
595
596
713
  Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
597
713
  return CGF.Builder.CreateCall(F, Src0, Name);
598
713
}
599
600
// Emit an intrinsic that has 2 operands of the same type as its result.
601
static Value *emitBinaryBuiltin(CodeGenFunction &CGF,
602
                                const CallExpr *E,
603
110
                                unsigned IntrinsicID) {
604
110
  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
605
110
  llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
606
607
110
  Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
608
110
  return CGF.Builder.CreateCall(F, { Src0, Src1 });
609
110
}
610
611
// Emit an intrinsic that has 3 operands of the same type as its result.
612
static Value *emitTernaryBuiltin(CodeGenFunction &CGF,
613
                                 const CallExpr *E,
614
29
                                 unsigned IntrinsicID) {
615
29
  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
616
29
  llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
617
29
  llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2));
618
619
29
  Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
620
29
  return CGF.Builder.CreateCall(F, { Src0, Src1, Src2 });
621
29
}
622
623
// Emit an intrinsic that has 1 float or double operand, and 1 integer.
624
static Value *emitFPIntBuiltin(CodeGenFunction &CGF,
625
                               const CallExpr *E,
626
9
                               unsigned IntrinsicID) {
627
9
  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
628
9
  llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
629
630
9
  Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
631
9
  return CGF.Builder.CreateCall(F, {Src0, Src1});
632
9
}
633
634
// Emit an intrinsic that has overloaded integer result and fp operand.
635
static Value *
636
emitMaybeConstrainedFPToIntRoundBuiltin(CodeGenFunction &CGF, const CallExpr *E,
637
                                        unsigned IntrinsicID,
638
96
                                        unsigned ConstrainedIntrinsicID) {
639
96
  llvm::Type *ResultType = CGF.ConvertType(E->getType());
640
96
  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
641
642
96
  if (CGF.Builder.getIsFPConstrained()) {
643
28
    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
644
28
    Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID,
645
28
                                       {ResultType, Src0->getType()});
646
28
    return CGF.Builder.CreateConstrainedFPCall(F, {Src0});
647
68
  } else {
648
68
    Function *F =
649
68
        CGF.CGM.getIntrinsic(IntrinsicID, {ResultType, Src0->getType()});
650
68
    return CGF.Builder.CreateCall(F, Src0);
651
68
  }
652
96
}
653
654
static Value *emitFrexpBuiltin(CodeGenFunction &CGF, const CallExpr *E,
655
39
                               llvm::Intrinsic::ID IntrinsicID) {
656
39
  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
657
39
  llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
658
659
39
  QualType IntPtrTy = E->getArg(1)->getType()->getPointeeType();
660
39
  llvm::Type *IntTy = CGF.ConvertType(IntPtrTy);
661
39
  llvm::Function *F =
662
39
      CGF.CGM.getIntrinsic(IntrinsicID, {Src0->getType(), IntTy});
663
39
  llvm::Value *Call = CGF.Builder.CreateCall(F, Src0);
664
665
39
  llvm::Value *Exp = CGF.Builder.CreateExtractValue(Call, 1);
666
39
  LValue LV = CGF.MakeNaturalAlignAddrLValue(Src1, IntPtrTy);
667
39
  CGF.EmitStoreOfScalar(Exp, LV);
668
669
39
  return CGF.Builder.CreateExtractValue(Call, 0);
670
39
}
671
672
/// EmitFAbs - Emit a call to @llvm.fabs().
673
10
static Value *EmitFAbs(CodeGenFunction &CGF, Value *V) {
674
10
  Function *F = CGF.CGM.getIntrinsic(Intrinsic::fabs, V->getType());
675
10
  llvm::CallInst *Call = CGF.Builder.CreateCall(F, V);
676
10
  Call->setDoesNotAccessMemory();
677
10
  return Call;
678
10
}
679
680
/// Emit the computation of the sign bit for a floating point value. Returns
681
/// the i1 sign bit value.
682
39
static Value *EmitSignBit(CodeGenFunction &CGF, Value *V) {
683
39
  LLVMContext &C = CGF.CGM.getLLVMContext();
684
685
39
  llvm::Type *Ty = V->getType();
686
39
  int Width = Ty->getPrimitiveSizeInBits();
687
39
  llvm::Type *IntTy = llvm::IntegerType::get(C, Width);
688
39
  V = CGF.Builder.CreateBitCast(V, IntTy);
689
39
  if (Ty->isPPC_FP128Ty()) {
690
    // We want the sign bit of the higher-order double. The bitcast we just
691
    // did works as if the double-double was stored to memory and then
692
    // read as an i128. The "store" will put the higher-order double in the
693
    // lower address in both little- and big-Endian modes, but the "load"
694
    // will treat those bits as a different part of the i128: the low bits in
695
    // little-Endian, the high bits in big-Endian. Therefore, on big-Endian
696
    // we need to shift the high bits down to the low before truncating.
697
14
    Width >>= 1;
698
14
    if (CGF.getTarget().isBigEndian()) {
699
9
      Value *ShiftCst = llvm::ConstantInt::get(IntTy, Width);
700
9
      V = CGF.Builder.CreateLShr(V, ShiftCst);
701
9
    }
702
    // We are truncating value in order to extract the higher-order
703
    // double, which we will be using to extract the sign from.
704
14
    IntTy = llvm::IntegerType::get(C, Width);
705
14
    V = CGF.Builder.CreateTrunc(V, IntTy);
706
14
  }
707
39
  Value *Zero = llvm::Constant::getNullValue(IntTy);
708
39
  return CGF.Builder.CreateICmpSLT(V, Zero);
709
39
}
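
A compact illustration of the bit test above for a plain double (the ppc_fp128 high-half handling is omitted; IR is a sketch):

    int is_negative(double x) {
      return __builtin_signbit(x);
      // roughly: %bits = bitcast double %x to i64
      //          %sign = icmp slt i64 %bits, 0
    }
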
710
711
static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *FD,
712
13.6k
                              const CallExpr *E, llvm::Constant *calleeValue) {
713
13.6k
  CGCallee callee = CGCallee::forDirect(calleeValue, GlobalDecl(FD));
714
13.6k
  return CGF.EmitCall(E->getCallee()->getType(), callee, E, ReturnValueSlot());
715
13.6k
}
716
717
/// Emit a call to llvm.{sadd,uadd,ssub,usub,smul,umul}.with.overflow.*
718
/// depending on IntrinsicID.
719
///
720
/// \arg CGF The current codegen function.
721
/// \arg IntrinsicID The ID for the Intrinsic we wish to generate.
722
/// \arg X The first argument to the llvm.*.with.overflow.*.
723
/// \arg Y The second argument to the llvm.*.with.overflow.*.
724
/// \arg Carry The carry returned by the llvm.*.with.overflow.*.
725
/// \returns The result (i.e. sum/product) returned by the intrinsic.
726
static llvm::Value *EmitOverflowIntrinsic(CodeGenFunction &CGF,
727
                                          const llvm::Intrinsic::ID IntrinsicID,
728
                                          llvm::Value *X, llvm::Value *Y,
729
220
                                          llvm::Value *&Carry) {
730
  // Make sure we have integers of the same width.
731
220
  assert(X->getType() == Y->getType() &&
732
220
         "Arguments must be the same type. (Did you forget to make sure both "
733
220
         "arguments have the same integer width?)");
734
735
220
  Function *Callee = CGF.CGM.getIntrinsic(IntrinsicID, X->getType());
736
220
  llvm::Value *Tmp = CGF.Builder.CreateCall(Callee, {X, Y});
737
220
  Carry = CGF.Builder.CreateExtractValue(Tmp, 1);
738
220
  return CGF.Builder.CreateExtractValue(Tmp, 0);
739
220
}
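
A caller-side sketch (illustrative only) of the checked-arithmetic builtins that use this helper: the result is an aggregate from a *.with.overflow intrinsic, split by two extractvalues.

    bool checked_add(int a, int b, int *out) {
      return __builtin_sadd_overflow(a, b, out);
      // roughly: %r   = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
      //          %sum = extractvalue { i32, i1 } %r, 0   ; stored through %out
      //          %ovf = extractvalue { i32, i1 } %r, 1   ; returned
    }
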
740
741
static Value *emitRangedBuiltin(CodeGenFunction &CGF,
742
                                unsigned IntrinsicID,
743
6
                                int low, int high) {
744
6
    llvm::MDBuilder MDHelper(CGF.getLLVMContext());
745
6
    llvm::MDNode *RNode = MDHelper.createRange(APInt(32, low), APInt(32, high));
746
6
    Function *F = CGF.CGM.getIntrinsic(IntrinsicID, {});
747
6
    llvm::Instruction *Call = CGF.Builder.CreateCall(F);
748
6
    Call->setMetadata(llvm::LLVMContext::MD_range, RNode);
749
6
    Call->setMetadata(llvm::LLVMContext::MD_noundef,
750
6
                      llvm::MDNode::get(CGF.getLLVMContext(), std::nullopt));
751
6
    return Call;
752
6
}
753
754
namespace {
755
  struct WidthAndSignedness {
756
    unsigned Width;
757
    bool Signed;
758
  };
759
}
760
761
static WidthAndSignedness
762
getIntegerWidthAndSignedness(const clang::ASTContext &context,
763
315
                             const clang::QualType Type) {
764
315
  assert(Type->isIntegerType() && "Given type is not an integer.");
765
315
  unsigned Width = Type->isBooleanType()  ? 1
766
315
                   : Type->isBitIntType() ? context.getIntWidth(Type)
767
297
                                          : context.getTypeInfo(Type).Width;
768
315
  bool Signed = Type->isSignedIntegerType();
769
315
  return {Width, Signed};
770
315
}
771
772
// Given one or more integer types, this function produces an integer type that
773
// encompasses them: any value in one of the given types could be expressed in
774
// the encompassing type.
775
static struct WidthAndSignedness
776
69
EncompassingIntegerType(ArrayRef<struct WidthAndSignedness> Types) {
777
69
  assert(Types.size() > 0 && "Empty list of types.");
778
779
  // If any of the given types is signed, we must return a signed type.
780
69
  bool Signed = false;
781
207
  for (const auto &Type : Types) {
782
207
    Signed |= Type.Signed;
783
207
  }
784
785
  // The encompassing type must have a width greater than or equal to the width
786
  // of the specified types.  Additionally, if the encompassing type is signed,
787
  // its width must be strictly greater than the width of any unsigned types
788
  // given.
789
69
  unsigned Width = 0;
790
207
  for (const auto &Type : Types) {
791
207
    unsigned MinWidth = Type.Width + (Signed && !Type.Signed);
792
207
    if (Width < MinWidth) {
793
79
      Width = MinWidth;
794
79
    }
795
207
  }
796
797
69
  return {Width, Signed};
798
69
}
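
A few worked examples of this rule (widths as reported by getIntegerWidthAndSignedness; purely illustrative):

    // { unsigned:16, unsigned:32 } -> unsigned:32
    // { signed:32,   unsigned:32 } -> signed:33   (a signed result must be strictly
    //                                              wider than every unsigned input)
    // { signed:64,   signed:8    } -> signed:64
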
799
800
519
Value *CodeGenFunction::EmitVAStartEnd(Value *ArgValue, bool IsStart) {
801
519
  Intrinsic::ID inst = IsStart ? Intrinsic::vastart : Intrinsic::vaend;
802
519
  return Builder.CreateCall(CGM.getIntrinsic(inst), ArgValue);
803
519
}
804
805
/// Checks if using the result of __builtin_object_size(p, @p From) in place of
806
/// __builtin_object_size(p, @p To) is correct
807
38
static bool areBOSTypesCompatible(int From, int To) {
808
  // Note: Our __builtin_object_size implementation currently treats Type=0 and
809
  // Type=2 identically. Encoding this implementation detail here may make
810
  // improving __builtin_object_size difficult in the future, so it's omitted.
811
38
  return From == To || (From == 0 && To == 1) || (From == 3 && To == 2);
812
38
}
813
814
static llvm::Value *
815
57
getDefaultBuiltinObjectSizeResult(unsigned Type, llvm::IntegerType *ResType) {
816
57
  return ConstantInt::get(ResType, (Type & 2) ? 0 : -1, /*isSigned=*/true);
817
57
}
818
819
llvm::Value *
820
CodeGenFunction::evaluateOrEmitBuiltinObjectSize(const Expr *E, unsigned Type,
821
                                                 llvm::IntegerType *ResType,
822
                                                 llvm::Value *EmittedE,
823
107
                                                 bool IsDynamic) {
824
107
  uint64_t ObjectSize;
825
107
  if (!E->tryEvaluateObjectSize(ObjectSize, getContext(), Type))
826
55
    return emitBuiltinObjectSize(E, Type, ResType, EmittedE, IsDynamic);
827
52
  return ConstantInt::get(ResType, ObjectSize, /*isSigned=*/true);
828
107
}
829
830
/// Returns a Value corresponding to the size of the given expression.
831
/// This Value may be either of the following:
832
///   - A llvm::Argument (if E is a param with the pass_object_size attribute on
833
///     it)
834
///   - A call to the @llvm.objectsize intrinsic
835
///
836
/// EmittedE is the result of emitting `E` as a scalar expr. If it's non-null
837
/// and we wouldn't otherwise try to reference a pass_object_size parameter,
838
/// we'll call @llvm.objectsize on EmittedE, rather than emitting E.
839
llvm::Value *
840
CodeGenFunction::emitBuiltinObjectSize(const Expr *E, unsigned Type,
841
                                       llvm::IntegerType *ResType,
842
349
                                       llvm::Value *EmittedE, bool IsDynamic) {
843
  // We need to reference an argument if the pointer is a parameter with the
844
  // pass_object_size attribute.
845
349
  if (auto *D = dyn_cast<DeclRefExpr>(E->IgnoreParenImpCasts())) {
846
149
    auto *Param = dyn_cast<ParmVarDecl>(D->getDecl());
847
149
    auto *PS = D->getDecl()->getAttr<PassObjectSizeAttr>();
848
149
    if (Param != nullptr && PS != nullptr &&
849
149
        areBOSTypesCompatible(PS->getType(), Type)) {
850
28
      auto Iter = SizeArguments.find(Param);
851
28
      assert(Iter != SizeArguments.end());
852
853
28
      const ImplicitParamDecl *D = Iter->second;
854
28
      auto DIter = LocalDeclMap.find(D);
855
28
      assert(DIter != LocalDeclMap.end());
856
857
28
      return EmitLoadOfScalar(DIter->second, /*Volatile=*/false,
858
28
                              getContext().getSizeType(), E->getBeginLoc());
859
28
    }
860
149
  }
861
862
  // LLVM can't handle Type=3 appropriately, and __builtin_object_size shouldn't
863
  // evaluate E for side-effects. In either case, we shouldn't lower to
864
  // @llvm.objectsize.
865
321
  if (Type == 3 || (!EmittedE && E->HasSideEffects(getContext())))
866
35
    return getDefaultBuiltinObjectSizeResult(Type, ResType);
867
868
286
  if (IsDynamic) {
869
    // The code generated here calculates the size of a struct with a flexible
870
    // array member that uses the counted_by attribute. There are two instances
871
    // we handle:
872
    //
873
    //       struct s {
874
    //         unsigned long flags;
875
    //         int count;
876
    //         int array[] __attribute__((counted_by(count)));
877
    //       }
878
    //
879
    //   1) bdos of the flexible array itself:
880
    //
881
    //     __builtin_dynamic_object_size(p->array, 1) ==
882
    //         p->count * sizeof(*p->array)
883
    //
884
    //   2) bdos of a pointer into the flexible array:
885
    //
886
    //     __builtin_dynamic_object_size(&p->array[42], 1) ==
887
    //         (p->count - 42) * sizeof(*p->array)
888
    //
889
    //   3) bdos of the whole struct, including the flexible array:
890
    //
891
    //     __builtin_dynamic_object_size(p, 1) ==
892
    //        max(sizeof(struct s),
893
    //            offsetof(struct s, array) + p->count * sizeof(*p->array))
894
    //
895
140
    const Expr *Base = E->IgnoreParenImpCasts();
896
140
    const Expr *Idx = nullptr;
897
140
    if (const auto *UO = dyn_cast<UnaryOperator>(Base);
898
140
        UO && UO->getOpcode() == UO_AddrOf) {
899
24
      if (const auto *ASE =
900
24
              dyn_cast<ArraySubscriptExpr>(UO->getSubExpr()->IgnoreParens())) {
901
19
        Base = ASE->getBase();
902
19
        Idx = ASE->getIdx()->IgnoreParenImpCasts();
903
904
19
        if (const auto *IL = dyn_cast<IntegerLiteral>(Idx);
905
19
            IL && 
!IL->getValue().getSExtValue()15
)
906
4
          Idx = nullptr;
907
19
      }
908
24
    }
909
910
140
    if (const ValueDecl *CountedByFD = FindCountedByField(Base)) {
911
22
      bool IsSigned = CountedByFD->getType()->isSignedIntegerType();
912
22
      const RecordDecl *OuterRD =
913
22
          CountedByFD->getDeclContext()->getOuterLexicalRecordContext();
914
22
      ASTContext &Ctx = getContext();
915
916
      // Load the counted_by field.
917
22
      const Expr *CountedByExpr = BuildCountedByFieldExpr(Base, CountedByFD);
918
22
      Value *CountedByInst = EmitAnyExprToTemp(CountedByExpr).getScalarVal();
919
22
      llvm::Type *CountedByTy = CountedByInst->getType();
920
921
22
      if (Idx) {
922
        // There's an index into the array. Remove it from the count.
923
6
        bool IdxSigned = Idx->getType()->isSignedIntegerType();
924
6
        Value *IdxInst = EmitAnyExprToTemp(Idx).getScalarVal();
925
6
        IdxInst = IdxSigned ? Builder.CreateSExtOrTrunc(IdxInst, CountedByTy)
926
6
                            : 
Builder.CreateZExtOrTrunc(IdxInst, CountedByTy)0
;
927
928
        // If the index is negative, don't subtract it from the counted_by
929
        // value. The pointer is pointing to something before the FAM.
930
6
        IdxInst = Builder.CreateNeg(IdxInst, "", !IdxSigned, IdxSigned);
931
6
        CountedByInst =
932
6
            Builder.CreateAdd(CountedByInst, IdxInst, "", !IsSigned, IsSigned);
933
6
      }
934
935
      // Get the size of the flexible array member's base type.
936
22
      const ValueDecl *FAMDecl = nullptr;
937
22
      if (const auto *ME = dyn_cast<MemberExpr>(Base)) {
938
8
        const LangOptions::StrictFlexArraysLevelKind StrictFlexArraysLevel =
939
8
            getLangOpts().getStrictFlexArraysLevel();
940
8
        if (const ValueDecl *MD = ME->getMemberDecl();
941
8
            MD && Decl::isFlexibleArrayMemberLike(
942
8
                      Ctx, MD, MD->getType(), StrictFlexArraysLevel,
943
8
                      /*IgnoreTemplateOrMacroSubstitution=*/true))
944
          // Base is referencing the FAM itself.
945
8
          FAMDecl = MD;
946
8
      }
947
948
22
      if (!FAMDecl)
949
14
        FAMDecl = FindFlexibleArrayMemberField(Ctx, OuterRD);
950
951
22
      assert(FAMDecl && "Can't find the flexible array member field");
952
953
22
      const ArrayType *ArrayTy = Ctx.getAsArrayType(FAMDecl->getType());
954
22
      CharUnits Size = Ctx.getTypeSizeInChars(ArrayTy->getElementType());
955
22
      llvm::Constant *ElemSize =
956
22
          llvm::ConstantInt::get(CountedByTy, Size.getQuantity(), IsSigned);
957
958
      // Calculate how large the flexible array member is in bytes.
959
22
      Value *FAMSize =
960
22
          Builder.CreateMul(CountedByInst, ElemSize, "", !IsSigned, IsSigned);
961
22
      FAMSize = IsSigned ? Builder.CreateSExtOrTrunc(FAMSize, ResType)
962
22
                         : Builder.CreateZExtOrTrunc(FAMSize, ResType);
963
22
      Value *Res = FAMSize;
964
965
22
      if (const auto *DRE = dyn_cast<DeclRefExpr>(Base)) {
966
        // The whole struct is specified in the __bdos.
967
8
        const ASTRecordLayout &Layout = Ctx.getASTRecordLayout(OuterRD);
968
969
        // Get the offset of the FAM.
970
8
        CharUnits Offset = Ctx.toCharUnitsFromBits(Ctx.getFieldOffset(FAMDecl));
971
8
        llvm::Constant *FAMOffset =
972
8
            ConstantInt::get(ResType, Offset.getQuantity(), IsSigned);
973
974
        // max(sizeof(struct s),
975
        //     offsetof(struct s, array) + p->count * sizeof(*p->array))
976
8
        Value *OffsetAndFAMSize =
977
8
            Builder.CreateAdd(FAMOffset, Res, "", !IsSigned, IsSigned);
978
979
        // Get the full size of the struct.
980
8
        llvm::Constant *SizeofStruct =
981
8
            ConstantInt::get(ResType, Layout.getSize().getQuantity(), IsSigned);
982
983
8
        Res = IsSigned
984
8
                  ? Builder.CreateBinaryIntrinsic(
985
6
                        llvm::Intrinsic::smax, OffsetAndFAMSize, SizeofStruct)
986
8
                  : Builder.CreateBinaryIntrinsic(
987
2
                        llvm::Intrinsic::umax, OffsetAndFAMSize, SizeofStruct);
988
14
      } else if (const auto *ME = dyn_cast<MemberExpr>(Base)) {
989
        // Pointing to a place before the FAM. Add the difference to the FAM's
990
        // size.
991
8
        if (const ValueDecl *MD = ME->getMemberDecl(); MD != FAMDecl) {
992
0
          CharUnits Offset = Ctx.toCharUnitsFromBits(Ctx.getFieldOffset(MD));
993
0
          CharUnits FAMOffset =
994
0
              Ctx.toCharUnitsFromBits(Ctx.getFieldOffset(FAMDecl));
995
996
0
          Res = Builder.CreateAdd(
997
0
              Res, ConstantInt::get(ResType, FAMOffset.getQuantity() -
998
0
                                                 Offset.getQuantity()));
999
0
        }
1000
8
      }
1001
1002
      // A negative 'FAMSize' means that the index was greater than the count,
1003
      // or an improperly set count field. Return -1 (for types 0 and 1) or 0
1004
      // (for types 2 and 3).
1005
22
      return Builder.CreateSelect(
1006
22
          Builder.CreateIsNeg(FAMSize),
1007
22
          getDefaultBuiltinObjectSizeResult(Type, ResType), Res);
1008
22
    }
1009
140
  }
1010
1011
264
  Value *Ptr = EmittedE ? EmittedE : EmitScalarExpr(E);
1012
264
  assert(Ptr->getType()->isPointerTy() &&
1013
264
         "Non-pointer passed to __builtin_object_size?");
1014
1015
264
  Function *F =
1016
264
      CGM.getIntrinsic(Intrinsic::objectsize, {ResType, Ptr->getType()});
1017
1018
  // LLVM only supports 0 and 2, make sure that we pass along that as a boolean.
1019
264
  Value *Min = Builder.getInt1((Type & 2) != 0);
1020
  // For GCC compatibility, __builtin_object_size treats NULL as unknown size.
1021
264
  Value *NullIsUnknown = Builder.getTrue();
1022
264
  Value *Dynamic = Builder.getInt1(IsDynamic);
1023
264
  return Builder.CreateCall(F, {Ptr, Min, NullIsUnknown, Dynamic});
1024
264
}
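
Illustrative examples of the two outcomes above, a compile-time fold versus a call to llvm.objectsize (values, names, and IR are sketches):

    #include <stddef.h>
    size_t local_size(void) {
      char buf[16];
      return __builtin_object_size(buf + 4, 0);      // usually folds to the constant 12
    }
    size_t dynamic_size(void *p) {                   // 'p' stands in for an arbitrary pointer
      return __builtin_dynamic_object_size(p, 0);
      // roughly: call i64 @llvm.objectsize.i64.p0(ptr %p, i1 false, i1 true, i1 true)
      //          (min = false for type 0, null-is-unknown = true, dynamic = true)
    }
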
1025
1026
namespace {
1027
/// A struct to generically describe a bit test intrinsic.
1028
struct BitTest {
1029
  enum ActionKind : uint8_t { TestOnly, Complement, Reset, Set };
1030
  enum InterlockingKind : uint8_t {
1031
    Unlocked,
1032
    Sequential,
1033
    Acquire,
1034
    Release,
1035
    NoFence
1036
  };
1037
1038
  ActionKind Action;
1039
  InterlockingKind Interlocking;
1040
  bool Is64Bit;
1041
1042
  static BitTest decodeBitTestBuiltin(unsigned BuiltinID);
1043
};
1044
} // namespace
1045
1046
51
BitTest BitTest::decodeBitTestBuiltin(unsigned BuiltinID) {
1047
51
  switch (BuiltinID) {
1048
    // Main portable variants.
1049
3
  case Builtin::BI_bittest:
1050
3
    return {TestOnly, Unlocked, false};
1051
3
  case Builtin::BI_bittestandcomplement:
1052
3
    return {Complement, Unlocked, false};
1053
3
  case Builtin::BI_bittestandreset:
1054
3
    return {Reset, Unlocked, false};
1055
3
  case Builtin::BI_bittestandset:
1056
3
    return {Set, Unlocked, false};
1057
3
  case Builtin::BI_interlockedbittestandreset:
1058
3
    return {Reset, Sequential, false};
1059
6
  case Builtin::BI_interlockedbittestandset:
1060
6
    return {Set, Sequential, false};
1061
1062
    // X86-specific 64-bit variants.
1063
3
  case Builtin::BI_bittest64:
1064
3
    return {TestOnly, Unlocked, true};
1065
3
  case Builtin::BI_bittestandcomplement64:
1066
3
    return {Complement, Unlocked, true};
1067
3
  case Builtin::BI_bittestandreset64:
1068
3
    return {Reset, Unlocked, true};
1069
3
  case Builtin::BI_bittestandset64:
1070
3
    return {Set, Unlocked, true};
1071
3
  case Builtin::BI_interlockedbittestandreset64:
1072
3
    return {Reset, Sequential, true};
1073
3
  case Builtin::BI_interlockedbittestandset64:
1074
3
    return {Set, Sequential, true};
1075
1076
    // ARM/AArch64-specific ordering variants.
1077
2
  case Builtin::BI_interlockedbittestandset_acq:
1078
2
    return {Set, Acquire, false};
1079
2
  case Builtin::BI_interlockedbittestandset_rel:
1080
2
    return {Set, Release, false};
1081
2
  case Builtin::BI_interlockedbittestandset_nf:
1082
2
    return {Set, NoFence, false};
1083
2
  case Builtin::BI_interlockedbittestandreset_acq:
1084
2
    return {Reset, Acquire, false};
1085
2
  case Builtin::BI_interlockedbittestandreset_rel:
1086
2
    return {Reset, Release, false};
1087
2
  case Builtin::BI_interlockedbittestandreset_nf:
1088
2
    return {Reset, NoFence, false};
1089
51
  }
1090
0
  llvm_unreachable("expected only bittest intrinsics");
1091
0
}
1092
1093
13
static char bitActionToX86BTCode(BitTest::ActionKind A) {
1094
13
  switch (A) {
1095
2
  case BitTest::TestOnly:   return '\0';
1096
2
  case BitTest::Complement: return 'c';
1097
4
  case BitTest::Reset:      return 'r';
1098
5
  case BitTest::Set:        return 's';
1099
13
  }
1100
0
  llvm_unreachable("invalid action");
1101
0
}
1102
1103
static llvm::Value *EmitX86BitTestIntrinsic(CodeGenFunction &CGF,
1104
                                            BitTest BT,
1105
                                            const CallExpr *E, Value *BitBase,
1106
13
                                            Value *BitPos) {
1107
13
  char Action = bitActionToX86BTCode(BT.Action);
1108
13
  char SizeSuffix = BT.Is64Bit ? 'q' : 'l';
1109
1110
  // Build the assembly.
1111
13
  SmallString<64> Asm;
1112
13
  raw_svector_ostream AsmOS(Asm);
1113
13
  if (BT.Interlocking != BitTest::Unlocked)
1114
5
    AsmOS << "lock ";
1115
13
  AsmOS << "bt";
1116
13
  if (Action)
1117
11
    AsmOS << Action;
1118
13
  AsmOS << SizeSuffix << " $2, ($1)";
1119
1120
  // Build the constraints. FIXME: We should support immediates when possible.
1121
13
  std::string Constraints = "={@ccc},r,r,~{cc},~{memory}";
1122
13
  std::string_view MachineClobbers = CGF.getTarget().getClobbers();
1123
13
  if (!MachineClobbers.empty()) {
1124
13
    Constraints += ',';
1125
13
    Constraints += MachineClobbers;
1126
13
  }
1127
13
  llvm::IntegerType *IntType = llvm::IntegerType::get(
1128
13
      CGF.getLLVMContext(),
1129
13
      CGF.getContext().getTypeSize(E->getArg(1)->getType()));
1130
13
  llvm::FunctionType *FTy =
1131
13
      llvm::FunctionType::get(CGF.Int8Ty, {CGF.UnqualPtrTy, IntType}, false);
1132
1133
13
  llvm::InlineAsm *IA =
1134
13
      llvm::InlineAsm::get(FTy, Asm, Constraints, /*hasSideEffects=*/true);
1135
13
  return CGF.Builder.CreateCall(IA, {BitBase, BitPos});
1136
13
}
1137
1138
static llvm::AtomicOrdering
1139
38
getBitTestAtomicOrdering(BitTest::InterlockingKind I) {
1140
38
  switch (I) {
1141
16
  case BitTest::Unlocked:   return llvm::AtomicOrdering::NotAtomic;
1142
10
  case BitTest::Sequential: return llvm::AtomicOrdering::SequentiallyConsistent;
1143
4
  case BitTest::Acquire:    return llvm::AtomicOrdering::Acquire;
1144
4
  case BitTest::Release:    return llvm::AtomicOrdering::Release;
1145
4
  case BitTest::NoFence:    return llvm::AtomicOrdering::Monotonic;
1146
38
  }
1147
0
  llvm_unreachable("invalid interlocking");
1148
0
}
1149
1150
/// Emit a _bittest* intrinsic. These intrinsics take a pointer to an array of
1151
/// bits and a bit position and read and optionally modify the bit at that
1152
/// position. The position index can be arbitrarily large, i.e. it can be larger
1153
/// than 31 or 63, so we need an indexed load in the general case.
1154
static llvm::Value *EmitBitTestIntrinsic(CodeGenFunction &CGF,
1155
                                         unsigned BuiltinID,
1156
51
                                         const CallExpr *E) {
1157
51
  Value *BitBase = CGF.EmitScalarExpr(E->getArg(0));
1158
51
  Value *BitPos = CGF.EmitScalarExpr(E->getArg(1));
1159
1160
51
  BitTest BT = BitTest::decodeBitTestBuiltin(BuiltinID);
1161
1162
  // X86 has special BT, BTC, BTR, and BTS instructions that handle the array
1163
  // indexing operation internally. Use them if possible.
1164
51
  if (CGF.getTarget().getTriple().isX86())
1165
13
    return EmitX86BitTestIntrinsic(CGF, BT, E, BitBase, BitPos);
1166
1167
  // Otherwise, use generic code to load one byte and test the bit. Use all but
1168
  // the bottom three bits as the array index, and the bottom three bits to form
1169
  // a mask.
1170
  // Bit = BitBaseI8[BitPos >> 3] & (1 << (BitPos & 0x7)) != 0;
1171
38
  Value *ByteIndex = CGF.Builder.CreateAShr(
1172
38
      BitPos, llvm::ConstantInt::get(BitPos->getType(), 3), "bittest.byteidx");
1173
38
  Value *BitBaseI8 = CGF.Builder.CreatePointerCast(BitBase, CGF.Int8PtrTy);
1174
38
  Address ByteAddr(CGF.Builder.CreateInBoundsGEP(CGF.Int8Ty, BitBaseI8,
1175
38
                                                 ByteIndex, "bittest.byteaddr"),
1176
38
                   CGF.Int8Ty, CharUnits::One());
1177
38
  Value *PosLow =
1178
38
      CGF.Builder.CreateAnd(CGF.Builder.CreateTrunc(BitPos, CGF.Int8Ty),
1179
38
                            llvm::ConstantInt::get(CGF.Int8Ty, 0x7));
1180
1181
  // The updating instructions will need a mask.
1182
38
  Value *Mask = nullptr;
1183
38
  if (BT.Action != BitTest::TestOnly) {
1184
34
    Mask = CGF.Builder.CreateShl(llvm::ConstantInt::get(CGF.Int8Ty, 1), PosLow,
1185
34
                                 "bittest.mask");
1186
34
  }
1187
1188
  // Check the action and ordering of the interlocked intrinsics.
1189
38
  llvm::AtomicOrdering Ordering = getBitTestAtomicOrdering(BT.Interlocking);
1190
1191
38
  Value *OldByte = nullptr;
1192
38
  if (Ordering != llvm::AtomicOrdering::NotAtomic) {
1193
    // Emit a combined atomicrmw load/store operation for the interlocked
1194
    // intrinsics.
1195
22
    llvm::AtomicRMWInst::BinOp RMWOp = llvm::AtomicRMWInst::Or;
1196
22
    if (BT.Action == BitTest::Reset) {
1197
10
      Mask = CGF.Builder.CreateNot(Mask);
1198
10
      RMWOp = llvm::AtomicRMWInst::And;
1199
10
    }
1200
22
    OldByte = CGF.Builder.CreateAtomicRMW(RMWOp, ByteAddr.getPointer(), Mask,
1201
22
                                          Ordering);
1202
22
  } else {
1203
    // Emit a plain load for the non-interlocked intrinsics.
1204
16
    OldByte = CGF.Builder.CreateLoad(ByteAddr, "bittest.byte");
1205
16
    Value *NewByte = nullptr;
1206
16
    switch (BT.Action) {
1207
4
    case BitTest::TestOnly:
1208
      // Don't store anything.
1209
4
      break;
1210
4
    case BitTest::Complement:
1211
4
      NewByte = CGF.Builder.CreateXor(OldByte, Mask);
1212
4
      break;
1213
4
    case BitTest::Reset:
1214
4
      NewByte = CGF.Builder.CreateAnd(OldByte, CGF.Builder.CreateNot(Mask));
1215
4
      break;
1216
4
    case BitTest::Set:
1217
4
      NewByte = CGF.Builder.CreateOr(OldByte, Mask);
1218
4
      break;
1219
16
    }
1220
16
    if (NewByte)
1221
12
      CGF.Builder.CreateStore(NewByte, ByteAddr);
1222
16
  }
1223
1224
  // However we loaded the old byte, either by plain load or atomicrmw, shift
1225
  // the bit into the low position and mask it to 0 or 1.
1226
38
  Value *ShiftedByte = CGF.Builder.CreateLShr(OldByte, PosLow, "bittest.shr");
1227
38
  return CGF.Builder.CreateAnd(
1228
38
      ShiftedByte, llvm::ConstantInt::get(CGF.Int8Ty, 1), "bittest.res");
1229
38
}
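The non-x86 path above implements the formula given in the comment, Bit = BitBaseI8[BitPos >> 3] & (1 << (BitPos & 0x7)) != 0. A minimal C++ sketch of the non-interlocked, test-only case follows; the helper name is hypothetical and the plain byte load stands in for the IR that EmitBitTestIntrinsic emits.

    // Illustrative sketch only: generic _bittest lowering, test-only and non-atomic.
    // BitPos may exceed 31/63, so the byte is chosen with BitPos >> 3 and the bit
    // within it with BitPos & 0x7.
    static unsigned char genericBitTestSketch(const unsigned char *BitBase, long BitPos) {
      unsigned char Byte = BitBase[BitPos >> 3];            // indexed byte load
      unsigned char PosLow = (unsigned char)(BitPos & 0x7); // bit position within the byte
      return (Byte >> PosLow) & 1;                          // 0 or 1, like "bittest.res"
    }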
1230
1231
static llvm::Value *emitPPCLoadReserveIntrinsic(CodeGenFunction &CGF,
1232
                                                unsigned BuiltinID,
1233
0
                                                const CallExpr *E) {
1234
0
  Value *Addr = CGF.EmitScalarExpr(E->getArg(0));
1235
1236
0
  SmallString<64> Asm;
1237
0
  raw_svector_ostream AsmOS(Asm);
1238
0
  llvm::IntegerType *RetType = CGF.Int32Ty;
1239
1240
0
  switch (BuiltinID) {
1241
0
  case clang::PPC::BI__builtin_ppc_ldarx:
1242
0
    AsmOS << "ldarx ";
1243
0
    RetType = CGF.Int64Ty;
1244
0
    break;
1245
0
  case clang::PPC::BI__builtin_ppc_lwarx:
1246
0
    AsmOS << "lwarx ";
1247
0
    RetType = CGF.Int32Ty;
1248
0
    break;
1249
0
  case clang::PPC::BI__builtin_ppc_lharx:
1250
0
    AsmOS << "lharx ";
1251
0
    RetType = CGF.Int16Ty;
1252
0
    break;
1253
0
  case clang::PPC::BI__builtin_ppc_lbarx:
1254
0
    AsmOS << "lbarx ";
1255
0
    RetType = CGF.Int8Ty;
1256
0
    break;
1257
0
  default:
1258
0
    llvm_unreachable("Expected only PowerPC load reserve intrinsics");
1259
0
  }
1260
1261
0
  AsmOS << "$0, ${1:y}";
1262
1263
0
  std::string Constraints = "=r,*Z,~{memory}";
1264
0
  std::string_view MachineClobbers = CGF.getTarget().getClobbers();
1265
0
  if (!MachineClobbers.empty()) {
1266
0
    Constraints += ',';
1267
0
    Constraints += MachineClobbers;
1268
0
  }
1269
1270
0
  llvm::Type *PtrType = CGF.UnqualPtrTy;
1271
0
  llvm::FunctionType *FTy = llvm::FunctionType::get(RetType, {PtrType}, false);
1272
1273
0
  llvm::InlineAsm *IA =
1274
0
      llvm::InlineAsm::get(FTy, Asm, Constraints, /*hasSideEffects=*/true);
1275
0
  llvm::CallInst *CI = CGF.Builder.CreateCall(IA, {Addr});
1276
0
  CI->addParamAttr(
1277
0
      0, Attribute::get(CGF.getLLVMContext(), Attribute::ElementType, RetType));
1278
0
  return CI;
1279
0
}
1280
1281
namespace {
1282
enum class MSVCSetJmpKind {
1283
  _setjmpex,
1284
  _setjmp3,
1285
  _setjmp
1286
};
1287
}
1288
1289
/// MSVC handles setjmp a bit differently on different platforms. On every
1290
/// architecture except 32-bit x86, the frame address is passed. On x86, extra
1291
/// parameters can be passed as variadic arguments, but we always pass none.
1292
static RValue EmitMSVCRTSetJmp(CodeGenFunction &CGF, MSVCSetJmpKind SJKind,
1293
12
                               const CallExpr *E) {
1294
12
  llvm::Value *Arg1 = nullptr;
1295
12
  llvm::Type *Arg1Ty = nullptr;
1296
12
  StringRef Name;
1297
12
  bool IsVarArg = false;
1298
12
  if (SJKind == MSVCSetJmpKind::_setjmp3) {
1299
2
    Name = "_setjmp3";
1300
2
    Arg1Ty = CGF.Int32Ty;
1301
2
    Arg1 = llvm::ConstantInt::get(CGF.IntTy, 0);
1302
2
    IsVarArg = true;
1303
10
  } else {
1304
10
    Name = SJKind == MSVCSetJmpKind::_setjmp ? "_setjmp" : "_setjmpex";
1305
10
    Arg1Ty = CGF.Int8PtrTy;
1306
10
    if (CGF.getTarget().getTriple().getArch() == llvm::Triple::aarch64) {
1307
4
      Arg1 = CGF.Builder.CreateCall(
1308
4
          CGF.CGM.getIntrinsic(Intrinsic::sponentry, CGF.AllocaInt8PtrTy));
1309
4
    } else
1310
6
      Arg1 = CGF.Builder.CreateCall(
1311
6
          CGF.CGM.getIntrinsic(Intrinsic::frameaddress, CGF.AllocaInt8PtrTy),
1312
6
          llvm::ConstantInt::get(CGF.Int32Ty, 0));
1313
10
  }
1314
1315
  // Mark the call site and declaration with ReturnsTwice.
1316
12
  llvm::Type *ArgTypes[2] = {CGF.Int8PtrTy, Arg1Ty};
1317
12
  llvm::AttributeList ReturnsTwiceAttr = llvm::AttributeList::get(
1318
12
      CGF.getLLVMContext(), llvm::AttributeList::FunctionIndex,
1319
12
      llvm::Attribute::ReturnsTwice);
1320
12
  llvm::FunctionCallee SetJmpFn = CGF.CGM.CreateRuntimeFunction(
1321
12
      llvm::FunctionType::get(CGF.IntTy, ArgTypes, IsVarArg), Name,
1322
12
      ReturnsTwiceAttr, /*Local=*/true);
1323
1324
12
  llvm::Value *Buf = CGF.Builder.CreateBitOrPointerCast(
1325
12
      CGF.EmitScalarExpr(E->getArg(0)), CGF.Int8PtrTy);
1326
12
  llvm::Value *Args[] = {Buf, Arg1};
1327
12
  llvm::CallBase *CB = CGF.EmitRuntimeCallOrInvoke(SetJmpFn, Args);
1328
12
  CB->setAttributes(ReturnsTwiceAttr);
1329
12
  return RValue::get(CB);
1330
12
}
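For orientation, the runtime calls that EmitMSVCRTSetJmp emits have roughly the shapes summarized below; this is an illustrative recap, and the real call and callee additionally carry the returns_twice attribute.

    // Illustrative only: shapes of the calls produced above.
    //   32-bit x86:     int _setjmp3(void *Buf, int Count, ...);   // Count is always 0
    //   other targets:  int _setjmp(void *Buf, void *FrameAddr);
    //                   int _setjmpex(void *Buf, void *FrameAddr);
    // FrameAddr comes from llvm.sponentry() on AArch64 and llvm.frameaddress(0) elsewhere.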
1331
1332
// Many of MSVC builtins are on x64, ARM and AArch64; to avoid repeating code,
1333
// we handle them here.
1334
enum class CodeGenFunction::MSVCIntrin {
1335
  _BitScanForward,
1336
  _BitScanReverse,
1337
  _InterlockedAnd,
1338
  _InterlockedDecrement,
1339
  _InterlockedExchange,
1340
  _InterlockedExchangeAdd,
1341
  _InterlockedExchangeSub,
1342
  _InterlockedIncrement,
1343
  _InterlockedOr,
1344
  _InterlockedXor,
1345
  _InterlockedExchangeAdd_acq,
1346
  _InterlockedExchangeAdd_rel,
1347
  _InterlockedExchangeAdd_nf,
1348
  _InterlockedExchange_acq,
1349
  _InterlockedExchange_rel,
1350
  _InterlockedExchange_nf,
1351
  _InterlockedCompareExchange_acq,
1352
  _InterlockedCompareExchange_rel,
1353
  _InterlockedCompareExchange_nf,
1354
  _InterlockedCompareExchange128,
1355
  _InterlockedCompareExchange128_acq,
1356
  _InterlockedCompareExchange128_rel,
1357
  _InterlockedCompareExchange128_nf,
1358
  _InterlockedOr_acq,
1359
  _InterlockedOr_rel,
1360
  _InterlockedOr_nf,
1361
  _InterlockedXor_acq,
1362
  _InterlockedXor_rel,
1363
  _InterlockedXor_nf,
1364
  _InterlockedAnd_acq,
1365
  _InterlockedAnd_rel,
1366
  _InterlockedAnd_nf,
1367
  _InterlockedIncrement_acq,
1368
  _InterlockedIncrement_rel,
1369
  _InterlockedIncrement_nf,
1370
  _InterlockedDecrement_acq,
1371
  _InterlockedDecrement_rel,
1372
  _InterlockedDecrement_nf,
1373
  __fastfail,
1374
};
1375
1376
static std::optional<CodeGenFunction::MSVCIntrin>
1377
6.56k
translateArmToMsvcIntrin(unsigned BuiltinID) {
1378
6.56k
  using MSVCIntrin = CodeGenFunction::MSVCIntrin;
1379
6.56k
  switch (BuiltinID) {
1380
6.40k
  default:
1381
6.40k
    return std::nullopt;
1382
3
  case clang::ARM::BI_BitScanForward:
1383
4
  case clang::ARM::BI_BitScanForward64:
1384
4
    return MSVCIntrin::_BitScanForward;
1385
3
  case clang::ARM::BI_BitScanReverse:
1386
4
  case clang::ARM::BI_BitScanReverse64:
1387
4
    return MSVCIntrin::_BitScanReverse;
1388
1
  case clang::ARM::BI_InterlockedAnd64:
1389
1
    return MSVCIntrin::_InterlockedAnd;
1390
1
  case clang::ARM::BI_InterlockedExchange64:
1391
1
    return MSVCIntrin::_InterlockedExchange;
1392
1
  case clang::ARM::BI_InterlockedExchangeAdd64:
1393
1
    return MSVCIntrin::_InterlockedExchangeAdd;
1394
1
  case clang::ARM::BI_InterlockedExchangeSub64:
1395
1
    return MSVCIntrin::_InterlockedExchangeSub;
1396
1
  case clang::ARM::BI_InterlockedOr64:
1397
1
    return MSVCIntrin::_InterlockedOr;
1398
1
  case clang::ARM::BI_InterlockedXor64:
1399
1
    return MSVCIntrin::_InterlockedXor;
1400
1
  case clang::ARM::BI_InterlockedDecrement64:
1401
1
    return MSVCIntrin::_InterlockedDecrement;
1402
1
  case clang::ARM::BI_InterlockedIncrement64:
1403
1
    return MSVCIntrin::_InterlockedIncrement;
1404
1
  case clang::ARM::BI_InterlockedExchangeAdd8_acq:
1405
2
  case clang::ARM::BI_InterlockedExchangeAdd16_acq:
1406
5
  case clang::ARM::BI_InterlockedExchangeAdd_acq:
1407
6
  case clang::ARM::BI_InterlockedExchangeAdd64_acq:
1408
6
    return MSVCIntrin::_InterlockedExchangeAdd_acq;
1409
1
  case clang::ARM::BI_InterlockedExchangeAdd8_rel:
1410
2
  case clang::ARM::BI_InterlockedExchangeAdd16_rel:
1411
5
  case clang::ARM::BI_InterlockedExchangeAdd_rel:
1412
6
  case clang::ARM::BI_InterlockedExchangeAdd64_rel:
1413
6
    return MSVCIntrin::_InterlockedExchangeAdd_rel;
1414
1
  case clang::ARM::BI_InterlockedExchangeAdd8_nf:
1415
2
  case clang::ARM::BI_InterlockedExchangeAdd16_nf:
1416
5
  case clang::ARM::BI_InterlockedExchangeAdd_nf:
1417
6
  case clang::ARM::BI_InterlockedExchangeAdd64_nf:
1418
6
    return MSVCIntrin::_InterlockedExchangeAdd_nf;
1419
1
  case clang::ARM::BI_InterlockedExchange8_acq:
1420
2
  case clang::ARM::BI_InterlockedExchange16_acq:
1421
5
  case clang::ARM::BI_InterlockedExchange_acq:
1422
6
  case clang::ARM::BI_InterlockedExchange64_acq:
1423
6
    return MSVCIntrin::_InterlockedExchange_acq;
1424
1
  case clang::ARM::BI_InterlockedExchange8_rel:
1425
2
  case clang::ARM::BI_InterlockedExchange16_rel:
1426
5
  case clang::ARM::BI_InterlockedExchange_rel:
1427
6
  case clang::ARM::BI_InterlockedExchange64_rel:
1428
6
    return MSVCIntrin::_InterlockedExchange_rel;
1429
1
  case clang::ARM::BI_InterlockedExchange8_nf:
1430
2
  case clang::ARM::BI_InterlockedExchange16_nf:
1431
5
  case clang::ARM::BI_InterlockedExchange_nf:
1432
6
  case clang::ARM::BI_InterlockedExchange64_nf:
1433
6
    return MSVCIntrin::_InterlockedExchange_nf;
1434
1
  case clang::ARM::BI_InterlockedCompareExchange8_acq:
1435
2
  case clang::ARM::BI_InterlockedCompareExchange16_acq:
1436
5
  case clang::ARM::BI_InterlockedCompareExchange_acq:
1437
6
  case clang::ARM::BI_InterlockedCompareExchange64_acq:
1438
6
    return MSVCIntrin::_InterlockedCompareExchange_acq;
1439
1
  case clang::ARM::BI_InterlockedCompareExchange8_rel:
1440
2
  case clang::ARM::BI_InterlockedCompareExchange16_rel:
1441
5
  case clang::ARM::BI_InterlockedCompareExchange_rel:
1442
6
  case clang::ARM::BI_InterlockedCompareExchange64_rel:
1443
6
    return MSVCIntrin::_InterlockedCompareExchange_rel;
1444
1
  case clang::ARM::BI_InterlockedCompareExchange8_nf:
1445
2
  case clang::ARM::BI_InterlockedCompareExchange16_nf:
1446
5
  case clang::ARM::BI_InterlockedCompareExchange_nf:
1447
6
  case clang::ARM::BI_InterlockedCompareExchange64_nf:
1448
6
    return MSVCIntrin::_InterlockedCompareExchange_nf;
1449
1
  case clang::ARM::BI_InterlockedOr8_acq:
1450
2
  case clang::ARM::BI_InterlockedOr16_acq:
1451
5
  case clang::ARM::BI_InterlockedOr_acq:
1452
6
  case clang::ARM::BI_InterlockedOr64_acq:
1453
6
    return MSVCIntrin::_InterlockedOr_acq;
1454
1
  case clang::ARM::BI_InterlockedOr8_rel:
1455
2
  case clang::ARM::BI_InterlockedOr16_rel:
1456
5
  case clang::ARM::BI_InterlockedOr_rel:
1457
6
  case clang::ARM::BI_InterlockedOr64_rel:
1458
6
    return MSVCIntrin::_InterlockedOr_rel;
1459
1
  case clang::ARM::BI_InterlockedOr8_nf:
1460
2
  case clang::ARM::BI_InterlockedOr16_nf:
1461
5
  case clang::ARM::BI_InterlockedOr_nf:
1462
6
  case clang::ARM::BI_InterlockedOr64_nf:
1463
6
    return MSVCIntrin::_InterlockedOr_nf;
1464
1
  case clang::ARM::BI_InterlockedXor8_acq:
1465
2
  case clang::ARM::BI_InterlockedXor16_acq:
1466
5
  case clang::ARM::BI_InterlockedXor_acq:
1467
6
  case clang::ARM::BI_InterlockedXor64_acq:
1468
6
    return MSVCIntrin::_InterlockedXor_acq;
1469
1
  case clang::ARM::BI_InterlockedXor8_rel:
1470
2
  case clang::ARM::BI_InterlockedXor16_rel:
1471
5
  case clang::ARM::BI_InterlockedXor_rel:
1472
6
  case clang::ARM::BI_InterlockedXor64_rel:
1473
6
    return MSVCIntrin::_InterlockedXor_rel;
1474
1
  case clang::ARM::BI_InterlockedXor8_nf:
1475
2
  case clang::ARM::BI_InterlockedXor16_nf:
1476
5
  case clang::ARM::BI_InterlockedXor_nf:
1477
6
  case clang::ARM::BI_InterlockedXor64_nf:
1478
6
    return MSVCIntrin::_InterlockedXor_nf;
1479
1
  case clang::ARM::BI_InterlockedAnd8_acq:
1480
2
  case clang::ARM::BI_InterlockedAnd16_acq:
1481
5
  case clang::ARM::BI_InterlockedAnd_acq:
1482
6
  case clang::ARM::BI_InterlockedAnd64_acq:
1483
6
    return MSVCIntrin::_InterlockedAnd_acq;
1484
1
  case clang::ARM::BI_InterlockedAnd8_rel:
1485
2
  case clang::ARM::BI_InterlockedAnd16_rel:
1486
5
  case clang::ARM::BI_InterlockedAnd_rel:
1487
6
  case clang::ARM::BI_InterlockedAnd64_rel:
1488
6
    return MSVCIntrin::_InterlockedAnd_rel;
1489
1
  case clang::ARM::BI_InterlockedAnd8_nf:
1490
2
  case clang::ARM::BI_InterlockedAnd16_nf:
1491
5
  case clang::ARM::BI_InterlockedAnd_nf:
1492
6
  case clang::ARM::BI_InterlockedAnd64_nf:
1493
6
    return MSVCIntrin::_InterlockedAnd_nf;
1494
1
  case clang::ARM::BI_InterlockedIncrement16_acq:
1495
4
  case clang::ARM::BI_InterlockedIncrement_acq:
1496
5
  case clang::ARM::BI_InterlockedIncrement64_acq:
1497
5
    return MSVCIntrin::_InterlockedIncrement_acq;
1498
1
  case clang::ARM::BI_InterlockedIncrement16_rel:
1499
4
  case clang::ARM::BI_InterlockedIncrement_rel:
1500
5
  case clang::ARM::BI_InterlockedIncrement64_rel:
1501
5
    return MSVCIntrin::_InterlockedIncrement_rel;
1502
1
  case clang::ARM::BI_InterlockedIncrement16_nf:
1503
4
  case clang::ARM::BI_InterlockedIncrement_nf:
1504
5
  case clang::ARM::BI_InterlockedIncrement64_nf:
1505
5
    return MSVCIntrin::_InterlockedIncrement_nf;
1506
1
  case clang::ARM::BI_InterlockedDecrement16_acq:
1507
4
  case clang::ARM::BI_InterlockedDecrement_acq:
1508
5
  case clang::ARM::BI_InterlockedDecrement64_acq:
1509
5
    return MSVCIntrin::_InterlockedDecrement_acq;
1510
1
  case clang::ARM::BI_InterlockedDecrement16_rel:
1511
4
  case clang::ARM::BI_InterlockedDecrement_rel:
1512
5
  case clang::ARM::BI_InterlockedDecrement64_rel:
1513
5
    return MSVCIntrin::_InterlockedDecrement_rel;
1514
1
  case clang::ARM::BI_InterlockedDecrement16_nf:
1515
4
  case clang::ARM::BI_InterlockedDecrement_nf:
1516
5
  case clang::ARM::BI_InterlockedDecrement64_nf:
1517
5
    return MSVCIntrin::_InterlockedDecrement_nf;
1518
6.56k
  }
1519
0
  llvm_unreachable("must return from switch");
1520
0
}
1521
1522
static std::optional<CodeGenFunction::MSVCIntrin>
1523
3.97k
translateAarch64ToMsvcIntrin(unsigned BuiltinID) {
1524
3.97k
  using MSVCIntrin = CodeGenFunction::MSVCIntrin;
1525
3.97k
  switch (BuiltinID) {
1526
3.76k
  default:
1527
3.76k
    return std::nullopt;
1528
5
  case clang::AArch64::BI_BitScanForward:
1529
6
  case clang::AArch64::BI_BitScanForward64:
1530
6
    return MSVCIntrin::_BitScanForward;
1531
5
  case clang::AArch64::BI_BitScanReverse:
1532
6
  case clang::AArch64::BI_BitScanReverse64:
1533
6
    return MSVCIntrin::_BitScanReverse;
1534
1
  case clang::AArch64::BI_InterlockedAnd64:
1535
1
    return MSVCIntrin::_InterlockedAnd;
1536
1
  case clang::AArch64::BI_InterlockedExchange64:
1537
1
    return MSVCIntrin::_InterlockedExchange;
1538
1
  case clang::AArch64::BI_InterlockedExchangeAdd64:
1539
1
    return MSVCIntrin::_InterlockedExchangeAdd;
1540
1
  case clang::AArch64::BI_InterlockedExchangeSub64:
1541
1
    return MSVCIntrin::_InterlockedExchangeSub;
1542
1
  case clang::AArch64::BI_InterlockedOr64:
1543
1
    return MSVCIntrin::_InterlockedOr;
1544
1
  case clang::AArch64::BI_InterlockedXor64:
1545
1
    return MSVCIntrin::_InterlockedXor;
1546
1
  case clang::AArch64::BI_InterlockedDecrement64:
1547
1
    return MSVCIntrin::_InterlockedDecrement;
1548
1
  case clang::AArch64::BI_InterlockedIncrement64:
1549
1
    return MSVCIntrin::_InterlockedIncrement;
1550
1
  case clang::AArch64::BI_InterlockedExchangeAdd8_acq:
1551
2
  case clang::AArch64::BI_InterlockedExchangeAdd16_acq:
1552
7
  case clang::AArch64::BI_InterlockedExchangeAdd_acq:
1553
8
  case clang::AArch64::BI_InterlockedExchangeAdd64_acq:
1554
8
    return MSVCIntrin::_InterlockedExchangeAdd_acq;
1555
1
  case clang::AArch64::BI_InterlockedExchangeAdd8_rel:
1556
2
  case clang::AArch64::BI_InterlockedExchangeAdd16_rel:
1557
7
  case clang::AArch64::BI_InterlockedExchangeAdd_rel:
1558
8
  case clang::AArch64::BI_InterlockedExchangeAdd64_rel:
1559
8
    return MSVCIntrin::_InterlockedExchangeAdd_rel;
1560
1
  case clang::AArch64::BI_InterlockedExchangeAdd8_nf:
1561
2
  case clang::AArch64::BI_InterlockedExchangeAdd16_nf:
1562
7
  case clang::AArch64::BI_InterlockedExchangeAdd_nf:
1563
8
  case clang::AArch64::BI_InterlockedExchangeAdd64_nf:
1564
8
    return MSVCIntrin::_InterlockedExchangeAdd_nf;
1565
1
  case clang::AArch64::BI_InterlockedExchange8_acq:
1566
2
  case clang::AArch64::BI_InterlockedExchange16_acq:
1567
7
  case clang::AArch64::BI_InterlockedExchange_acq:
1568
8
  case clang::AArch64::BI_InterlockedExchange64_acq:
1569
8
    return MSVCIntrin::_InterlockedExchange_acq;
1570
1
  case clang::AArch64::BI_InterlockedExchange8_rel:
1571
2
  case clang::AArch64::BI_InterlockedExchange16_rel:
1572
7
  case clang::AArch64::BI_InterlockedExchange_rel:
1573
8
  case clang::AArch64::BI_InterlockedExchange64_rel:
1574
8
    return MSVCIntrin::_InterlockedExchange_rel;
1575
1
  case clang::AArch64::BI_InterlockedExchange8_nf:
1576
2
  case clang::AArch64::BI_InterlockedExchange16_nf:
1577
7
  case clang::AArch64::BI_InterlockedExchange_nf:
1578
8
  case clang::AArch64::BI_InterlockedExchange64_nf:
1579
8
    return MSVCIntrin::_InterlockedExchange_nf;
1580
1
  case clang::AArch64::BI_InterlockedCompareExchange8_acq:
1581
2
  case clang::AArch64::BI_InterlockedCompareExchange16_acq:
1582
7
  case clang::AArch64::BI_InterlockedCompareExchange_acq:
1583
8
  case clang::AArch64::BI_InterlockedCompareExchange64_acq:
1584
8
    return MSVCIntrin::_InterlockedCompareExchange_acq;
1585
1
  case clang::AArch64::BI_InterlockedCompareExchange8_rel:
1586
2
  case clang::AArch64::BI_InterlockedCompareExchange16_rel:
1587
7
  case clang::AArch64::BI_InterlockedCompareExchange_rel:
1588
8
  case clang::AArch64::BI_InterlockedCompareExchange64_rel:
1589
8
    return MSVCIntrin::_InterlockedCompareExchange_rel;
1590
1
  case clang::AArch64::BI_InterlockedCompareExchange8_nf:
1591
2
  case clang::AArch64::BI_InterlockedCompareExchange16_nf:
1592
7
  case clang::AArch64::BI_InterlockedCompareExchange_nf:
1593
8
  case clang::AArch64::BI_InterlockedCompareExchange64_nf:
1594
8
    return MSVCIntrin::_InterlockedCompareExchange_nf;
1595
1
  case clang::AArch64::BI_InterlockedCompareExchange128:
1596
1
    return MSVCIntrin::_InterlockedCompareExchange128;
1597
1
  case clang::AArch64::BI_InterlockedCompareExchange128_acq:
1598
1
    return MSVCIntrin::_InterlockedCompareExchange128_acq;
1599
1
  case clang::AArch64::BI_InterlockedCompareExchange128_nf:
1600
1
    return MSVCIntrin::_InterlockedCompareExchange128_nf;
1601
1
  case clang::AArch64::BI_InterlockedCompareExchange128_rel:
1602
1
    return MSVCIntrin::_InterlockedCompareExchange128_rel;
1603
1
  case clang::AArch64::BI_InterlockedOr8_acq:
1604
2
  case clang::AArch64::BI_InterlockedOr16_acq:
1605
7
  case clang::AArch64::BI_InterlockedOr_acq:
1606
8
  case clang::AArch64::BI_InterlockedOr64_acq:
1607
8
    return MSVCIntrin::_InterlockedOr_acq;
1608
1
  case clang::AArch64::BI_InterlockedOr8_rel:
1609
2
  case clang::AArch64::BI_InterlockedOr16_rel:
1610
7
  case clang::AArch64::BI_InterlockedOr_rel:
1611
8
  case clang::AArch64::BI_InterlockedOr64_rel:
1612
8
    return MSVCIntrin::_InterlockedOr_rel;
1613
1
  case clang::AArch64::BI_InterlockedOr8_nf:
1614
2
  case clang::AArch64::BI_InterlockedOr16_nf:
1615
7
  case clang::AArch64::BI_InterlockedOr_nf:
1616
8
  case clang::AArch64::BI_InterlockedOr64_nf:
1617
8
    return MSVCIntrin::_InterlockedOr_nf;
1618
1
  case clang::AArch64::BI_InterlockedXor8_acq:
1619
2
  case clang::AArch64::BI_InterlockedXor16_acq:
1620
7
  case clang::AArch64::BI_InterlockedXor_acq:
1621
8
  case clang::AArch64::BI_InterlockedXor64_acq:
1622
8
    return MSVCIntrin::_InterlockedXor_acq;
1623
1
  case clang::AArch64::BI_InterlockedXor8_rel:
1624
2
  case clang::AArch64::BI_InterlockedXor16_rel:
1625
7
  case clang::AArch64::BI_InterlockedXor_rel:
1626
8
  case clang::AArch64::BI_InterlockedXor64_rel:
1627
8
    return MSVCIntrin::_InterlockedXor_rel;
1628
1
  case clang::AArch64::BI_InterlockedXor8_nf:
1629
2
  case clang::AArch64::BI_InterlockedXor16_nf:
1630
7
  case clang::AArch64::BI_InterlockedXor_nf:
1631
8
  case clang::AArch64::BI_InterlockedXor64_nf:
1632
8
    return MSVCIntrin::_InterlockedXor_nf;
1633
1
  case clang::AArch64::BI_InterlockedAnd8_acq:
1634
2
  case clang::AArch64::BI_InterlockedAnd16_acq:
1635
7
  case clang::AArch64::BI_InterlockedAnd_acq:
1636
8
  case clang::AArch64::BI_InterlockedAnd64_acq:
1637
8
    return MSVCIntrin::_InterlockedAnd_acq;
1638
1
  case clang::AArch64::BI_InterlockedAnd8_rel:
1639
2
  case clang::AArch64::BI_InterlockedAnd16_rel:
1640
7
  case clang::AArch64::BI_InterlockedAnd_rel:
1641
8
  case clang::AArch64::BI_InterlockedAnd64_rel:
1642
8
    return MSVCIntrin::_InterlockedAnd_rel;
1643
1
  case clang::AArch64::BI_InterlockedAnd8_nf:
1644
2
  case clang::AArch64::BI_InterlockedAnd16_nf:
1645
7
  case clang::AArch64::BI_InterlockedAnd_nf:
1646
8
  case clang::AArch64::BI_InterlockedAnd64_nf:
1647
8
    return MSVCIntrin::_InterlockedAnd_nf;
1648
1
  case clang::AArch64::BI_InterlockedIncrement16_acq:
1649
6
  case clang::AArch64::BI_InterlockedIncrement_acq:
1650
7
  case clang::AArch64::BI_InterlockedIncrement64_acq:
1651
7
    return MSVCIntrin::_InterlockedIncrement_acq;
1652
1
  case clang::AArch64::BI_InterlockedIncrement16_rel:
1653
6
  case clang::AArch64::BI_InterlockedIncrement_rel:
1654
7
  case clang::AArch64::BI_InterlockedIncrement64_rel:
1655
7
    return MSVCIntrin::_InterlockedIncrement_rel;
1656
1
  case clang::AArch64::BI_InterlockedIncrement16_nf:
1657
6
  case clang::AArch64::BI_InterlockedIncrement_nf:
1658
7
  case clang::AArch64::BI_InterlockedIncrement64_nf:
1659
7
    return MSVCIntrin::_InterlockedIncrement_nf;
1660
1
  case clang::AArch64::BI_InterlockedDecrement16_acq:
1661
6
  case clang::AArch64::BI_InterlockedDecrement_acq:
1662
7
  case clang::AArch64::BI_InterlockedDecrement64_acq:
1663
7
    return MSVCIntrin::_InterlockedDecrement_acq;
1664
1
  case clang::AArch64::BI_InterlockedDecrement16_rel:
1665
6
  case clang::AArch64::BI_InterlockedDecrement_rel:
1666
7
  case clang::AArch64::BI_InterlockedDecrement64_rel:
1667
7
    return MSVCIntrin::_InterlockedDecrement_rel;
1668
1
  case clang::AArch64::BI_InterlockedDecrement16_nf:
1669
6
  case clang::AArch64::BI_InterlockedDecrement_nf:
1670
7
  case clang::AArch64::BI_InterlockedDecrement64_nf:
1671
7
    return MSVCIntrin::_InterlockedDecrement_nf;
1672
3.97k
  }
1673
0
  llvm_unreachable("must return from switch");
1674
0
}
1675
1676
static std::optional<CodeGenFunction::MSVCIntrin>
1677
9.19k
translateX86ToMsvcIntrin(unsigned BuiltinID) {
1678
9.19k
  using MSVCIntrin = CodeGenFunction::MSVCIntrin;
1679
9.19k
  switch (BuiltinID) {
1680
9.15k
  default:
1681
9.15k
    return std::nullopt;
1682
6
  case clang::X86::BI_BitScanForward:
1683
11
  case clang::X86::BI_BitScanForward64:
1684
11
    return MSVCIntrin::_BitScanForward;
1685
6
  case clang::X86::BI_BitScanReverse:
1686
11
  case clang::X86::BI_BitScanReverse64:
1687
11
    return MSVCIntrin::_BitScanReverse;
1688
2
  case clang::X86::BI_InterlockedAnd64:
1689
2
    return MSVCIntrin::_InterlockedAnd;
1690
1
  case clang::X86::BI_InterlockedCompareExchange128:
1691
1
    return MSVCIntrin::_InterlockedCompareExchange128;
1692
2
  case clang::X86::BI_InterlockedExchange64:
1693
2
    return MSVCIntrin::_InterlockedExchange;
1694
2
  case clang::X86::BI_InterlockedExchangeAdd64:
1695
2
    return MSVCIntrin::_InterlockedExchangeAdd;
1696
2
  case clang::X86::BI_InterlockedExchangeSub64:
1697
2
    return MSVCIntrin::_InterlockedExchangeSub;
1698
2
  case clang::X86::BI_InterlockedOr64:
1699
2
    return MSVCIntrin::_InterlockedOr;
1700
2
  case clang::X86::BI_InterlockedXor64:
1701
2
    return MSVCIntrin::_InterlockedXor;
1702
2
  case clang::X86::BI_InterlockedDecrement64:
1703
2
    return MSVCIntrin::_InterlockedDecrement;
1704
2
  case clang::X86::BI_InterlockedIncrement64:
1705
2
    return MSVCIntrin::_InterlockedIncrement;
1706
9.19k
  }
1707
0
  llvm_unreachable("must return from switch");
1708
0
}
1709
1710
// Emit an MSVC intrinsic. Assumes that arguments have *not* been evaluated.
1711
Value *CodeGenFunction::EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID,
1712
579
                                            const CallExpr *E) {
1713
579
  switch (BuiltinID) {
1714
21
  case MSVCIntrin::_BitScanForward:
1715
42
  case MSVCIntrin::_BitScanReverse: {
1716
42
    Address IndexAddress(EmitPointerWithAlignment(E->getArg(0)));
1717
42
    Value *ArgValue = EmitScalarExpr(E->getArg(1));
1718
1719
42
    llvm::Type *ArgType = ArgValue->getType();
1720
42
    llvm::Type *IndexType = IndexAddress.getElementType();
1721
42
    llvm::Type *ResultType = ConvertType(E->getType());
1722
1723
42
    Value *ArgZero = llvm::Constant::getNullValue(ArgType);
1724
42
    Value *ResZero = llvm::Constant::getNullValue(ResultType);
1725
42
    Value *ResOne = llvm::ConstantInt::get(ResultType, 1);
1726
1727
42
    BasicBlock *Begin = Builder.GetInsertBlock();
1728
42
    BasicBlock *End = createBasicBlock("bitscan_end", this->CurFn);
1729
42
    Builder.SetInsertPoint(End);
1730
42
    PHINode *Result = Builder.CreatePHI(ResultType, 2, "bitscan_result");
1731
1732
42
    Builder.SetInsertPoint(Begin);
1733
42
    Value *IsZero = Builder.CreateICmpEQ(ArgValue, ArgZero);
1734
42
    BasicBlock *NotZero = createBasicBlock("bitscan_not_zero", this->CurFn);
1735
42
    Builder.CreateCondBr(IsZero, End, NotZero);
1736
42
    Result->addIncoming(ResZero, Begin);
1737
1738
42
    Builder.SetInsertPoint(NotZero);
1739
1740
42
    if (BuiltinID == MSVCIntrin::_BitScanForward) {
1741
21
      Function *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
1742
21
      Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()});
1743
21
      ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false);
1744
21
      Builder.CreateStore(ZeroCount, IndexAddress, false);
1745
21
    } else {
1746
21
      unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth();
1747
21
      Value *ArgTypeLastIndex = llvm::ConstantInt::get(IndexType, ArgWidth - 1);
1748
1749
21
      Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
1750
21
      Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()});
1751
21
      ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false);
1752
21
      Value *Index = Builder.CreateNSWSub(ArgTypeLastIndex, ZeroCount);
1753
21
      Builder.CreateStore(Index, IndexAddress, false);
1754
21
    }
1755
42
    Builder.CreateBr(End);
1756
42
    Result->addIncoming(ResOne, NotZero);
1757
1758
42
    Builder.SetInsertPoint(End);
1759
42
    return Result;
1760
21
  }
1761
26
  case MSVCIntrin::_InterlockedAnd:
1762
26
    return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E);
1763
30
  case MSVCIntrin::_InterlockedExchange:
1764
30
    return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E);
1765
26
  case MSVCIntrin::_InterlockedExchangeAdd:
1766
26
    return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E);
1767
26
  case MSVCIntrin::_InterlockedExchangeSub:
1768
26
    return MakeBinaryAtomicValue(*this, AtomicRMWInst::Sub, E);
1769
26
  case MSVCIntrin::_InterlockedOr:
1770
26
    return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E);
1771
26
  case MSVCIntrin::_InterlockedXor:
1772
26
    return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E);
1773
14
  case MSVCIntrin::_InterlockedExchangeAdd_acq:
1774
14
    return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E,
1775
14
                                 AtomicOrdering::Acquire);
1776
14
  case MSVCIntrin::_InterlockedExchangeAdd_rel:
1777
14
    return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E,
1778
14
                                 AtomicOrdering::Release);
1779
14
  case MSVCIntrin::_InterlockedExchangeAdd_nf:
1780
14
    return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E,
1781
14
                                 AtomicOrdering::Monotonic);
1782
14
  case MSVCIntrin::_InterlockedExchange_acq:
1783
14
    return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,
1784
14
                                 AtomicOrdering::Acquire);
1785
14
  case MSVCIntrin::_InterlockedExchange_rel:
1786
14
    return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,
1787
14
                                 AtomicOrdering::Release);
1788
14
  case MSVCIntrin::_InterlockedExchange_nf:
1789
14
    return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,
1790
14
                                 AtomicOrdering::Monotonic);
1791
14
  case MSVCIntrin::_InterlockedCompareExchange_acq:
1792
14
    return EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Acquire);
1793
14
  case MSVCIntrin::_InterlockedCompareExchange_rel:
1794
14
    return EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Release);
1795
14
  case MSVCIntrin::_InterlockedCompareExchange_nf:
1796
14
    return EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Monotonic);
1797
2
  case MSVCIntrin::_InterlockedCompareExchange128:
1798
2
    return EmitAtomicCmpXchg128ForMSIntrin(
1799
2
        *this, E, AtomicOrdering::SequentiallyConsistent);
1800
1
  case MSVCIntrin::_InterlockedCompareExchange128_acq:
1801
1
    return EmitAtomicCmpXchg128ForMSIntrin(*this, E, AtomicOrdering::Acquire);
1802
1
  case MSVCIntrin::_InterlockedCompareExchange128_rel:
1803
1
    return EmitAtomicCmpXchg128ForMSIntrin(*this, E, AtomicOrdering::Release);
1804
1
  case MSVCIntrin::_InterlockedCompareExchange128_nf:
1805
1
    return EmitAtomicCmpXchg128ForMSIntrin(*this, E, AtomicOrdering::Monotonic);
1806
14
  case MSVCIntrin::_InterlockedOr_acq:
1807
14
    return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,
1808
14
                                 AtomicOrdering::Acquire);
1809
14
  case MSVCIntrin::_InterlockedOr_rel:
1810
14
    return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,
1811
14
                                 AtomicOrdering::Release);
1812
14
  case MSVCIntrin::_InterlockedOr_nf:
1813
14
    return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,
1814
14
                                 AtomicOrdering::Monotonic);
1815
14
  case MSVCIntrin::_InterlockedXor_acq:
1816
14
    return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E,
1817
14
                                 AtomicOrdering::Acquire);
1818
14
  case MSVCIntrin::_InterlockedXor_rel:
1819
14
    return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E,
1820
14
                                 AtomicOrdering::Release);
1821
14
  case MSVCIntrin::_InterlockedXor_nf:
1822
14
    return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E,
1823
14
                                 AtomicOrdering::Monotonic);
1824
14
  case MSVCIntrin::_InterlockedAnd_acq:
1825
14
    return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E,
1826
14
                                 AtomicOrdering::Acquire);
1827
14
  case MSVCIntrin::_InterlockedAnd_rel:
1828
14
    return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E,
1829
14
                                 AtomicOrdering::Release);
1830
14
  case MSVCIntrin::_InterlockedAnd_nf:
1831
14
    return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E,
1832
14
                                 AtomicOrdering::Monotonic);
1833
12
  case MSVCIntrin::_InterlockedIncrement_acq:
1834
12
    return EmitAtomicIncrementValue(*this, E, AtomicOrdering::Acquire);
1835
12
  case MSVCIntrin::_InterlockedIncrement_rel:
1836
12
    return EmitAtomicIncrementValue(*this, E, AtomicOrdering::Release);
1837
12
  case MSVCIntrin::_InterlockedIncrement_nf:
1838
12
    return EmitAtomicIncrementValue(*this, E, AtomicOrdering::Monotonic);
1839
12
  case MSVCIntrin::_InterlockedDecrement_acq:
1840
12
    return EmitAtomicDecrementValue(*this, E, AtomicOrdering::Acquire);
1841
12
  case MSVCIntrin::_InterlockedDecrement_rel:
1842
12
    return EmitAtomicDecrementValue(*this, E, AtomicOrdering::Release);
1843
12
  case MSVCIntrin::_InterlockedDecrement_nf:
1844
12
    return EmitAtomicDecrementValue(*this, E, AtomicOrdering::Monotonic);
1845
1846
22
  case MSVCIntrin::_InterlockedDecrement:
1847
22
    return EmitAtomicDecrementValue(*this, E);
1848
22
  case MSVCIntrin::_InterlockedIncrement:
1849
22
    return EmitAtomicIncrementValue(*this, E);
1850
1851
4
  case MSVCIntrin::__fastfail: {
1852
    // Request immediate process termination from the kernel. The instruction
1853
    // sequences to do this are documented on MSDN:
1854
    // https://msdn.microsoft.com/en-us/library/dn774154.aspx
1855
4
    llvm::Triple::ArchType ISA = getTarget().getTriple().getArch();
1856
4
    StringRef Asm, Constraints;
1857
4
    switch (ISA) {
1858
0
    default:
1859
0
      ErrorUnsupported(E, "__fastfail call for this architecture");
1860
0
      break;
1861
1
    case llvm::Triple::x86:
1862
2
    case llvm::Triple::x86_64:
1863
2
      Asm = "int $$0x29";
1864
2
      Constraints = "{cx}";
1865
2
      break;
1866
1
    case llvm::Triple::thumb:
1867
1
      Asm = "udf #251";
1868
1
      Constraints = "{r0}";
1869
1
      break;
1870
1
    case llvm::Triple::aarch64:
1871
1
      Asm = "brk #0xF003";
1872
1
      Constraints = "{w0}";
1873
4
    }
1874
4
    llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, {Int32Ty}, false);
1875
4
    llvm::InlineAsm *IA =
1876
4
        llvm::InlineAsm::get(FTy, Asm, Constraints, /*hasSideEffects=*/true);
1877
4
    llvm::AttributeList NoReturnAttr = llvm::AttributeList::get(
1878
4
        getLLVMContext(), llvm::AttributeList::FunctionIndex,
1879
4
        llvm::Attribute::NoReturn);
1880
4
    llvm::CallInst *CI = Builder.CreateCall(IA, EmitScalarExpr(E->getArg(0)));
1881
4
    CI->setAttributes(NoReturnAttr);
1882
4
    return CI;
1883
4
  }
1884
579
  }
1885
0
  llvm_unreachable("Incorrect MSVC intrinsic!");
1886
0
}
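The _BitScanForward/_BitScanReverse case at the top of EmitMSVCBuiltinExpr builds a small diamond CFG; assuming 32-bit operands and a hypothetical helper name, its behaviour matches this C++ sketch (the real lowering uses llvm.cttz/llvm.ctlz and a PHI node rather than a call).

    // Illustrative equivalent of the emitted control flow for _BitScanForward.
    static unsigned char bitScanForwardSketch(unsigned long *Index, unsigned Mask) {
      if (Mask == 0)
        return 0;                                   // "bitscan_end" with result 0; *Index untouched
      *Index = (unsigned long)__builtin_ctz(Mask);  // index of the lowest set bit
      return 1;                                     // "bitscan_not_zero" path
    }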
1887
1888
namespace {
1889
// ARC cleanup for __builtin_os_log_format
1890
struct CallObjCArcUse final : EHScopeStack::Cleanup {
1891
4
  CallObjCArcUse(llvm::Value *object) : object(object) {}
1892
  llvm::Value *object;
1893
1894
4
  void Emit(CodeGenFunction &CGF, Flags flags) override {
1895
4
    CGF.EmitARCIntrinsicUse(object);
1896
4
  }
1897
};
1898
}
1899
1900
Value *CodeGenFunction::EmitCheckedArgForBuiltin(const Expr *E,
1901
83
                                                 BuiltinCheckKind Kind) {
1902
83
  assert((Kind == BCK_CLZPassedZero || Kind == BCK_CTZPassedZero)
1903
83
          && "Unsupported builtin check kind");
1904
1905
83
  Value *ArgValue = EmitScalarExpr(E);
1906
83
  if (!SanOpts.has(SanitizerKind::Builtin))
1907
71
    return ArgValue;
1908
1909
12
  SanitizerScope SanScope(this);
1910
12
  Value *Cond = Builder.CreateICmpNE(
1911
12
      ArgValue, llvm::Constant::getNullValue(ArgValue->getType()));
1912
12
  EmitCheck(std::make_pair(Cond, SanitizerKind::Builtin),
1913
12
            SanitizerHandler::InvalidBuiltin,
1914
12
            {EmitCheckSourceLocation(E->getExprLoc()),
1915
12
             llvm::ConstantInt::get(Builder.getInt8Ty(), Kind)},
1916
12
            std::nullopt);
1917
12
  return ArgValue;
1918
83
}
1919
1920
22
static Value *EmitAbs(CodeGenFunction &CGF, Value *ArgValue, bool HasNSW) {
1921
22
  return CGF.Builder.CreateBinaryIntrinsic(
1922
22
      Intrinsic::abs, ArgValue,
1923
22
      ConstantInt::get(CGF.Builder.getInt1Ty(), HasNSW));
1924
22
}
1925
1926
static Value *EmitOverflowCheckedAbs(CodeGenFunction &CGF, const CallExpr *E,
1927
4
                                     bool SanitizeOverflow) {
1928
4
  Value *ArgValue = CGF.EmitScalarExpr(E->getArg(0));
1929
1930
  // Try to eliminate overflow check.
1931
4
  if (const auto *VCI = dyn_cast<llvm::ConstantInt>(ArgValue)) {
1932
0
    if (!VCI->isMinSignedValue())
1933
0
      return EmitAbs(CGF, ArgValue, true);
1934
0
  }
1935
1936
4
  CodeGenFunction::SanitizerScope SanScope(&CGF);
1937
1938
4
  Constant *Zero = Constant::getNullValue(ArgValue->getType());
1939
4
  Value *ResultAndOverflow = CGF.Builder.CreateBinaryIntrinsic(
1940
4
      Intrinsic::ssub_with_overflow, Zero, ArgValue);
1941
4
  Value *Result = CGF.Builder.CreateExtractValue(ResultAndOverflow, 0);
1942
4
  Value *NotOverflow = CGF.Builder.CreateNot(
1943
4
      CGF.Builder.CreateExtractValue(ResultAndOverflow, 1));
1944
1945
  // TODO: support -ftrapv-handler.
1946
4
  if (SanitizeOverflow) {
1947
2
    CGF.EmitCheck({{NotOverflow, SanitizerKind::SignedIntegerOverflow}},
1948
2
                  SanitizerHandler::NegateOverflow,
1949
2
                  {CGF.EmitCheckSourceLocation(E->getArg(0)->getExprLoc()),
1950
2
                   CGF.EmitCheckTypeDescriptor(E->getType())},
1951
2
                  {ArgValue});
1952
2
  } else
1953
2
    CGF.EmitTrapCheck(NotOverflow, SanitizerHandler::SubOverflow);
1954
1955
4
  Value *CmpResult = CGF.Builder.CreateICmpSLT(ArgValue, Zero, "abscond");
1956
4
  return CGF.Builder.CreateSelect(CmpResult, Result, ArgValue, "abs");
1957
4
}
1958
1959
/// Get the argument type for arguments to os_log_helper.
1960
140
static CanQualType getOSLogArgType(ASTContext &C, int Size) {
1961
140
  QualType UnsignedTy = C.getIntTypeForBitwidth(Size * 8, /*Signed=*/false);
1962
140
  return C.getCanonicalType(UnsignedTy);
1963
140
}
1964
1965
llvm::Function *CodeGenFunction::generateBuiltinOSLogHelperFunction(
1966
    const analyze_os_log::OSLogBufferLayout &Layout,
1967
59
    CharUnits BufferAlignment) {
1968
59
  ASTContext &Ctx = getContext();
1969
1970
59
  llvm::SmallString<64> Name;
1971
59
  {
1972
59
    raw_svector_ostream OS(Name);
1973
59
    OS << "__os_log_helper";
1974
59
    OS << "_" << BufferAlignment.getQuantity();
1975
59
    OS << "_" << int(Layout.getSummaryByte());
1976
59
    OS << "_" << int(Layout.getNumArgsByte());
1977
59
    for (const auto &Item : Layout.Items)
1978
86
      OS << "_" << int(Item.getSizeByte()) << "_"
1979
86
         << int(Item.getDescriptorByte());
1980
59
  }
1981
1982
59
  if (llvm::Function *F = CGM.getModule().getFunction(Name))
1983
23
    return F;
1984
1985
36
  llvm::SmallVector<QualType, 4> ArgTys;
1986
36
  FunctionArgList Args;
1987
36
  Args.push_back(ImplicitParamDecl::Create(
1988
36
      Ctx, nullptr, SourceLocation(), &Ctx.Idents.get("buffer"), Ctx.VoidPtrTy,
1989
36
      ImplicitParamKind::Other));
1990
36
  ArgTys.emplace_back(Ctx.VoidPtrTy);
1991
1992
94
  for (unsigned int I = 0, E = Layout.Items.size(); I < E; ++I) {
1993
58
    char Size = Layout.Items[I].getSizeByte();
1994
58
    if (!Size)
1995
2
      continue;
1996
1997
56
    QualType ArgTy = getOSLogArgType(Ctx, Size);
1998
56
    Args.push_back(ImplicitParamDecl::Create(
1999
56
        Ctx, nullptr, SourceLocation(),
2000
56
        &Ctx.Idents.get(std::string("arg") + llvm::to_string(I)), ArgTy,
2001
56
        ImplicitParamKind::Other));
2002
56
    ArgTys.emplace_back(ArgTy);
2003
56
  }
2004
2005
36
  QualType ReturnTy = Ctx.VoidTy;
2006
2007
  // The helper function has linkonce_odr linkage to enable the linker to merge
2008
  // identical functions. To ensure the merging always happens, 'noinline' is
2009
  // attached to the function when compiling with -Oz.
2010
36
  const CGFunctionInfo &FI =
2011
36
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(ReturnTy, Args);
2012
36
  llvm::FunctionType *FuncTy = CGM.getTypes().GetFunctionType(FI);
2013
36
  llvm::Function *Fn = llvm::Function::Create(
2014
36
      FuncTy, llvm::GlobalValue::LinkOnceODRLinkage, Name, &CGM.getModule());
2015
36
  Fn->setVisibility(llvm::GlobalValue::HiddenVisibility);
2016
36
  CGM.SetLLVMFunctionAttributes(GlobalDecl(), FI, Fn, /*IsThunk=*/false);
2017
36
  CGM.SetLLVMFunctionAttributesForDefinition(nullptr, Fn);
2018
36
  Fn->setDoesNotThrow();
2019
2020
  // Attach 'noinline' at -Oz.
2021
36
  if (CGM.getCodeGenOpts().OptimizeSize == 2)
2022
0
    Fn->addFnAttr(llvm::Attribute::NoInline);
2023
2024
36
  auto NL = ApplyDebugLocation::CreateEmpty(*this);
2025
36
  StartFunction(GlobalDecl(), ReturnTy, Fn, FI, Args);
2026
2027
  // Create a scope with an artificial location for the body of this function.
2028
36
  auto AL = ApplyDebugLocation::CreateArtificial(*this);
2029
2030
36
  CharUnits Offset;
2031
36
  Address BufAddr =
2032
36
      Address(Builder.CreateLoad(GetAddrOfLocalVar(Args[0]), "buf"), Int8Ty,
2033
36
              BufferAlignment);
2034
36
  Builder.CreateStore(Builder.getInt8(Layout.getSummaryByte()),
2035
36
                      Builder.CreateConstByteGEP(BufAddr, Offset++, "summary"));
2036
36
  Builder.CreateStore(Builder.getInt8(Layout.getNumArgsByte()),
2037
36
                      Builder.CreateConstByteGEP(BufAddr, Offset++, "numArgs"));
2038
2039
36
  unsigned I = 1;
2040
58
  for (const auto &Item : Layout.Items) {
2041
58
    Builder.CreateStore(
2042
58
        Builder.getInt8(Item.getDescriptorByte()),
2043
58
        Builder.CreateConstByteGEP(BufAddr, Offset++, "argDescriptor"));
2044
58
    Builder.CreateStore(
2045
58
        Builder.getInt8(Item.getSizeByte()),
2046
58
        Builder.CreateConstByteGEP(BufAddr, Offset++, "argSize"));
2047
2048
58
    CharUnits Size = Item.size();
2049
58
    if (!Size.getQuantity())
2050
2
      continue;
2051
2052
56
    Address Arg = GetAddrOfLocalVar(Args[I]);
2053
56
    Address Addr = Builder.CreateConstByteGEP(BufAddr, Offset, "argData");
2054
56
    Addr = Addr.withElementType(Arg.getElementType());
2055
56
    Builder.CreateStore(Builder.CreateLoad(Arg), Addr);
2056
56
    Offset += Size;
2057
56
    ++I;
2058
56
  }
2059
2060
36
  FinishFunction();
2061
2062
36
  return Fn;
2063
59
}
2064
2065
59
RValue CodeGenFunction::emitBuiltinOSLogFormat(const CallExpr &E) {
2066
59
  assert(E.getNumArgs() >= 2 &&
2067
59
         "__builtin_os_log_format takes at least 2 arguments");
2068
59
  ASTContext &Ctx = getContext();
2069
59
  analyze_os_log::OSLogBufferLayout Layout;
2070
59
  analyze_os_log::computeOSLogBufferLayout(Ctx, &E, Layout);
2071
59
  Address BufAddr = EmitPointerWithAlignment(E.getArg(0));
2072
59
  llvm::SmallVector<llvm::Value *, 4> RetainableOperands;
2073
2074
  // Ignore argument 1, the format string. It is not currently used.
2075
59
  CallArgList Args;
2076
59
  Args.add(RValue::get(BufAddr.getPointer()), Ctx.VoidPtrTy);
2077
2078
86
  for (const auto &Item : Layout.Items) {
2079
86
    int Size = Item.getSizeByte();
2080
86
    if (!Size)
2081
2
      continue;
2082
2083
84
    llvm::Value *ArgVal;
2084
2085
84
    if (Item.getKind() == analyze_os_log::OSLogBufferItem::MaskKind) {
2086
4
      uint64_t Val = 0;
2087
16
      for (unsigned I = 0, E = Item.getMaskType().size(); I < E; ++I)
2088
12
        Val |= ((uint64_t)Item.getMaskType()[I]) << I * 8;
2089
4
      ArgVal = llvm::Constant::getIntegerValue(Int64Ty, llvm::APInt(64, Val));
2090
80
    } else if (const Expr *TheExpr = Item.getExpr()) {
2091
78
      ArgVal = EmitScalarExpr(TheExpr, /*Ignore*/ false);
2092
2093
      // If a temporary object that requires destruction after the full
2094
      // expression is passed, push a lifetime-extended cleanup to extend its
2095
      // lifetime to the end of the enclosing block scope.
2096
78
      auto LifetimeExtendObject = [&](const Expr *E) {
2097
14
        E = E->IgnoreParenCasts();
2098
        // Extend lifetimes of objects returned by function calls and message
2099
        // sends.
2100
2101
        // FIXME: We should do this in other cases in which temporaries are
2102
        //        created including arguments of non-ARC types (e.g., C++
2103
        //        temporaries).
2104
14
        if (isa<CallExpr>(E) || isa<ObjCMessageExpr>(E))
2105
8
          return true;
2106
6
        return false;
2107
14
      };
2108
2109
78
      if (TheExpr->getType()->isObjCRetainableType() &&
2110
78
          getLangOpts().ObjCAutoRefCount && LifetimeExtendObject(TheExpr)) {
2111
8
        assert(getEvaluationKind(TheExpr->getType()) == TEK_Scalar &&
2112
8
               "Only scalar can be a ObjC retainable type");
2113
8
        if (!isa<Constant>(ArgVal)) {
2114
8
          CleanupKind Cleanup = getARCCleanupKind();
2115
8
          QualType Ty = TheExpr->getType();
2116
8
          Address Alloca = Address::invalid();
2117
8
          Address Addr = CreateMemTemp(Ty, "os.log.arg", &Alloca);
2118
8
          ArgVal = EmitARCRetain(Ty, ArgVal);
2119
8
          Builder.CreateStore(ArgVal, Addr);
2120
8
          pushLifetimeExtendedDestroy(Cleanup, Alloca, Ty,
2121
8
                                      CodeGenFunction::destroyARCStrongPrecise,
2122
8
                                      Cleanup & EHCleanup);
2123
2124
          // Push a clang.arc.use call to ensure ARC optimizer knows that the
2125
          // argument has to be alive.
2126
8
          if (CGM.getCodeGenOpts().OptimizationLevel != 0)
2127
4
            pushCleanupAfterFullExpr<CallObjCArcUse>(Cleanup, ArgVal);
2128
8
        }
2129
8
      }
2130
78
    } else {
2131
2
      ArgVal = Builder.getInt32(Item.getConstValue().getQuantity());
2132
2
    }
2133
2134
84
    unsigned ArgValSize =
2135
84
        CGM.getDataLayout().getTypeSizeInBits(ArgVal->getType());
2136
84
    llvm::IntegerType *IntTy = llvm::Type::getIntNTy(getLLVMContext(),
2137
84
                                                     ArgValSize);
2138
84
    ArgVal = Builder.CreateBitOrPointerCast(ArgVal, IntTy);
2139
84
    CanQualType ArgTy = getOSLogArgType(Ctx, Size);
2140
    // If ArgVal has type x86_fp80, zero-extend ArgVal.
2141
84
    ArgVal = Builder.CreateZExtOrBitCast(ArgVal, ConvertType(ArgTy));
2142
84
    Args.add(RValue::get(ArgVal), ArgTy);
2143
84
  }
2144
2145
59
  const CGFunctionInfo &FI =
2146
59
      CGM.getTypes().arrangeBuiltinFunctionCall(Ctx.VoidTy, Args);
2147
59
  llvm::Function *F = CodeGenFunction(CGM).generateBuiltinOSLogHelperFunction(
2148
59
      Layout, BufAddr.getAlignment());
2149
59
  EmitCall(FI, CGCallee::forDirect(F), ReturnValueSlot(), Args);
2150
59
  return RValue::get(BufAddr.getPointer());
2151
59
}
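The stores in generateBuiltinOSLogHelperFunction together with the argument marshalling above serialize a small packed buffer; its byte layout, reconstructed from those stores, is summarized below (comments only, not a type used by the code).

    // Byte layout of the buffer filled in by the __os_log_helper_* functions:
    //   [0]       summary byte          (Layout.getSummaryByte())
    //   [1]       number-of-args byte   (Layout.getNumArgsByte())
    //   then, for every item:
    //   [k]       descriptor byte       (Item.getDescriptorByte())
    //   [k+1]     size byte             (Item.getSizeByte())
    //   [k+2...]  that many bytes of argument data (omitted when the size is 0)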
2152
2153
static bool isSpecialUnsignedMultiplySignedResult(
2154
    unsigned BuiltinID, WidthAndSignedness Op1Info, WidthAndSignedness Op2Info,
2155
87
    WidthAndSignedness ResultInfo) {
2156
87
  return BuiltinID == Builtin::BI__builtin_mul_overflow &&
2157
87
         Op1Info.Width == Op2Info.Width && Op2Info.Width == ResultInfo.Width &&
2158
87
         !Op1Info.Signed && !Op2Info.Signed && ResultInfo.Signed;
2159
87
}
2160
2161
static RValue EmitCheckedUnsignedMultiplySignedResult(
2162
    CodeGenFunction &CGF, const clang::Expr *Op1, WidthAndSignedness Op1Info,
2163
    const clang::Expr *Op2, WidthAndSignedness Op2Info,
2164
    const clang::Expr *ResultArg, QualType ResultQTy,
2165
9
    WidthAndSignedness ResultInfo) {
2166
9
  assert(isSpecialUnsignedMultiplySignedResult(
2167
9
             Builtin::BI__builtin_mul_overflow, Op1Info, Op2Info, ResultInfo) &&
2168
9
         "Cannot specialize this multiply");
2169
2170
9
  llvm::Value *V1 = CGF.EmitScalarExpr(Op1);
2171
9
  llvm::Value *V2 = CGF.EmitScalarExpr(Op2);
2172
2173
9
  llvm::Value *HasOverflow;
2174
9
  llvm::Value *Result = EmitOverflowIntrinsic(
2175
9
      CGF, llvm::Intrinsic::umul_with_overflow, V1, V2, HasOverflow);
2176
2177
  // The intrinsic call will detect overflow when the value is > UINT_MAX,
2178
  // however, since the original builtin had a signed result, we need to report
2179
  // an overflow when the result is greater than INT_MAX.
2180
9
  auto IntMax = llvm::APInt::getSignedMaxValue(ResultInfo.Width);
2181
9
  llvm::Value *IntMaxValue = llvm::ConstantInt::get(Result->getType(), IntMax);
2182
2183
9
  llvm::Value *IntMaxOverflow = CGF.Builder.CreateICmpUGT(Result, IntMaxValue);
2184
9
  HasOverflow = CGF.Builder.CreateOr(HasOverflow, IntMaxOverflow);
2185
2186
9
  bool isVolatile =
2187
9
      ResultArg->getType()->getPointeeType().isVolatileQualified();
2188
9
  Address ResultPtr = CGF.EmitPointerWithAlignment(ResultArg);
2189
9
  CGF.Builder.CreateStore(CGF.EmitToMemory(Result, ResultQTy), ResultPtr,
2190
9
                          isVolatile);
2191
9
  return RValue::get(HasOverflow);
2192
9
}
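A short worked example of why the extra INT_MAX comparison above is needed, assuming 32-bit operands and a 32-bit signed result.

    // Illustrative only: __builtin_mul_overflow(unsigned a, unsigned b, int *res).
    //   0x10000u * 0x10000u -> llvm.umul.with.overflow already reports overflow
    //                          (the product needs 33 bits).
    //   0x8000u  * 0x10000u -> 0x80000000 fits in 32 unsigned bits, so the intrinsic
    //                          reports no overflow, yet the value exceeds INT_MAX
    //                          (0x7FFFFFFF), so HasOverflow must still become true.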
2193
2194
/// Determine if a binop is a checked mixed-sign multiply we can specialize.
2195
static bool isSpecialMixedSignMultiply(unsigned BuiltinID,
2196
                                       WidthAndSignedness Op1Info,
2197
                                       WidthAndSignedness Op2Info,
2198
132
                                       WidthAndSignedness ResultInfo) {
2199
132
  return BuiltinID == Builtin::BI__builtin_mul_overflow &&
2200
132
         
std::max(Op1Info.Width, Op2Info.Width) >= ResultInfo.Width89
&&
2201
132
         
Op1Info.Signed != Op2Info.Signed83
;
2202
132
}
2203
2204
/// Emit a checked mixed-sign multiply. This is a cheaper specialization of
2205
/// the generic checked-binop irgen.
2206
static RValue
2207
EmitCheckedMixedSignMultiply(CodeGenFunction &CGF, const clang::Expr *Op1,
2208
                             WidthAndSignedness Op1Info, const clang::Expr *Op2,
2209
                             WidthAndSignedness Op2Info,
2210
                             const clang::Expr *ResultArg, QualType ResultQTy,
2211
27
                             WidthAndSignedness ResultInfo) {
2212
27
  assert(isSpecialMixedSignMultiply(Builtin::BI__builtin_mul_overflow, Op1Info,
2213
27
                                    Op2Info, ResultInfo) &&
2214
27
         "Not a mixed-sign multipliction we can specialize");
2215
2216
  // Emit the signed and unsigned operands.
2217
27
  const clang::Expr *SignedOp = Op1Info.Signed ? Op1 : Op2;
2218
27
  const clang::Expr *UnsignedOp = Op1Info.Signed ? Op2 : Op1;
2219
27
  llvm::Value *Signed = CGF.EmitScalarExpr(SignedOp);
2220
27
  llvm::Value *Unsigned = CGF.EmitScalarExpr(UnsignedOp);
2221
27
  unsigned SignedOpWidth = Op1Info.Signed ? Op1Info.Width : Op2Info.Width;
2222
27
  unsigned UnsignedOpWidth = Op1Info.Signed ? Op2Info.Width : Op1Info.Width;
2223
2224
  // One of the operands may be smaller than the other. If so, [s|z]ext it.
2225
27
  if (SignedOpWidth < UnsignedOpWidth)
2226
3
    Signed = CGF.Builder.CreateSExt(Signed, Unsigned->getType(), "op.sext");
2227
27
  if (UnsignedOpWidth < SignedOpWidth)
2228
3
    Unsigned = CGF.Builder.CreateZExt(Unsigned, Signed->getType(), "op.zext");
2229
2230
27
  llvm::Type *OpTy = Signed->getType();
2231
27
  llvm::Value *Zero = llvm::Constant::getNullValue(OpTy);
2232
27
  Address ResultPtr = CGF.EmitPointerWithAlignment(ResultArg);
2233
27
  llvm::Type *ResTy = ResultPtr.getElementType();
2234
27
  unsigned OpWidth = std::max(Op1Info.Width, Op2Info.Width);
2235
2236
  // Take the absolute value of the signed operand.
2237
27
  llvm::Value *IsNegative = CGF.Builder.CreateICmpSLT(Signed, Zero);
2238
27
  llvm::Value *AbsOfNegative = CGF.Builder.CreateSub(Zero, Signed);
2239
27
  llvm::Value *AbsSigned =
2240
27
      CGF.Builder.CreateSelect(IsNegative, AbsOfNegative, Signed);
2241
2242
  // Perform a checked unsigned multiplication.
2243
27
  llvm::Value *UnsignedOverflow;
2244
27
  llvm::Value *UnsignedResult =
2245
27
      EmitOverflowIntrinsic(CGF, llvm::Intrinsic::umul_with_overflow, AbsSigned,
2246
27
                            Unsigned, UnsignedOverflow);
2247
2248
27
  llvm::Value *Overflow, *Result;
2249
27
  if (ResultInfo.Signed) {
2250
    // Signed overflow occurs if the result is greater than INT_MAX or less
2251
    // than INT_MIN, i.e. when |Result| > (INT_MAX + IsNegative).
2252
21
    auto IntMax =
2253
21
        llvm::APInt::getSignedMaxValue(ResultInfo.Width).zext(OpWidth);
2254
21
    llvm::Value *MaxResult =
2255
21
        CGF.Builder.CreateAdd(llvm::ConstantInt::get(OpTy, IntMax),
2256
21
                              CGF.Builder.CreateZExt(IsNegative, OpTy));
2257
21
    llvm::Value *SignedOverflow =
2258
21
        CGF.Builder.CreateICmpUGT(UnsignedResult, MaxResult);
2259
21
    Overflow = CGF.Builder.CreateOr(UnsignedOverflow, SignedOverflow);
2260
2261
    // Prepare the signed result (possibly by negating it).
2262
21
    llvm::Value *NegativeResult = CGF.Builder.CreateNeg(UnsignedResult);
2263
21
    llvm::Value *SignedResult =
2264
21
        CGF.Builder.CreateSelect(IsNegative, NegativeResult, UnsignedResult);
2265
21
    Result = CGF.Builder.CreateTrunc(SignedResult, ResTy);
2266
21
  } else {
2267
    // Unsigned overflow occurs if the result is < 0 or greater than UINT_MAX.
2268
6
    llvm::Value *Underflow = CGF.Builder.CreateAnd(
2269
6
        IsNegative, CGF.Builder.CreateIsNotNull(UnsignedResult));
2270
6
    Overflow = CGF.Builder.CreateOr(UnsignedOverflow, Underflow);
2271
6
    if (ResultInfo.Width < OpWidth) {
2272
3
      auto IntMax =
2273
3
          llvm::APInt::getMaxValue(ResultInfo.Width).zext(OpWidth);
2274
3
      llvm::Value *TruncOverflow = CGF.Builder.CreateICmpUGT(
2275
3
          UnsignedResult, llvm::ConstantInt::get(OpTy, IntMax));
2276
3
      Overflow = CGF.Builder.CreateOr(Overflow, TruncOverflow);
2277
3
    }
2278
2279
    // Negate the product if it would be negative in infinite precision.
2280
6
    Result = CGF.Builder.CreateSelect(
2281
6
        IsNegative, CGF.Builder.CreateNeg(UnsignedResult), UnsignedResult);
2282
2283
6
    Result = CGF.Builder.CreateTrunc(Result, ResTy);
2284
6
  }
2285
27
  assert(Overflow && Result && "Missing overflow or result");
2286
2287
27
  bool isVolatile =
2288
27
      ResultArg->getType()->getPointeeType().isVolatileQualified();
2289
27
  CGF.Builder.CreateStore(CGF.EmitToMemory(Result, ResultQTy), ResultPtr,
2290
27
                          isVolatile);
2291
27
  return RValue::get(Overflow);
2292
27
}
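A compact restatement of the specialization above, assuming both operands and the result are 32 bits wide, the first operand is the signed one, and a hypothetical helper name; the real code handles arbitrary widths and the unsigned-result case as well.

    #include <climits>

    // Illustrative sketch of EmitCheckedMixedSignMultiply for
    // __builtin_mul_overflow(int, unsigned, int *).
    static bool mixedSignMulOverflowSketch(int Signed, unsigned Unsigned, int *Res) {
      bool IsNegative = Signed < 0;
      unsigned AbsSigned = IsNegative ? 0u - (unsigned)Signed : (unsigned)Signed;

      unsigned Product;
      bool Overflow = __builtin_umul_overflow(AbsSigned, Unsigned, &Product);

      // |Result| may be at most INT_MAX, or INT_MAX + 1 when the result is negative.
      unsigned MaxResult = (unsigned)INT_MAX + (IsNegative ? 1u : 0u);
      Overflow |= Product > MaxResult;

      *Res = (int)(IsNegative ? 0u - Product : Product);   // negate if needed, then truncate
      return Overflow;
    }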
2293
2294
static bool
2295
TypeRequiresBuiltinLaunderImp(const ASTContext &Ctx, QualType Ty,
2296
44
                              llvm::SmallPtrSetImpl<const Decl *> &Seen) {
2297
44
  if (const auto *Arr = Ctx.getAsArrayType(Ty))
2298
6
    Ty = Ctx.getBaseElementType(Arr);
2299
2300
44
  const auto *Record = Ty->getAsCXXRecordDecl();
2301
44
  if (!Record)
2302
12
    return false;
2303
2304
  // We've already checked this type, or are in the process of checking it.
2305
32
  if (!Seen.insert(Record).second)
2306
0
    return false;
2307
2308
32
  assert(Record->hasDefinition() &&
2309
32
         "Incomplete types should already be diagnosed");
2310
2311
32
  if (Record->isDynamicClass())
2312
11
    return true;
2313
2314
21
  for (FieldDecl *F : Record->fields()) {
2315
20
    if (TypeRequiresBuiltinLaunderImp(Ctx, F->getType(), Seen))
2316
5
      return true;
2317
20
  }
2318
16
  return false;
2319
21
}
2320
2321
/// Determine if the specified type requires laundering by checking if it is a
2322
/// dynamic class type or contains a subobject which is a dynamic class type.
2323
52
static bool TypeRequiresBuiltinLaunder(CodeGenModule &CGM, QualType Ty) {
2324
52
  if (!CGM.getCodeGenOpts().StrictVTablePointers)
2325
28
    return false;
2326
24
  llvm::SmallPtrSet<const Decl *, 16> Seen;
2327
24
  return TypeRequiresBuiltinLaunderImp(CGM.getContext(), Ty, Seen);
2328
52
}
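For illustration, with -fstrict-vtable-pointers enabled the predicate above classifies types like the following (hypothetical names).

    // Illustrative only: what TypeRequiresBuiltinLaunder reports.
    struct Plain   { int X; };                  // false: no dynamic class anywhere
    struct Dynamic { virtual ~Dynamic(); };     // true: dynamic class itself
    struct Wrapper { Dynamic D[2]; int Tag; };  // true: contains a dynamic-class subobject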
2329
2330
180
RValue CodeGenFunction::emitRotate(const CallExpr *E, bool IsRotateRight) {
2331
180
  llvm::Value *Src = EmitScalarExpr(E->getArg(0));
2332
180
  llvm::Value *ShiftAmt = EmitScalarExpr(E->getArg(1));
2333
2334
  // The builtin's shift arg may have a different type than the source arg and
2335
  // result, but the LLVM intrinsic uses the same type for all values.
2336
180
  llvm::Type *Ty = Src->getType();
2337
180
  ShiftAmt = Builder.CreateIntCast(ShiftAmt, Ty, false);
2338
2339
  // Rotate is a special case of LLVM funnel shift - 1st 2 args are the same.
2340
180
  unsigned IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
2341
180
  Function *F = CGM.getIntrinsic(IID, Ty);
2342
180
  return RValue::get(Builder.CreateCall(F, { Src, Src, ShiftAmt }));
2343
180
}
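As a usage sketch of the funnel-shift mapping above (illustrative function name; the comment shows the expected call shape, with the source value passed for both data operands):
unsigned rotl32(unsigned x, unsigned n) {
  // expected: call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 %n)
  return __builtin_rotateleft32(x, n);
}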
2344
2345
// Map math builtins for long-double to f128 version.
2346
96
static unsigned mutateLongDoubleBuiltin(unsigned BuiltinID) {
2347
96
  switch (BuiltinID) {
2348
0
#define MUTATE_LDBL(func) \
2349
59
  case Builtin::BI__builtin_##func##l: \
2350
59
    return Builtin::BI__builtin_##func##f128;
2351
1
  MUTATE_LDBL(sqrt)
2352
1
  MUTATE_LDBL(cbrt)
2353
1
  MUTATE_LDBL(fabs)
2354
1
  MUTATE_LDBL(log)
2355
1
  MUTATE_LDBL(log2)
2356
1
  MUTATE_LDBL(log10)
2357
1
  MUTATE_LDBL(log1p)
2358
1
  MUTATE_LDBL(logb)
2359
1
  MUTATE_LDBL(exp)
2360
1
  MUTATE_LDBL(exp2)
2361
1
  MUTATE_LDBL(expm1)
2362
1
  MUTATE_LDBL(fdim)
2363
1
  MUTATE_LDBL(hypot)
2364
1
  MUTATE_LDBL(ilogb)
2365
1
  MUTATE_LDBL(pow)
2366
1
  MUTATE_LDBL(fmin)
2367
1
  MUTATE_LDBL(fmax)
2368
1
  MUTATE_LDBL(ceil)
2369
1
  MUTATE_LDBL(trunc)
2370
1
  MUTATE_LDBL(rint)
2371
1
  MUTATE_LDBL(nearbyint)
2372
1
  MUTATE_LDBL(round)
2373
1
  MUTATE_LDBL(floor)
2374
1
  MUTATE_LDBL(lround)
2375
1
  MUTATE_LDBL(llround)
2376
1
  MUTATE_LDBL(lrint)
2377
1
  MUTATE_LDBL(llrint)
2378
1
  MUTATE_LDBL(fmod)
2379
1
  MUTATE_LDBL(modf)
2380
1
  MUTATE_LDBL(nan)
2381
1
  MUTATE_LDBL(nans)
2382
0
  MUTATE_LDBL(inf)
2383
1
  MUTATE_LDBL(fma)
2384
1
  MUTATE_LDBL(sin)
2385
1
  MUTATE_LDBL(cos)
2386
1
  MUTATE_LDBL(tan)
2387
1
  MUTATE_LDBL(sinh)
2388
1
  MUTATE_LDBL(cosh)
2389
1
  MUTATE_LDBL(tanh)
2390
1
  MUTATE_LDBL(asin)
2391
1
  MUTATE_LDBL(acos)
2392
1
  MUTATE_LDBL(atan)
2393
1
  MUTATE_LDBL(asinh)
2394
1
  MUTATE_LDBL(acosh)
2395
1
  MUTATE_LDBL(atanh)
2396
1
  MUTATE_LDBL(atan2)
2397
1
  MUTATE_LDBL(erf)
2398
1
  MUTATE_LDBL(erfc)
2399
1
  MUTATE_LDBL(ldexp)
2400
1
  MUTATE_LDBL(frexp)
2401
0
  MUTATE_LDBL(huge_val)
2402
1
  MUTATE_LDBL(copysign)
2403
1
  MUTATE_LDBL(nextafter)
2404
2
  MUTATE_LDBL(nexttoward)
2405
1
  MUTATE_LDBL(remainder)
2406
1
  MUTATE_LDBL(remquo)
2407
1
  MUTATE_LDBL(scalbln)
2408
1
  MUTATE_LDBL(scalbn)
2409
1
  MUTATE_LDBL(tgamma)
2410
1
  MUTATE_LDBL(lgamma)
2411
0
#undef MUTATE_LDBL
2412
37
  default:
2413
37
    return BuiltinID;
2414
96
  }
2415
96
}
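A brief sketch of the mapping above in source terms (assuming a powerpc64le target whose long double is IEEE 128-bit, e.g. -mabi=ieeelongdouble; function name invented for illustration):
long double root(long double x) {
  // BI__builtin_sqrtl is rewritten to BI__builtin_sqrtf128 by
  // mutateLongDoubleBuiltin() before any further lowering; builtins without
  // an f128 twin fall through the default case unchanged.
  return __builtin_sqrtl(x);
}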
2416
2417
static Value *tryUseTestFPKind(CodeGenFunction &CGF, unsigned BuiltinID,
2418
82
                               Value *V) {
2419
82
  if (CGF.Builder.getIsFPConstrained() &&
2420
82
      CGF.Builder.getDefaultConstrainedExcept() != fp::ebIgnore) {
2421
20
    if (Value *Result =
2422
20
            CGF.getTargetHooks().testFPKind(V, BuiltinID, CGF.Builder, CGF.CGM))
2423
0
      return Result;
2424
20
  }
2425
82
  return nullptr;
2426
82
}
2427
2428
static RValue EmitHipStdParUnsupportedBuiltin(CodeGenFunction *CGF,
2429
1
                                              const FunctionDecl *FD) {
2430
1
  auto Name = FD->getNameAsString() + "__hipstdpar_unsupported";
2431
1
  auto FnTy = CGF->CGM.getTypes().GetFunctionType(FD);
2432
1
  auto UBF = CGF->CGM.getModule().getOrInsertFunction(Name, FnTy);
2433
2434
1
  SmallVector<Value *, 16> Args;
2435
1
  for (auto &&FormalTy : FnTy->params())
2436
0
    Args.push_back(llvm::PoisonValue::get(FormalTy));
2437
2438
1
  return RValue::get(CGF->Builder.CreateCall(UBF, Args));
2439
1
}
2440
2441
RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
2442
                                        const CallExpr *E,
2443
114k
                                        ReturnValueSlot ReturnValue) {
2444
114k
  const FunctionDecl *FD = GD.getDecl()->getAsFunction();
2445
  // See if we can constant fold this builtin.  If so, don't emit it at all.
2446
  // TODO: Extend this handling to all builtin calls that we can constant-fold.
2447
114k
  Expr::EvalResult Result;
2448
114k
  if (E->isPRValue() && E->EvaluateAsRValue(Result, CGM.getContext()) &&
2449
114k
      !Result.hasSideEffects()) {
2450
1.01k
    if (Result.Val.isInt())
2451
601
      return RValue::get(llvm::ConstantInt::get(getLLVMContext(),
2452
601
                                                Result.Val.getInt()));
2453
413
    if (Result.Val.isFloat())
2454
206
      return RValue::get(llvm::ConstantFP::get(getLLVMContext(),
2455
206
                                               Result.Val.getFloat()));
2456
413
  }
2457
2458
  // If current long-double semantics is IEEE 128-bit, replace math builtins
2459
  // of long-double with f128 equivalent.
2460
  // TODO: This mutation should also be applied to targets other than PPC,
2461
  // after backend supports IEEE 128-bit style libcalls.
2462
113k
  if (getTarget().getTriple().isPPC64() &&
2463
113k
      &getTarget().getLongDoubleFormat() == &llvm::APFloat::IEEEquad())
2464
96
    BuiltinID = mutateLongDoubleBuiltin(BuiltinID);
2465
2466
  // If the builtin has been declared explicitly with an assembler label,
2467
  // disable the specialized emitting below. Ideally we should communicate the
2468
  // rename in IR, or at least avoid generating the intrinsic calls that are
2469
  // likely to get lowered to the renamed library functions.
2470
113k
  const unsigned BuiltinIDIfNoAsmLabel =
2471
113k
      FD->hasAttr<AsmLabelAttr>() ? 0 : BuiltinID;
2472
2473
113k
  std::optional<bool> ErrnoOverriden;
2474
  // ErrnoOverriden is true if math-errno is overridden via the
2475
  // '#pragma float_control(precise, on)'. This pragma disables fast-math,
2476
  // which implies math-errno.
2477
113k
  if (E->hasStoredFPFeatures()) {
2478
635
    FPOptionsOverride OP = E->getFPFeatures();
2479
635
    if (OP.hasMathErrnoOverride())
2480
13
      ErrnoOverriden = OP.getMathErrnoOverride();
2481
635
  }
2482
  // True if '__attribute__((optnone))' is used. This attribute overrides
2483
  // fast-math which implies math-errno.
2484
113k
  bool OptNone = CurFuncDecl && CurFuncDecl->hasAttr<OptimizeNoneAttr>();
2485
2486
  // True if we are compiling at -O2 and errno has been disabled
2487
  // using the '#pragma float_control(precise, off)', and
2488
  // attribute opt-none hasn't been seen.
2489
113k
  bool ErrnoOverridenToFalseWithOpt =
2490
113k
       ErrnoOverriden.has_value() && !ErrnoOverriden.value() && !OptNone &&
2491
113k
       CGM.getCodeGenOpts().OptimizationLevel != 0;
2492
2493
  // There are LLVM math intrinsics/instructions corresponding to math library
2494
  // functions except the LLVM op will never set errno while the math library
2495
  // might. Also, math builtins have the same semantics as their math library
2496
  // twins. Thus, we can transform math library and builtin calls to their
2497
  // LLVM counterparts if the call is marked 'const' (known to never set errno).
2498
  // In case FP exceptions are enabled, the experimental versions of the
2499
  // intrinsics model those.
2500
113k
  bool ConstAlways =
2501
113k
      getContext().BuiltinInfo.isConst(BuiltinID);
2502
2503
  // There's a special case with the fma builtins where they are always const
2504
  // if the target environment is GNU or the target OS is Windows and we're
2505
  // targeting the MSVCRT.dll environment.
2506
  // FIXME: This list can become outdated. Need to find a way to get it some
2507
  // other way.
2508
113k
  switch (BuiltinID) {
2509
17
  case Builtin::BI__builtin_fma:
2510
39
  case Builtin::BI__builtin_fmaf:
2511
49
  case Builtin::BI__builtin_fmal:
2512
61
  case Builtin::BIfma:
2513
72
  case Builtin::BIfmaf:
2514
82
  case Builtin::BIfmal: {
2515
82
    auto &Trip = CGM.getTriple();
2516
82
    if (Trip.isGNUEnvironment() || Trip.isOSMSVCRT())
2517
12
      ConstAlways = true;
2518
82
    break;
2519
72
  }
2520
113k
  default:
2521
113k
    break;
2522
113k
  }
2523
2524
113k
  bool ConstWithoutErrnoAndExceptions =
2525
113k
      getContext().BuiltinInfo.isConstWithoutErrnoAndExceptions(BuiltinID);
2526
113k
  bool ConstWithoutExceptions =
2527
113k
      getContext().BuiltinInfo.isConstWithoutExceptions(BuiltinID);
2528
2529
  // ConstAttr is enabled in fast-math mode. In fast-math mode, math-errno is
2530
  // disabled.
2531
  // Math intrinsics are generated only when math-errno is disabled. Any pragmas
2532
  // or attributes that affect math-errno should prevent or allow math
2533
  // intrinsics to be generated. Intrinsics are generated:
2534
  //   1- In fast math mode, unless math-errno is overridden
2535
  //      via '#pragma float_control(precise, on)', or via an
2536
  //      '__attribute__((optnone))'.
2537
  //   2- If math-errno was enabled on command line but overridden
2538
  //      to false via '#pragma float_control(precise, off)' and
2539
  //      '__attribute__((optnone))' hasn't been used.
2540
  //   3- If we are compiling with optimization and errno has been disabled
2541
  //      via '#pragma float_control(precise, off)', and
2542
  //      '__attribute__((optnone))' hasn't been used.
2543
2544
113k
  bool ConstWithoutErrnoOrExceptions =
2545
113k
      ConstWithoutErrnoAndExceptions || ConstWithoutExceptions;
2546
113k
  bool GenerateIntrinsics =
2547
113k
      (ConstAlways && !OptNone) ||
2548
113k
      (!getLangOpts().MathErrno &&
2549
75.8k
       !(ErrnoOverriden.has_value() && ErrnoOverriden.value()) && !OptNone);
2550
113k
  if (!GenerateIntrinsics) {
2551
1.69k
    GenerateIntrinsics =
2552
1.69k
        ConstWithoutErrnoOrExceptions && !ConstWithoutErrnoAndExceptions;
2553
1.69k
    if (!GenerateIntrinsics)
2554
1.68k
      GenerateIntrinsics =
2555
1.68k
          ConstWithoutErrnoOrExceptions &&
2556
1.68k
          (!getLangOpts().MathErrno &&
2557
1.45k
           !(ErrnoOverriden.has_value() && ErrnoOverriden.value()) && !OptNone);
2558
1.69k
    if (!GenerateIntrinsics)
2559
1.68k
      GenerateIntrinsics =
2560
1.68k
          ConstWithoutErrnoOrExceptions && ErrnoOverridenToFalseWithOpt;
2561
1.69k
  }
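A rough source-level sketch of the decision computed above (illustrative function; the exact IR also depends on the target and on FP pragmas in scope):
double root(double x) { return __builtin_sqrt(x); }
//   clang -O2 -fno-math-errno : GenerateIntrinsics is true, so the switch
//                               below is expected to emit llvm.sqrt.f64.
//   clang -O2 -fmath-errno    : the intrinsic path is skipped and the builtin
//                               falls through to an ordinary sqrt libcall,
//                               which may set errno.
// '#pragma float_control(precise, ...)' and __attribute__((optnone)) adjust
// the same decision through ErrnoOverriden and OptNone.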
2562
113k
  if (GenerateIntrinsics) {
2563
111k
    switch (BuiltinIDIfNoAsmLabel) {
2564
7
    case Builtin::BIceil:
2565
14
    case Builtin::BIceilf:
2566
21
    case Builtin::BIceill:
2567
33
    case Builtin::BI__builtin_ceil:
2568
49
    case Builtin::BI__builtin_ceilf:
2569
50
    case Builtin::BI__builtin_ceilf16:
2570
62
    case Builtin::BI__builtin_ceill:
2571
70
    case Builtin::BI__builtin_ceilf128:
2572
70
      return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
2573
70
                                   Intrinsic::ceil,
2574
70
                                   Intrinsic::experimental_constrained_ceil));
2575
2576
8
    case Builtin::BIcopysign:
2577
15
    case Builtin::BIcopysignf:
2578
22
    case Builtin::BIcopysignl:
2579
34
    case Builtin::BI__builtin_copysign:
2580
45
    case Builtin::BI__builtin_copysignf:
2581
46
    case Builtin::BI__builtin_copysignf16:
2582
57
    case Builtin::BI__builtin_copysignl:
2583
64
    case Builtin::BI__builtin_copysignf128:
2584
64
      return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::copysign));
2585
2586
6
    case Builtin::BIcos:
2587
12
    case Builtin::BIcosf:
2588
15
    case Builtin::BIcosl:
2589
18
    case Builtin::BI__builtin_cos:
2590
21
    case Builtin::BI__builtin_cosf:
2591
22
    case Builtin::BI__builtin_cosf16:
2592
25
    case Builtin::BI__builtin_cosl:
2593
28
    case Builtin::BI__builtin_cosf128:
2594
28
      return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
2595
28
                                   Intrinsic::cos,
2596
28
                                   Intrinsic::experimental_constrained_cos));
2597
2598
7
    case Builtin::BIexp:
2599
12
    case Builtin::BIexpf:
2600
17
    case Builtin::BIexpl:
2601
20
    case Builtin::BI__builtin_exp:
2602
26
    case Builtin::BI__builtin_expf:
2603
27
    case Builtin::BI__builtin_expf16:
2604
30
    case Builtin::BI__builtin_expl:
2605
33
    case Builtin::BI__builtin_expf128:
2606
33
      return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
2607
33
                                   Intrinsic::exp,
2608
33
                                   Intrinsic::experimental_constrained_exp));
2609
2610
3
    case Builtin::BIexp2:
2611
6
    case Builtin::BIexp2f:
2612
9
    case Builtin::BIexp2l:
2613
12
    case Builtin::BI__builtin_exp2:
2614
18
    case Builtin::BI__builtin_exp2f:
2615
19
    case Builtin::BI__builtin_exp2f16:
2616
22
    case Builtin::BI__builtin_exp2l:
2617
25
    case Builtin::BI__builtin_exp2f128:
2618
25
      return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
2619
25
                                   Intrinsic::exp2,
2620
25
                                   Intrinsic::experimental_constrained_exp2));
2621
3
    case Builtin::BI__builtin_exp10:
2622
6
    case Builtin::BI__builtin_exp10f:
2623
7
    case Builtin::BI__builtin_exp10f16:
2624
10
    case Builtin::BI__builtin_exp10l:
2625
13
    case Builtin::BI__builtin_exp10f128: {
2626
      // TODO: strictfp support
2627
13
      if (Builder.getIsFPConstrained())
2628
4
        break;
2629
9
      return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::exp10));
2630
13
    }
2631
10
    case Builtin::BIfabs:
2632
18
    case Builtin::BIfabsf:
2633
25
    case Builtin::BIfabsl:
2634
59
    case Builtin::BI__builtin_fabs:
2635
77
    case Builtin::BI__builtin_fabsf:
2636
78
    case Builtin::BI__builtin_fabsf16:
2637
91
    case Builtin::BI__builtin_fabsl:
2638
98
    case Builtin::BI__builtin_fabsf128:
2639
98
      return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::fabs));
2640
2641
28
    case Builtin::BIfloor:
2642
35
    case Builtin::BIfloorf:
2643
42
    case Builtin::BIfloorl:
2644
61
    case Builtin::BI__builtin_floor:
2645
73
    case Builtin::BI__builtin_floorf:
2646
74
    case Builtin::BI__builtin_floorf16:
2647
86
    case Builtin::BI__builtin_floorl:
2648
94
    case Builtin::BI__builtin_floorf128:
2649
94
      return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
2650
94
                                   Intrinsic::floor,
2651
94
                                   Intrinsic::experimental_constrained_floor));
2652
2653
9
    case Builtin::BIfma:
2654
17
    case Builtin::BIfmaf:
2655
24
    case Builtin::BIfmal:
2656
39
    case Builtin::BI__builtin_fma:
2657
59
    case Builtin::BI__builtin_fmaf:
2658
71
    case Builtin::BI__builtin_fmaf16:
2659
76
    case Builtin::BI__builtin_fmal:
2660
79
    case Builtin::BI__builtin_fmaf128:
2661
79
      return RValue::get(emitTernaryMaybeConstrainedFPBuiltin(*this, E,
2662
79
                                   Intrinsic::fma,
2663
79
                                   Intrinsic::experimental_constrained_fma));
2664
2665
7
    case Builtin::BIfmax:
2666
14
    case Builtin::BIfmaxf:
2667
21
    case Builtin::BIfmaxl:
2668
43
    case Builtin::BI__builtin_fmax:
2669
60
    case Builtin::BI__builtin_fmaxf:
2670
61
    case Builtin::BI__builtin_fmaxf16:
2671
73
    case Builtin::BI__builtin_fmaxl:
2672
81
    case Builtin::BI__builtin_fmaxf128:
2673
81
      return RValue::get(emitBinaryMaybeConstrainedFPBuiltin(*this, E,
2674
81
                                   Intrinsic::maxnum,
2675
81
                                   Intrinsic::experimental_constrained_maxnum));
2676
2677
7
    case Builtin::BIfmin:
2678
14
    case Builtin::BIfminf:
2679
21
    case Builtin::BIfminl:
2680
41
    case Builtin::BI__builtin_fmin:
2681
58
    case Builtin::BI__builtin_fminf:
2682
59
    case Builtin::BI__builtin_fminf16:
2683
71
    case Builtin::BI__builtin_fminl:
2684
79
    case Builtin::BI__builtin_fminf128:
2685
79
      return RValue::get(emitBinaryMaybeConstrainedFPBuiltin(*this, E,
2686
79
                                   Intrinsic::minnum,
2687
79
                                   Intrinsic::experimental_constrained_minnum));
2688
2689
    // fmod() is a special-case. It maps to the frem instruction rather than an
2690
    // LLVM intrinsic.
2691
3
    case Builtin::BIfmod:
2692
6
    case Builtin::BIfmodf:
2693
9
    case Builtin::BIfmodl:
2694
14
    case Builtin::BI__builtin_fmod:
2695
19
    case Builtin::BI__builtin_fmodf:
2696
20
    case Builtin::BI__builtin_fmodf16:
2697
25
    case Builtin::BI__builtin_fmodl:
2698
28
    case Builtin::BI__builtin_fmodf128: {
2699
28
      CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
2700
28
      Value *Arg1 = EmitScalarExpr(E->getArg(0));
2701
28
      Value *Arg2 = EmitScalarExpr(E->getArg(1));
2702
28
      return RValue::get(Builder.CreateFRem(Arg1, Arg2, "fmod"));
2703
25
    }
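A small usage sketch of the frem special case above (illustrative function name; the IR comment is the expected, non-constrained form):
float wrap(float x, float y) {
  // expected: %fmod = frem float %x, %y
  return __builtin_fmodf(x, y);
}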
2704
2705
5
    case Builtin::BIlog:
2706
10
    case Builtin::BIlogf:
2707
15
    case Builtin::BIlogl:
2708
18
    case Builtin::BI__builtin_log:
2709
24
    case Builtin::BI__builtin_logf:
2710
25
    case Builtin::BI__builtin_logf16:
2711
28
    case Builtin::BI__builtin_logl:
2712
31
    case Builtin::BI__builtin_logf128:
2713
31
      return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
2714
31
                                   Intrinsic::log,
2715
31
                                   Intrinsic::experimental_constrained_log));
2716
2717
3
    case Builtin::BIlog10:
2718
6
    case Builtin::BIlog10f:
2719
9
    case Builtin::BIlog10l:
2720
12
    case Builtin::BI__builtin_log10:
2721
21
    case Builtin::BI__builtin_log10f:
2722
22
    case Builtin::BI__builtin_log10f16:
2723
25
    case Builtin::BI__builtin_log10l:
2724
28
    case Builtin::BI__builtin_log10f128:
2725
28
      return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
2726
28
                                   Intrinsic::log10,
2727
28
                                   Intrinsic::experimental_constrained_log10));
2728
2729
3
    case Builtin::BIlog2:
2730
6
    case Builtin::BIlog2f:
2731
9
    case Builtin::BIlog2l:
2732
12
    case Builtin::BI__builtin_log2:
2733
15
    case Builtin::BI__builtin_log2f:
2734
16
    case Builtin::BI__builtin_log2f16:
2735
19
    case Builtin::BI__builtin_log2l:
2736
22
    case Builtin::BI__builtin_log2f128:
2737
22
      return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
2738
22
                                   Intrinsic::log2,
2739
22
                                   Intrinsic::experimental_constrained_log2));
2740
2741
7
    case Builtin::BInearbyint:
2742
14
    case Builtin::BInearbyintf:
2743
21
    case Builtin::BInearbyintl:
2744
33
    case Builtin::BI__builtin_nearbyint:
2745
45
    case Builtin::BI__builtin_nearbyintf:
2746
57
    case Builtin::BI__builtin_nearbyintl:
2747
65
    case Builtin::BI__builtin_nearbyintf128:
2748
65
      return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
2749
65
                                Intrinsic::nearbyint,
2750
65
                                Intrinsic::experimental_constrained_nearbyint));
2751
2752
5
    case Builtin::BIpow:
2753
10
    case Builtin::BIpowf:
2754
15
    case Builtin::BIpowl:
2755
18
    case Builtin::BI__builtin_pow:
2756
21
    case Builtin::BI__builtin_powf:
2757
22
    case Builtin::BI__builtin_powf16:
2758
25
    case Builtin::BI__builtin_powl:
2759
28
    case Builtin::BI__builtin_powf128:
2760
28
      return RValue::get(emitBinaryMaybeConstrainedFPBuiltin(*this, E,
2761
28
                                   Intrinsic::pow,
2762
28
                                   Intrinsic::experimental_constrained_pow));
2763
2764
7
    case Builtin::BIrint:
2765
14
    case Builtin::BIrintf:
2766
21
    case Builtin::BIrintl:
2767
39
    case Builtin::BI__builtin_rint:
2768
57
    case Builtin::BI__builtin_rintf:
2769
58
    case Builtin::BI__builtin_rintf16:
2770
70
    case Builtin::BI__builtin_rintl:
2771
78
    case Builtin::BI__builtin_rintf128:
2772
78
      return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
2773
78
                                   Intrinsic::rint,
2774
78
                                   Intrinsic::experimental_constrained_rint));
2775
2776
7
    case Builtin::BIround:
2777
14
    case Builtin::BIroundf:
2778
21
    case Builtin::BIroundl:
2779
39
    case Builtin::BI__builtin_round:
2780
57
    case Builtin::BI__builtin_roundf:
2781
58
    case Builtin::BI__builtin_roundf16:
2782
70
    case Builtin::BI__builtin_roundl:
2783
78
    case Builtin::BI__builtin_roundf128:
2784
78
      return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
2785
78
                                   Intrinsic::round,
2786
78
                                   Intrinsic::experimental_constrained_round));
2787
2788
0
    case Builtin::BIroundeven:
2789
0
    case Builtin::BIroundevenf:
2790
0
    case Builtin::BIroundevenl:
2791
6
    case Builtin::BI__builtin_roundeven:
2792
12
    case Builtin::BI__builtin_roundevenf:
2793
12
    case Builtin::BI__builtin_roundevenf16:
2794
14
    case Builtin::BI__builtin_roundevenl:
2795
14
    case Builtin::BI__builtin_roundevenf128:
2796
14
      return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
2797
14
                                   Intrinsic::roundeven,
2798
14
                                   Intrinsic::experimental_constrained_roundeven));
2799
2800
5
    case Builtin::BIsin:
2801
11
    case Builtin::BIsinf:
2802
14
    case Builtin::BIsinl:
2803
20
    case Builtin::BI__builtin_sin:
2804
23
    case Builtin::BI__builtin_sinf:
2805
24
    case Builtin::BI__builtin_sinf16:
2806
27
    case Builtin::BI__builtin_sinl:
2807
30
    case Builtin::BI__builtin_sinf128:
2808
30
      return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
2809
30
                                   Intrinsic::sin,
2810
30
                                   Intrinsic::experimental_constrained_sin));
2811
2812
6
    case Builtin::BIsqrt:
2813
13
    case Builtin::BIsqrtf:
2814
18
    case Builtin::BIsqrtl:
2815
43
    case Builtin::BI__builtin_sqrt:
2816
68
    case Builtin::BI__builtin_sqrtf:
2817
70
    case Builtin::BI__builtin_sqrtf16:
2818
75
    case Builtin::BI__builtin_sqrtl:
2819
78
    case Builtin::BI__builtin_sqrtf128:
2820
148
    case Builtin::BI__builtin_elementwise_sqrt: {
2821
148
      llvm::Value *Call = emitUnaryMaybeConstrainedFPBuiltin(
2822
148
          *this, E, Intrinsic::sqrt, Intrinsic::experimental_constrained_sqrt);
2823
148
      SetSqrtFPAccuracy(Call);
2824
148
      return RValue::get(Call);
2825
78
    }
2826
7
    case Builtin::BItrunc:
2827
14
    case Builtin::BItruncf:
2828
21
    case Builtin::BItruncl:
2829
33
    case Builtin::BI__builtin_trunc:
2830
45
    case Builtin::BI__builtin_truncf:
2831
46
    case Builtin::BI__builtin_truncf16:
2832
58
    case Builtin::BI__builtin_truncl:
2833
66
    case Builtin::BI__builtin_truncf128:
2834
66
      return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
2835
66
                                   Intrinsic::trunc,
2836
66
                                   Intrinsic::experimental_constrained_trunc));
2837
2838
3
    case Builtin::BIlround:
2839
6
    case Builtin::BIlroundf:
2840
9
    case Builtin::BIlroundl:
2841
14
    case Builtin::BI__builtin_lround:
2842
19
    case Builtin::BI__builtin_lroundf:
2843
24
    case Builtin::BI__builtin_lroundl:
2844
27
    case Builtin::BI__builtin_lroundf128:
2845
27
      return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin(
2846
27
          *this, E, Intrinsic::lround,
2847
27
          Intrinsic::experimental_constrained_lround));
2848
2849
3
    case Builtin::BIllround:
2850
6
    case Builtin::BIllroundf:
2851
9
    case Builtin::BIllroundl:
2852
12
    case Builtin::BI__builtin_llround:
2853
15
    case Builtin::BI__builtin_llroundf:
2854
18
    case Builtin::BI__builtin_llroundl:
2855
21
    case Builtin::BI__builtin_llroundf128:
2856
21
      return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin(
2857
21
          *this, E, Intrinsic::llround,
2858
21
          Intrinsic::experimental_constrained_llround));
2859
2860
3
    case Builtin::BIlrint:
2861
6
    case Builtin::BIlrintf:
2862
9
    case Builtin::BIlrintl:
2863
14
    case Builtin::BI__builtin_lrint:
2864
19
    case Builtin::BI__builtin_lrintf:
2865
24
    case Builtin::BI__builtin_lrintl:
2866
27
    case Builtin::BI__builtin_lrintf128:
2867
27
      return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin(
2868
27
          *this, E, Intrinsic::lrint,
2869
27
          Intrinsic::experimental_constrained_lrint));
2870
2871
3
    case Builtin::BIllrint:
2872
6
    case Builtin::BIllrintf:
2873
9
    case Builtin::BIllrintl:
2874
12
    case Builtin::BI__builtin_llrint:
2875
15
    case Builtin::BI__builtin_llrintf:
2876
18
    case Builtin::BI__builtin_llrintl:
2877
21
    case Builtin::BI__builtin_llrintf128:
2878
21
      return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin(
2879
21
          *this, E, Intrinsic::llrint,
2880
21
          Intrinsic::experimental_constrained_llrint));
2881
4
    case Builtin::BI__builtin_ldexp:
2882
8
    case Builtin::BI__builtin_ldexpf:
2883
13
    case Builtin::BI__builtin_ldexpl:
2884
16
    case Builtin::BI__builtin_ldexpf16:
2885
18
    case Builtin::BI__builtin_ldexpf128: {
2886
18
      return RValue::get(emitBinaryExpMaybeConstrainedFPBuiltin(
2887
18
          *this, E, Intrinsic::ldexp,
2888
18
          Intrinsic::experimental_constrained_ldexp));
2889
16
    }
2890
110k
    default:
2891
110k
      break;
2892
111k
    }
2893
111k
  }
2894
2895
  // Check NonnullAttribute/NullabilityArg and Alignment.
2896
112k
  auto EmitArgCheck = [&](TypeCheckKind Kind, Address A, const Expr *Arg,
2897
112k
                          unsigned ParmNum) {
2898
430
    Value *Val = A.getPointer();
2899
430
    EmitNonNullArgCheck(RValue::get(Val), Arg->getType(), Arg->getExprLoc(), FD,
2900
430
                        ParmNum);
2901
2902
430
    if (SanOpts.has(SanitizerKind::Alignment) && ClSanitizeAlignmentBuiltin) {
2903
24
      SanitizerSet SkippedChecks;
2904
24
      SkippedChecks.set(SanitizerKind::All);
2905
24
      SkippedChecks.clear(SanitizerKind::Alignment);
2906
24
      SourceLocation Loc = Arg->getExprLoc();
2907
      // Strip an implicit cast.
2908
24
      if (auto *CE = dyn_cast<ImplicitCastExpr>(Arg))
2909
22
        if (CE->getCastKind() == CK_BitCast)
2910
14
          Arg = CE->getSubExpr();
2911
24
      EmitTypeCheck(Kind, Loc, Val, Arg->getType(), A.getAlignment(),
2912
24
                    SkippedChecks);
2913
24
    }
2914
430
  };
2915
2916
112k
  switch (BuiltinIDIfNoAsmLabel) {
2917
85.1k
  default: break;
2918
85.1k
  case Builtin::BI__builtin___CFStringMakeConstantString:
2919
173
  case Builtin::BI__builtin___NSStringMakeConstantString:
2920
173
    return RValue::get(ConstantEmitter(*this).emitAbstract(E, E->getType()));
2921
0
  case Builtin::BI__builtin_stdarg_start:
2922
271
  case Builtin::BI__builtin_va_start:
2923
271
  case Builtin::BI__va_start:
2924
503
  case Builtin::BI__builtin_va_end:
2925
503
    EmitVAStartEnd(BuiltinID == Builtin::BI__va_start
2926
503
                       ? EmitScalarExpr(E->getArg(0))
2927
503
                       : EmitVAListRef(E->getArg(0)).getPointer(),
2928
503
                   BuiltinID != Builtin::BI__builtin_va_end);
2929
503
    return RValue::get(nullptr);
2930
10
  case Builtin::BI__builtin_va_copy: {
2931
10
    Value *DstPtr = EmitVAListRef(E->getArg(0)).getPointer();
2932
10
    Value *SrcPtr = EmitVAListRef(E->getArg(1)).getPointer();
2933
10
    Builder.CreateCall(CGM.getIntrinsic(Intrinsic::vacopy), {DstPtr, SrcPtr});
2934
10
    return RValue::get(nullptr);
2935
271
  }
2936
6
  case Builtin::BIabs:
2937
7
  case Builtin::BIlabs:
2938
8
  case Builtin::BIllabs:
2939
18
  case Builtin::BI__builtin_abs:
2940
22
  case Builtin::BI__builtin_labs:
2941
26
  case Builtin::BI__builtin_llabs: {
2942
26
    bool SanitizeOverflow = SanOpts.has(SanitizerKind::SignedIntegerOverflow);
2943
2944
26
    Value *Result;
2945
26
    switch (getLangOpts().getSignedOverflowBehavior()) {
2946
2
    case LangOptions::SOB_Defined:
2947
2
      Result = EmitAbs(*this, EmitScalarExpr(E->getArg(0)), false);
2948
2
      break;
2949
22
    case LangOptions::SOB_Undefined:
2950
22
      if (!SanitizeOverflow) {
2951
20
        Result = EmitAbs(*this, EmitScalarExpr(E->getArg(0)), true);
2952
20
        break;
2953
20
      }
2954
22
      [[fallthrough]];
2955
4
    case LangOptions::SOB_Trapping:
2956
      // TODO: Somehow handle the corner case when the address of abs is taken.
2957
4
      Result = EmitOverflowCheckedAbs(*this, E, SanitizeOverflow);
2958
4
      break;
2959
26
    }
2960
26
    return RValue::get(Result);
2961
26
  }
2962
4
  case Builtin::BI__builtin_complex: {
2963
4
    Value *Real = EmitScalarExpr(E->getArg(0));
2964
4
    Value *Imag = EmitScalarExpr(E->getArg(1));
2965
4
    return RValue::getComplex({Real, Imag});
2966
26
  }
2967
6
  case Builtin::BI__builtin_conj:
2968
12
  case Builtin::BI__builtin_conjf:
2969
18
  case Builtin::BI__builtin_conjl:
2970
22
  case Builtin::BIconj:
2971
26
  case Builtin::BIconjf:
2972
30
  case Builtin::BIconjl: {
2973
30
    ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
2974
30
    Value *Real = ComplexVal.first;
2975
30
    Value *Imag = ComplexVal.second;
2976
30
    Imag = Builder.CreateFNeg(Imag, "neg");
2977
30
    return RValue::getComplex(std::make_pair(Real, Imag));
2978
26
  }
2979
4
  case Builtin::BI__builtin_creal:
2980
8
  case Builtin::BI__builtin_crealf:
2981
12
  case Builtin::BI__builtin_creall:
2982
16
  case Builtin::BIcreal:
2983
19
  case Builtin::BIcrealf:
2984
22
  case Builtin::BIcreall: {
2985
22
    ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
2986
22
    return RValue::get(ComplexVal.first);
2987
19
  }
2988
2989
20
  case Builtin::BI__builtin_preserve_access_index: {
2990
    // Only enable the preserved access index region when debuginfo
2991
    // is available as debuginfo is needed to preserve user-level
2992
    // access pattern.
2993
20
    if (!getDebugInfo()) {
2994
0
      CGM.Error(E->getExprLoc(), "using builtin_preserve_access_index() without -g");
2995
0
      return RValue::get(EmitScalarExpr(E->getArg(0)));
2996
0
    }
2997
2998
    // Nested builtin_preserve_access_index() not supported
2999
20
    if (IsInPreservedAIRegion) {
3000
0
      CGM.Error(E->getExprLoc(), "nested builtin_preserve_access_index() not supported");
3001
0
      return RValue::get(EmitScalarExpr(E->getArg(0)));
3002
0
    }
3003
3004
20
    IsInPreservedAIRegion = true;
3005
20
    Value *Res = EmitScalarExpr(E->getArg(0));
3006
20
    IsInPreservedAIRegion = false;
3007
20
    return RValue::get(Res);
3008
20
  }
3009
3010
4
  case Builtin::BI__builtin_cimag:
3011
8
  case Builtin::BI__builtin_cimagf:
3012
12
  case Builtin::BI__builtin_cimagl:
3013
15
  case Builtin::BIcimag:
3014
18
  case Builtin::BIcimagf:
3015
21
  case Builtin::BIcimagl: {
3016
21
    ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
3017
21
    return RValue::get(ComplexVal.second);
3018
18
  }
3019
3020
1
  case Builtin::BI__builtin_clrsb:
3021
1
  case Builtin::BI__builtin_clrsbl:
3022
2
  case Builtin::BI__builtin_clrsbll: {
3023
    // clrsb(x) -> clz(x < 0 ? ~x : x) - 1
3024
2
    Value *ArgValue = EmitScalarExpr(E->getArg(0));
3025
3026
2
    llvm::Type *ArgType = ArgValue->getType();
3027
2
    Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
3028
3029
2
    llvm::Type *ResultType = ConvertType(E->getType());
3030
2
    Value *Zero = llvm::Constant::getNullValue(ArgType);
3031
2
    Value *IsNeg = Builder.CreateICmpSLT(ArgValue, Zero, "isneg");
3032
2
    Value *Inverse = Builder.CreateNot(ArgValue, "not");
3033
2
    Value *Tmp = Builder.CreateSelect(IsNeg, Inverse, ArgValue);
3034
2
    Value *Ctlz = Builder.CreateCall(F, {Tmp, Builder.getFalse()});
3035
2
    Value *Result = Builder.CreateSub(Ctlz, llvm::ConstantInt::get(ArgType, 1));
3036
2
    Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
3037
2
                                   "cast");
3038
2
    return RValue::get(Result);
3039
1
  }
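A worked instance of the clz-based formula above, assuming 32-bit int (illustrative wrapper name):
int redundant_sign_bits(int x) {
  return __builtin_clrsb(x);
}
// clrsb(-1): x < 0, so ~x == 0; ctlz(0, /*zero is defined*/) == 32; 32 - 1 == 31
// clrsb(1):  x >= 0; ctlz(1) == 31; 31 - 1 == 30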
3040
2
  case Builtin::BI__builtin_ctzs:
3041
14
  case Builtin::BI__builtin_ctz:
3042
18
  case Builtin::BI__builtin_ctzl:
3043
31
  case Builtin::BI__builtin_ctzll: {
3044
31
    Value *ArgValue = EmitCheckedArgForBuiltin(E->getArg(0), BCK_CTZPassedZero);
3045
3046
31
    llvm::Type *ArgType = ArgValue->getType();
3047
31
    Function *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
3048
3049
31
    llvm::Type *ResultType = ConvertType(E->getType());
3050
31
    Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef());
3051
31
    Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
3052
31
    if (Result->getType() != ResultType)
3053
19
      Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
3054
19
                                     "cast");
3055
31
    return RValue::get(Result);
3056
18
  }
3057
2
  case Builtin::BI__builtin_clzs:
3058
29
  case Builtin::BI__builtin_clz:
3059
40
  case Builtin::BI__builtin_clzl:
3060
52
  case Builtin::BI__builtin_clzll: {
3061
52
    Value *ArgValue = EmitCheckedArgForBuiltin(E->getArg(0), BCK_CLZPassedZero);
3062
3063
52
    llvm::Type *ArgType = ArgValue->getType();
3064
52
    Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
3065
3066
52
    llvm::Type *ResultType = ConvertType(E->getType());
3067
52
    Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef());
3068
52
    Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
3069
52
    if (Result->getType() != ResultType)
3070
25
      Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
3071
25
                                     "cast");
3072
52
    return RValue::get(Result);
3073
40
  }
3074
2
  case Builtin::BI__builtin_ffs:
3075
4
  case Builtin::BI__builtin_ffsl:
3076
6
  case Builtin::BI__builtin_ffsll: {
3077
    // ffs(x) -> x ? cttz(x) + 1 : 0
3078
6
    Value *ArgValue = EmitScalarExpr(E->getArg(0));
3079
3080
6
    llvm::Type *ArgType = ArgValue->getType();
3081
6
    Function *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
3082
3083
6
    llvm::Type *ResultType = ConvertType(E->getType());
3084
6
    Value *Tmp =
3085
6
        Builder.CreateAdd(Builder.CreateCall(F, {ArgValue, Builder.getTrue()}),
3086
6
                          llvm::ConstantInt::get(ArgType, 1));
3087
6
    Value *Zero = llvm::Constant::getNullValue(ArgType);
3088
6
    Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero");
3089
6
    Value *Result = Builder.CreateSelect(IsZero, Zero, Tmp, "ffs");
3090
6
    if (Result->getType() != ResultType)
3091
4
      Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
3092
4
                                     "cast");
3093
6
    return RValue::get(Result);
3094
4
  }
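A worked instance of the cttz-based lowering above, assuming 32-bit int (illustrative wrapper name):
int lowest_set_bit(int x) {
  return __builtin_ffs(x);   // 1-based index of the lowest set bit, 0 for x == 0
}
// ffs(8): cttz(8) == 3, plus 1 == 4 (bit 3 is the lowest set bit)
// ffs(0): the select above takes the zero arm, so the result is 0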
3095
2
  case Builtin::BI__builtin_parity:
3096
4
  case Builtin::BI__builtin_parityl:
3097
6
  case Builtin::BI__builtin_parityll: {
3098
    // parity(x) -> ctpop(x) & 1
3099
6
    Value *ArgValue = EmitScalarExpr(E->getArg(0));
3100
3101
6
    llvm::Type *ArgType = ArgValue->getType();
3102
6
    Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
3103
3104
6
    llvm::Type *ResultType = ConvertType(E->getType());
3105
6
    Value *Tmp = Builder.CreateCall(F, ArgValue);
3106
6
    Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1));
3107
6
    if (Result->getType() != ResultType)
3108
4
      Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
3109
4
                                     "cast");
3110
6
    return RValue::get(Result);
3111
4
  }
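A worked instance of the ctpop-based lowering above (illustrative wrapper name):
int odd_popcount(unsigned x) {
  return __builtin_parity(x);   // 1 when an odd number of bits are set
}
// parity(0b1011): ctpop == 3, 3 & 1 == 1
// parity(0b1001): ctpop == 2, 2 & 1 == 0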
3112
10
  case Builtin::BI__lzcnt16:
3113
20
  case Builtin::BI__lzcnt:
3114
30
  case Builtin::BI__lzcnt64: {
3115
30
    Value *ArgValue = EmitScalarExpr(E->getArg(0));
3116
3117
30
    llvm::Type *ArgType = ArgValue->getType();
3118
30
    Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
3119
3120
30
    llvm::Type *ResultType = ConvertType(E->getType());
3121
30
    Value *Result = Builder.CreateCall(F, {ArgValue, Builder.getFalse()});
3122
30
    if (Result->getType() != ResultType)
3123
0
      Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
3124
0
                                     "cast");
3125
30
    return RValue::get(Result);
3126
20
  }
3127
10
  case Builtin::BI__popcnt16:
3128
20
  case Builtin::BI__popcnt:
3129
30
  case Builtin::BI__popcnt64:
3130
39
  case Builtin::BI__builtin_popcount:
3131
41
  case Builtin::BI__builtin_popcountl:
3132
50
  case Builtin::BI__builtin_popcountll: {
3133
50
    Value *ArgValue = EmitScalarExpr(E->getArg(0));
3134
3135
50
    llvm::Type *ArgType = ArgValue->getType();
3136
50
    Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
3137
3138
50
    llvm::Type *ResultType = ConvertType(E->getType());
3139
50
    Value *Result = Builder.CreateCall(F, ArgValue);
3140
50
    if (Result->getType() != ResultType)
3141
11
      Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
3142
11
                                     "cast");
3143
50
    return RValue::get(Result);
3144
41
  }
3145
7
  case Builtin::BI__builtin_unpredictable: {
3146
    // Always return the argument of __builtin_unpredictable. LLVM does not
3147
    // handle this builtin. Metadata for this builtin should be added directly
3148
    // to instructions such as branches or switches that use it.
3149
7
    return RValue::get(EmitScalarExpr(E->getArg(0)));
3150
41
  }
3151
144
  case Builtin::BI__builtin_expect: {
3152
144
    Value *ArgValue = EmitScalarExpr(E->getArg(0));
3153
144
    llvm::Type *ArgType = ArgValue->getType();
3154
3155
144
    Value *ExpectedValue = EmitScalarExpr(E->getArg(1));
3156
    // Don't generate llvm.expect on -O0 as the backend won't use it for
3157
    // anything.
3158
    // Note, we still IRGen ExpectedValue because it could have side-effects.
3159
144
    if (CGM.getCodeGenOpts().OptimizationLevel == 0)
3160
113
      return RValue::get(ArgValue);
3161
3162
31
    Function *FnExpect = CGM.getIntrinsic(Intrinsic::expect, ArgType);
3163
31
    Value *Result =
3164
31
        Builder.CreateCall(FnExpect, {ArgValue, ExpectedValue}, "expval");
3165
31
    return RValue::get(Result);
3166
144
  }
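A usage sketch of the behaviour above (illustrative functions): the hint only materialises as an llvm.expect call when optimising, but the expected-value operand is always emitted for its side effects.
extern int slow_path(void);
int f(int x) {
  // -O0: this is just the value of (x != 0); -O1 and above: an llvm.expect
  // call whose result feeds branch-weight metadata.
  if (__builtin_expect(x != 0, 1))
    return 1;
  return slow_path();
}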
3167
12
  case Builtin::BI__builtin_expect_with_probability: {
3168
12
    Value *ArgValue = EmitScalarExpr(E->getArg(0));
3169
12
    llvm::Type *ArgType = ArgValue->getType();
3170
3171
12
    Value *ExpectedValue = EmitScalarExpr(E->getArg(1));
3172
12
    llvm::APFloat Probability(0.0);
3173
12
    const Expr *ProbArg = E->getArg(2);
3174
12
    bool EvalSucceed = ProbArg->EvaluateAsFloat(Probability, CGM.getContext());
3175
12
    assert(EvalSucceed && "probability should be able to evaluate as float");
3176
12
    (void)EvalSucceed;
3177
12
    bool LoseInfo = false;
3178
12
    Probability.convert(llvm::APFloat::IEEEdouble(),
3179
12
                        llvm::RoundingMode::Dynamic, &LoseInfo);
3180
12
    llvm::Type *Ty = ConvertType(ProbArg->getType());
3181
12
    Constant *Confidence = ConstantFP::get(Ty, Probability);
3182
    // Don't generate llvm.expect.with.probability on -O0 as the backend
3183
    // won't use it for anything.
3184
    // Note, we still IRGen ExpectedValue because it could have side-effects.
3185
12
    if (CGM.getCodeGenOpts().OptimizationLevel == 0)
3186
6
      return RValue::get(ArgValue);
3187
3188
6
    Function *FnExpect =
3189
6
        CGM.getIntrinsic(Intrinsic::expect_with_probability, ArgType);
3190
6
    Value *Result = Builder.CreateCall(
3191
6
        FnExpect, {ArgValue, ExpectedValue, Confidence}, "expval");
3192
6
    return RValue::get(Result);
3193
12
  }
3194
31
  case Builtin::BI__builtin_assume_aligned: {
3195
31
    const Expr *Ptr = E->getArg(0);
3196
31
    Value *PtrValue = EmitScalarExpr(Ptr);
3197
31
    Value *OffsetValue =
3198
31
      (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) : nullptr;
3199
3200
31
    Value *AlignmentValue = EmitScalarExpr(E->getArg(1));
3201
31
    ConstantInt *AlignmentCI = cast<ConstantInt>(AlignmentValue);
3202
31
    if (AlignmentCI->getValue().ugt(llvm::Value::MaximumAlignment))
3203
0
      AlignmentCI = ConstantInt::get(AlignmentCI->getType(),
3204
0
                                     llvm::Value::MaximumAlignment);
3205
3206
31
    emitAlignmentAssumption(PtrValue, Ptr,
3207
31
                            /*The expr loc is sufficient.*/ SourceLocation(),
3208
31
                            AlignmentCI, OffsetValue);
3209
31
    return RValue::get(PtrValue);
3210
12
  }
3211
0
  case Builtin::BI__assume:
3212
12
  case Builtin::BI__builtin_assume: {
3213
12
    if (E->getArg(0)->HasSideEffects(getContext()))
3214
4
      return RValue::get(nullptr);
3215
3216
8
    Value *ArgValue = EmitScalarExpr(E->getArg(0));
3217
8
    Function *FnAssume = CGM.getIntrinsic(Intrinsic::assume);
3218
8
    Builder.CreateCall(FnAssume, ArgValue);
3219
8
    return RValue::get(nullptr);
3220
12
  }
3221
2
  case Builtin::BI__builtin_assume_separate_storage: {
3222
2
    const Expr *Arg0 = E->getArg(0);
3223
2
    const Expr *Arg1 = E->getArg(1);
3224
3225
2
    Value *Value0 = EmitScalarExpr(Arg0);
3226
2
    Value *Value1 = EmitScalarExpr(Arg1);
3227
3228
2
    Value *Values[] = {Value0, Value1};
3229
2
    OperandBundleDefT<Value *> OBD("separate_storage", Values);
3230
2
    Builder.CreateAssumption(ConstantInt::getTrue(getLLVMContext()), {OBD});
3231
2
    return RValue::get(nullptr);
3232
12
  }
3233
42
  case Builtin::BI__arithmetic_fence: {
3234
    // Create the builtin call if FastMath is selected, and the target
3235
    // supports the builtin, otherwise just return the argument.
3236
42
    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3237
42
    llvm::FastMathFlags FMF = Builder.getFastMathFlags();
3238
42
    bool isArithmeticFenceEnabled =
3239
42
        FMF.allowReassoc() &&
3240
42
        getContext().getTargetInfo().checkArithmeticFenceSupported();
3241
42
    QualType ArgType = E->getArg(0)->getType();
3242
42
    if (ArgType->isComplexType()) {
3243
7
      if (isArithmeticFenceEnabled) {
3244
5
        QualType ElementType = ArgType->castAs<ComplexType>()->getElementType();
3245
5
        ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
3246
5
        Value *Real = Builder.CreateArithmeticFence(ComplexVal.first,
3247
5
                                                    ConvertType(ElementType));
3248
5
        Value *Imag = Builder.CreateArithmeticFence(ComplexVal.second,
3249
5
                                                    ConvertType(ElementType));
3250
5
        return RValue::getComplex(std::make_pair(Real, Imag));
3251
5
      }
3252
2
      ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
3253
2
      Value *Real = ComplexVal.first;
3254
2
      Value *Imag = ComplexVal.second;
3255
2
      return RValue::getComplex(std::make_pair(Real, Imag));
3256
7
    }
3257
35
    Value *ArgValue = EmitScalarExpr(E->getArg(0));
3258
35
    if (isArithmeticFenceEnabled)
3259
25
      return RValue::get(
3260
25
          Builder.CreateArithmeticFence(ArgValue, ConvertType(ArgType)));
3261
10
    return RValue::get(ArgValue);
3262
35
  }
3263
12
  case Builtin::BI__builtin_bswap16:
3264
29
  case Builtin::BI__builtin_bswap32:
3265
45
  case Builtin::BI__builtin_bswap64:
3266
46
  case Builtin::BI_byteswap_ushort:
3267
47
  case Builtin::BI_byteswap_ulong:
3268
48
  case Builtin::BI_byteswap_uint64: {
3269
48
    return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bswap));
3270
47
  }
3271
3
  case Builtin::BI__builtin_bitreverse8:
3272
6
  case Builtin::BI__builtin_bitreverse16:
3273
9
  case Builtin::BI__builtin_bitreverse32:
3274
12
  case Builtin::BI__builtin_bitreverse64: {
3275
12
    return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bitreverse));
3276
9
  }
3277
14
  case Builtin::BI__builtin_rotateleft8:
3278
28
  case Builtin::BI__builtin_rotateleft16:
3279
44
  case Builtin::BI__builtin_rotateleft32:
3280
54
  case Builtin::BI__builtin_rotateleft64:
3281
60
  case Builtin::BI_rotl8: // Microsoft variants of rotate left
3282
66
  case Builtin::BI_rotl16:
3283
76
  case Builtin::BI_rotl:
3284
86
  case Builtin::BI_lrotl:
3285
92
  case Builtin::BI_rotl64:
3286
92
    return emitRotate(E, false);
3287
3288
14
  case Builtin::BI__builtin_rotateright8:
3289
28
  case Builtin::BI__builtin_rotateright16:
3290
42
  case Builtin::BI__builtin_rotateright32:
3291
50
  case Builtin::BI__builtin_rotateright64:
3292
56
  case Builtin::BI_rotr8: // Microsoft variants of rotate right
3293
62
  case Builtin::BI_rotr16:
3294
72
  case Builtin::BI_rotr:
3295
82
  case Builtin::BI_lrotr:
3296
88
  case Builtin::BI_rotr64:
3297
88
    return emitRotate(E, true);
3298
3299
34
  case Builtin::BI__builtin_constant_p: {
3300
34
    llvm::Type *ResultType = ConvertType(E->getType());
3301
3302
34
    const Expr *Arg = E->getArg(0);
3303
34
    QualType ArgType = Arg->getType();
3304
    // FIXME: The allowance for Obj-C pointers and block pointers is historical
3305
    // and likely a mistake.
3306
34
    if (!ArgType->isIntegralOrEnumerationType() && !ArgType->isFloatingType() &&
3307
34
        !ArgType->isObjCObjectPointerType() && !ArgType->isBlockPointerType())
3308
      // Per the GCC documentation, only numeric constants are recognized after
3309
      // inlining.
3310
13
      return RValue::get(ConstantInt::get(ResultType, 0));
3311
3312
21
    if (Arg->HasSideEffects(getContext()))
3313
      // The argument is unevaluated, so be conservative if it might have
3314
      // side-effects.
3315
0
      return RValue::get(ConstantInt::get(ResultType, 0));
3316
3317
21
    Value *ArgValue = EmitScalarExpr(Arg);
3318
21
    if (ArgType->isObjCObjectPointerType()) {
3319
      // Convert Objective-C objects to id because we cannot distinguish between
3320
      // LLVM types for Obj-C classes as they are opaque.
3321
4
      ArgType = CGM.getContext().getObjCIdType();
3322
4
      ArgValue = Builder.CreateBitCast(ArgValue, ConvertType(ArgType));
3323
4
    }
3324
21
    Function *F =
3325
21
        CGM.getIntrinsic(Intrinsic::is_constant, ConvertType(ArgType));
3326
21
    Value *Result = Builder.CreateCall(F, ArgValue);
3327
21
    if (Result->getType() != ResultType)
3328
21
      Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/false);
3329
21
    return RValue::get(Result);
3330
21
  }
3331
156
  case Builtin::BI__builtin_dynamic_object_size:
3332
294
  case Builtin::BI__builtin_object_size: {
3333
294
    unsigned Type =
3334
294
        E->getArg(1)->EvaluateKnownConstInt(getContext()).getZExtValue();
3335
294
    auto *ResType = cast<llvm::IntegerType>(ConvertType(E->getType()));
3336
3337
    // We pass this builtin onto the optimizer so that it can figure out the
3338
    // object size in more complex cases.
3339
294
    bool IsDynamic = BuiltinID == Builtin::BI__builtin_dynamic_object_size;
3340
294
    return RValue::get(emitBuiltinObjectSize(E->getArg(0), Type, ResType,
3341
294
                                             /*EmittedE=*/nullptr, IsDynamic));
3342
156
  }
3343
25
  case Builtin::BI__builtin_prefetch: {
3344
25
    Value *Locality, *RW, *Address = EmitScalarExpr(E->getArg(0));
3345
    // FIXME: Technically these constants should be of type 'int', yes?
3346
25
    RW = (E->getNumArgs() > 1) ? EmitScalarExpr(E->getArg(1)) :
3347
25
      llvm::ConstantInt::get(Int32Ty, 0);
3348
25
    Locality = (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) :
3349
25
      llvm::ConstantInt::get(Int32Ty, 3);
3350
25
    Value *Data = llvm::ConstantInt::get(Int32Ty, 1);
3351
25
    Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
3352
25
    Builder.CreateCall(F, {Address, RW, Locality, Data});
3353
25
    return RValue::get(nullptr);
3354
156
  }
3355
2
  case Builtin::BI__builtin_readcyclecounter: {
3356
2
    Function *F = CGM.getIntrinsic(Intrinsic::readcyclecounter);
3357
2
    return RValue::get(Builder.CreateCall(F));
3358
156
  }
3359
1
  case Builtin::BI__builtin___clear_cache: {
3360
1
    Value *Begin = EmitScalarExpr(E->getArg(0));
3361
1
    Value *End = EmitScalarExpr(E->getArg(1));
3362
1
    Function *F = CGM.getIntrinsic(Intrinsic::clear_cache);
3363
1
    return RValue::get(Builder.CreateCall(F, {Begin, End}));
3364
156
  }
3365
53
  case Builtin::BI__builtin_trap:
3366
53
    EmitTrapCall(Intrinsic::trap);
3367
53
    return RValue::get(nullptr);
3368
0
  case Builtin::BI__debugbreak:
3369
0
    EmitTrapCall(Intrinsic::debugtrap);
3370
0
    return RValue::get(nullptr);
3371
34
  case Builtin::BI__builtin_unreachable: {
3372
34
    EmitUnreachable(E->getExprLoc());
3373
3374
    // We do need to preserve an insertion point.
3375
34
    EmitBlock(createBasicBlock("unreachable.cont"));
3376
3377
34
    return RValue::get(nullptr);
3378
156
  }
3379
3380
10
  case Builtin::BI__builtin_powi:
3381
20
  case Builtin::BI__builtin_powif:
3382
30
  case Builtin::BI__builtin_powil: {
3383
30
    llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
3384
30
    llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
3385
3386
30
    if (Builder.getIsFPConstrained()) {
3387
      // FIXME: llvm.powi has 2 mangling types,
3388
      // llvm.experimental.constrained.powi has one.
3389
3
      CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3390
3
      Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_powi,
3391
3
                                     Src0->getType());
3392
3
      return RValue::get(Builder.CreateConstrainedFPCall(F, { Src0, Src1 }));
3393
3
    }
3394
3395
27
    Function *F = CGM.getIntrinsic(Intrinsic::powi,
3396
27
                                   { Src0->getType(), Src1->getType() });
3397
27
    return RValue::get(Builder.CreateCall(F, { Src0, Src1 }));
3398
30
  }
3399
10
  case Builtin::BI__builtin_frexp:
3400
20
  case Builtin::BI__builtin_frexpf:
3401
31
  case Builtin::BI__builtin_frexpl:
3402
38
  case Builtin::BI__builtin_frexpf128:
3403
39
  case Builtin::BI__builtin_frexpf16:
3404
39
    return RValue::get(emitFrexpBuiltin(*this, E, Intrinsic::frexp));
3405
30
  case Builtin::BI__builtin_isgreater:
3406
56
  case Builtin::BI__builtin_isgreaterequal:
3407
83
  case Builtin::BI__builtin_isless:
3408
109
  case Builtin::BI__builtin_islessequal:
3409
135
  case Builtin::BI__builtin_islessgreater:
3410
186
  case Builtin::BI__builtin_isunordered: {
3411
    // Ordered comparisons: we know the arguments to these are matching scalar
3412
    // floating point values.
3413
186
    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3414
186
    Value *LHS = EmitScalarExpr(E->getArg(0));
3415
186
    Value *RHS = EmitScalarExpr(E->getArg(1));
3416
3417
186
    switch (BuiltinID) {
3418
0
    default: llvm_unreachable("Unknown ordered comparison");
3419
30
    case Builtin::BI__builtin_isgreater:
3420
30
      LHS = Builder.CreateFCmpOGT(LHS, RHS, "cmp");
3421
30
      break;
3422
26
    case Builtin::BI__builtin_isgreaterequal:
3423
26
      LHS = Builder.CreateFCmpOGE(LHS, RHS, "cmp");
3424
26
      break;
3425
27
    case Builtin::BI__builtin_isless:
3426
27
      LHS = Builder.CreateFCmpOLT(LHS, RHS, "cmp");
3427
27
      break;
3428
26
    case Builtin::BI__builtin_islessequal:
3429
26
      LHS = Builder.CreateFCmpOLE(LHS, RHS, "cmp");
3430
26
      break;
3431
26
    case Builtin::BI__builtin_islessgreater:
3432
26
      LHS = Builder.CreateFCmpONE(LHS, RHS, "cmp");
3433
26
      break;
3434
51
    case Builtin::BI__builtin_isunordered:
3435
51
      LHS = Builder.CreateFCmpUNO(LHS, RHS, "cmp");
3436
51
      break;
3437
186
    }
3438
    // ZExt bool to int type.
3439
186
    return RValue::get(Builder.CreateZExt(LHS, ConvertType(E->getType())));
3440
186
  }
3441
3442
36
  case Builtin::BI__builtin_isnan: {
3443
36
    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3444
36
    Value *V = EmitScalarExpr(E->getArg(0));
3445
36
    if (Value *Result = tryUseTestFPKind(*this, BuiltinID, V))
3446
0
      return RValue::get(Result);
3447
36
    return RValue::get(
3448
36
        Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcNan),
3449
36
                           ConvertType(E->getType())));
3450
36
  }
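This case and the issignaling/isinf/isfinite cases that follow share one shape: evaluate the operand, classify it with a single llvm.is.fpclass test, and zero-extend the i1 result. A minimal usage sketch (illustrative function name; the mask shorthand refers to the FPClassTest value used above):
int has_nan(double d) {
  // expected: llvm.is.fpclass(double %d, <fcNan mask>) zero-extended to i32,
  // unless the target's testFPKind() hook supplies a specialised sequence.
  return __builtin_isnan(d);
}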
3451
3452
2
  case Builtin::BI__builtin_issignaling: {
3453
2
    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3454
2
    Value *V = EmitScalarExpr(E->getArg(0));
3455
2
    return RValue::get(
3456
2
        Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcSNan),
3457
2
                           ConvertType(E->getType())));
3458
36
  }
3459
3460
26
  case Builtin::BI__builtin_isinf: {
3461
26
    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3462
26
    Value *V = EmitScalarExpr(E->getArg(0));
3463
26
    if (Value *Result = tryUseTestFPKind(*this, BuiltinID, V))
3464
0
      return RValue::get(Result);
3465
26
    return RValue::get(
3466
26
        Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcInf),
3467
26
                           ConvertType(E->getType())));
3468
26
  }
3469
3470
2
  case Builtin::BIfinite:
3471
2
  case Builtin::BI__finite:
3472
2
  case Builtin::BIfinitef:
3473
2
  case Builtin::BI__finitef:
3474
2
  case Builtin::BIfinitel:
3475
2
  case Builtin::BI__finitel:
3476
20
  case Builtin::BI__builtin_isfinite: {
3477
20
    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3478
20
    Value *V = EmitScalarExpr(E->getArg(0));
3479
20
    if (Value *Result = tryUseTestFPKind(*this, BuiltinID, V))
3480
0
      return RValue::get(Result);
3481
20
    return RValue::get(
3482
20
        Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcFinite),
3483
20
                           ConvertType(E->getType())));
3484
20
  }
3485
3486
6
  case Builtin::BI__builtin_isnormal: {
3487
6
    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3488
6
    Value *V = EmitScalarExpr(E->getArg(0));
3489
6
    return RValue::get(
3490
6
        Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcNormal),
3491
6
                           ConvertType(E->getType())));
3492
20
  }
3493
3494
2
  case Builtin::BI__builtin_issubnormal: {
3495
2
    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3496
2
    Value *V = EmitScalarExpr(E->getArg(0));
3497
2
    return RValue::get(
3498
2
        Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcSubnormal),
3499
2
                           ConvertType(E->getType())));
3500
20
  }
3501
3502
2
  case Builtin::BI__builtin_iszero: {
3503
2
    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3504
2
    Value *V = EmitScalarExpr(E->getArg(0));
3505
2
    return RValue::get(
3506
2
        Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcZero),
3507
2
                           ConvertType(E->getType())));
3508
20
  }
3509
3510
11
  case Builtin::BI__builtin_isfpclass: {
3511
11
    Expr::EvalResult Result;
3512
11
    if (!E->getArg(1)->EvaluateAsInt(Result, CGM.getContext()))
3513
0
      break;
3514
11
    uint64_t Test = Result.Val.getInt().getLimitedValue();
3515
11
    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3516
11
    Value *V = EmitScalarExpr(E->getArg(0));
3517
11
    return RValue::get(Builder.CreateZExt(Builder.createIsFPClass(V, Test),
3518
11
                                          ConvertType(E->getType())));
3519
11
  }
3520
3521
57
  case Builtin::BI__builtin_nondeterministic_value: {
3522
57
    llvm::Type *Ty = ConvertType(E->getArg(0)->getType());
3523
3524
57
    Value *Result = PoisonValue::get(Ty);
3525
57
    Result = Builder.CreateFreeze(Result);
3526
3527
57
    return RValue::get(Result);
3528
11
  }
3529
3530
97
  case Builtin::BI__builtin_elementwise_abs: {
3531
97
    Value *Result;
3532
97
    QualType QT = E->getArg(0)->getType();
3533
3534
97
    if (auto *VecTy = QT->getAs<VectorType>())
3535
78
      QT = VecTy->getElementType();
3536
97
    if (QT->isIntegerType())
3537
69
      Result = Builder.CreateBinaryIntrinsic(
3538
69
          llvm::Intrinsic::abs, EmitScalarExpr(E->getArg(0)),
3539
69
          Builder.getFalse(), nullptr, "elt.abs");
3540
28
    else
3541
28
      Result = emitUnaryBuiltin(*this, E, llvm::Intrinsic::fabs, "elt.abs");
3542
3543
97
    return RValue::get(Result);
3544
11
  }
3545
3546
28
  case Builtin::BI__builtin_elementwise_ceil:
3547
28
    return RValue::get(
3548
28
        emitUnaryBuiltin(*this, E, llvm::Intrinsic::ceil, "elt.ceil"));
3549
4
  case Builtin::BI__builtin_elementwise_exp:
3550
4
    return RValue::get(
3551
4
        emitUnaryBuiltin(*this, E, llvm::Intrinsic::exp, "elt.exp"));
3552
4
  case Builtin::BI__builtin_elementwise_exp2:
3553
4
    return RValue::get(
3554
4
        emitUnaryBuiltin(*this, E, llvm::Intrinsic::exp2, "elt.exp2"));
3555
20
  case Builtin::BI__builtin_elementwise_log:
3556
20
    return RValue::get(
3557
20
        emitUnaryBuiltin(*this, E, llvm::Intrinsic::log, "elt.log"));
3558
21
  case Builtin::BI__builtin_elementwise_log2:
3559
21
    return RValue::get(
3560
21
        emitUnaryBuiltin(*this, E, llvm::Intrinsic::log2, "elt.log2"));
3561
19
  case Builtin::BI__builtin_elementwise_log10:
3562
19
    return RValue::get(
3563
19
        emitUnaryBuiltin(*this, E, llvm::Intrinsic::log10, "elt.log10"));
3564
28
  case Builtin::BI__builtin_elementwise_pow: {
3565
28
    return RValue::get(emitBinaryBuiltin(*this, E, llvm::Intrinsic::pow));
3566
11
  }
3567
55
  case Builtin::BI__builtin_elementwise_bitreverse:
3568
55
    return RValue::get(emitUnaryBuiltin(*this, E, llvm::Intrinsic::bitreverse,
3569
55
                                        "elt.bitreverse"));
3570
20
  case Builtin::BI__builtin_elementwise_cos:
3571
20
    return RValue::get(
3572
20
        emitUnaryBuiltin(*this, E, llvm::Intrinsic::cos, "elt.cos"));
3573
28
  case Builtin::BI__builtin_elementwise_floor:
3574
28
    return RValue::get(
3575
28
        emitUnaryBuiltin(*this, E, llvm::Intrinsic::floor, "elt.floor"));
3576
4
  case Builtin::BI__builtin_elementwise_roundeven:
3577
4
    return RValue::get(emitUnaryBuiltin(*this, E, llvm::Intrinsic::roundeven,
3578
4
                                        "elt.roundeven"));
3579
4
  case Builtin::BI__builtin_elementwise_round:
3580
4
    return RValue::get(emitUnaryBuiltin(*this, E, llvm::Intrinsic::round,
3581
4
                                        "elt.round"));
3582
4
  case Builtin::BI__builtin_elementwise_rint:
3583
4
    return RValue::get(emitUnaryBuiltin(*this, E, llvm::Intrinsic::rint,
3584
4
                                        "elt.rint"));
3585
4
  case Builtin::BI__builtin_elementwise_nearbyint:
3586
4
    return RValue::get(emitUnaryBuiltin(*this, E, llvm::Intrinsic::nearbyint,
3587
4
                                        "elt.nearbyint"));
3588
20
  case Builtin::BI__builtin_elementwise_sin:
3589
20
    return RValue::get(
3590
20
        emitUnaryBuiltin(*this, E, llvm::Intrinsic::sin, "elt.sin"));
3591
3592
20
  case Builtin::BI__builtin_elementwise_trunc:
3593
20
    return RValue::get(
3594
20
        emitUnaryBuiltin(*this, E, llvm::Intrinsic::trunc, "elt.trunc"));
3595
4
  case Builtin::BI__builtin_elementwise_canonicalize:
3596
4
    return RValue::get(
3597
4
        emitUnaryBuiltin(*this, E, llvm::Intrinsic::canonicalize, "elt.canonicalize"));
3598
10
  case Builtin::BI__builtin_elementwise_copysign:
3599
10
    return RValue::get(emitBinaryBuiltin(*this, E, llvm::Intrinsic::copysign));
3600
15
  case Builtin::BI__builtin_elementwise_fma:
3601
15
    return RValue::get(emitTernaryBuiltin(*this, E, llvm::Intrinsic::fma));
3602
69
  case Builtin::BI__builtin_elementwise_add_sat:
3603
138
  case Builtin::BI__builtin_elementwise_sub_sat: {
3604
138
    Value *Op0 = EmitScalarExpr(E->getArg(0));
3605
138
    Value *Op1 = EmitScalarExpr(E->getArg(1));
3606
138
    Value *Result;
3607
138
    assert(Op0->getType()->isIntOrIntVectorTy() && "integer type expected");
3608
138
    QualType Ty = E->getArg(0)->getType();
3609
138
    if (auto *VecTy = Ty->getAs<VectorType>())
3610
124
      Ty = VecTy->getElementType();
3611
138
    bool IsSigned = Ty->isSignedIntegerType();
3612
138
    unsigned Opc;
3613
138
    if (BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_elementwise_add_sat)
3614
69
      Opc = IsSigned ? llvm::Intrinsic::sadd_sat : llvm::Intrinsic::uadd_sat;
3615
69
    else
3616
69
      Opc = IsSigned ? llvm::Intrinsic::ssub_sat : llvm::Intrinsic::usub_sat;
3617
138
    Result = Builder.CreateBinaryIntrinsic(Opc, Op0, Op1, nullptr, "elt.sat");
3618
138
    return RValue::get(Result);
3619
138
  }
3620
3621
178
  case Builtin::BI__builtin_elementwise_max: {
3622
178
    Value *Op0 = EmitScalarExpr(E->getArg(0));
3623
178
    Value *Op1 = EmitScalarExpr(E->getArg(1));
3624
178
    Value *Result;
3625
178
    if (Op0->getType()->isIntOrIntVectorTy()) {
3626
147
      QualType Ty = E->getArg(0)->getType();
3627
147
      if (auto *VecTy = Ty->getAs<VectorType>())
3628
128
        Ty = VecTy->getElementType();
3629
147
      Result = Builder.CreateBinaryIntrinsic(Ty->isSignedIntegerType()
3630
147
                                                 ? llvm::Intrinsic::smax
3631
147
                                                 : llvm::Intrinsic::umax,
3632
147
                                             Op0, Op1, nullptr, "elt.max");
3633
147
    } else
3634
31
      Result = Builder.CreateMaxNum(Op0, Op1, "elt.max");
3635
178
    return RValue::get(Result);
3636
138
  }
3637
178
  case Builtin::BI__builtin_elementwise_min: {
3638
178
    Value *Op0 = EmitScalarExpr(E->getArg(0));
3639
178
    Value *Op1 = EmitScalarExpr(E->getArg(1));
3640
178
    Value *Result;
3641
178
    if (Op0->getType()->isIntOrIntVectorTy()) {
3642
147
      QualType Ty = E->getArg(0)->getType();
3643
147
      if (auto *VecTy = Ty->getAs<VectorType>())
3644
128
        Ty = VecTy->getElementType();
3645
147
      Result = Builder.CreateBinaryIntrinsic(Ty->isSignedIntegerType()
3646
147
                                                 ? llvm::Intrinsic::smin
3647
147
                                                 : llvm::Intrinsic::umin,
3648
147
                                             Op0, Op1, nullptr, "elt.min");
3649
147
    } else
3650
31
      Result = Builder.CreateMinNum(Op0, Op1, "elt.min");
3651
178
    return RValue::get(Result);
3652
138
  }
3653
3654
45
  case Builtin::BI__builtin_reduce_max: {
3655
45
    auto GetIntrinsicID = [](QualType QT) {
3656
45
      if (auto *VecTy = QT->getAs<VectorType>())
3657
45
        QT = VecTy->getElementType();
3658
45
      if (QT->isSignedIntegerType())
3659
22
        return llvm::Intrinsic::vector_reduce_smax;
3660
23
      if (QT->isUnsignedIntegerType())
3661
21
        return llvm::Intrinsic::vector_reduce_umax;
3662
2
      assert(QT->isFloatingType() && "must have a float here");
3663
2
      return llvm::Intrinsic::vector_reduce_fmax;
3664
2
    };
3665
45
    return RValue::get(emitUnaryBuiltin(
3666
45
        *this, E, GetIntrinsicID(E->getArg(0)->getType()), "rdx.max"));
3667
138
  }
3668
3669
45
  case Builtin::BI__builtin_reduce_min: {
3670
45
    auto GetIntrinsicID = [](QualType QT) {
3671
45
      if (auto *VecTy = QT->getAs<VectorType>())
3672
45
        QT = VecTy->getElementType();
3673
45
      if (QT->isSignedIntegerType())
3674
22
        return llvm::Intrinsic::vector_reduce_smin;
3675
23
      if (QT->isUnsignedIntegerType())
3676
21
        return llvm::Intrinsic::vector_reduce_umin;
3677
2
      assert(QT->isFloatingType() && "must have a float here");
3678
2
      return llvm::Intrinsic::vector_reduce_fmin;
3679
2
    };
3680
3681
45
    return RValue::get(emitUnaryBuiltin(
3682
45
        *this, E, GetIntrinsicID(E->getArg(0)->getType()), "rdx.min"));
3683
138
  }
3684
3685
24
  case Builtin::BI__builtin_reduce_add:
3686
24
    return RValue::get(emitUnaryBuiltin(
3687
24
        *this, E, llvm::Intrinsic::vector_reduce_add, "rdx.add"));
3688
24
  case Builtin::BI__builtin_reduce_mul:
3689
24
    return RValue::get(emitUnaryBuiltin(
3690
24
        *this, E, llvm::Intrinsic::vector_reduce_mul, "rdx.mul"));
3691
2
  case Builtin::BI__builtin_reduce_xor:
3692
2
    return RValue::get(emitUnaryBuiltin(
3693
2
        *this, E, llvm::Intrinsic::vector_reduce_xor, "rdx.xor"));
3694
22
  case Builtin::BI__builtin_reduce_or:
3695
22
    return RValue::get(emitUnaryBuiltin(
3696
22
        *this, E, llvm::Intrinsic::vector_reduce_or, "rdx.or"));
3697
22
  case Builtin::BI__builtin_reduce_and:
3698
22
    return RValue::get(emitUnaryBuiltin(
3699
22
        *this, E, llvm::Intrinsic::vector_reduce_and, "rdx.and"));
3700
3701
31
  case Builtin::BI__builtin_matrix_transpose: {
3702
31
    auto *MatrixTy = E->getArg(0)->getType()->castAs<ConstantMatrixType>();
3703
31
    Value *MatValue = EmitScalarExpr(E->getArg(0));
3704
31
    MatrixBuilder MB(Builder);
3705
31
    Value *Result = MB.CreateMatrixTranspose(MatValue, MatrixTy->getNumRows(),
3706
31
                                             MatrixTy->getNumColumns());
3707
31
    return RValue::get(Result);
3708
138
  }
3709
3710
36
  case Builtin::BI__builtin_matrix_column_major_load: {
3711
36
    MatrixBuilder MB(Builder);
3712
    // Emit everything that isn't dependent on the first parameter type
3713
36
    Value *Stride = EmitScalarExpr(E->getArg(3));
3714
36
    const auto *ResultTy = E->getType()->getAs<ConstantMatrixType>();
3715
36
    auto *PtrTy = E->getArg(0)->getType()->getAs<PointerType>();
3716
36
    assert(PtrTy && "arg0 must be of pointer type");
3717
36
    bool IsVolatile = PtrTy->getPointeeType().isVolatileQualified();
3718
3719
36
    Address Src = EmitPointerWithAlignment(E->getArg(0));
3720
36
    EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(0)->getType(),
3721
36
                        E->getArg(0)->getExprLoc(), FD, 0);
3722
36
    Value *Result = MB.CreateColumnMajorLoad(
3723
36
        Src.getElementType(), Src.getPointer(),
3724
36
        Align(Src.getAlignment().getQuantity()), Stride, IsVolatile,
3725
36
        ResultTy->getNumRows(), ResultTy->getNumColumns(),
3726
36
        "matrix");
3727
36
    return RValue::get(Result);
3728
36
  }
3729
3730
26
  case Builtin::BI__builtin_matrix_column_major_store: {
3731
26
    MatrixBuilder MB(Builder);
3732
26
    Value *Matrix = EmitScalarExpr(E->getArg(0));
3733
26
    Address Dst = EmitPointerWithAlignment(E->getArg(1));
3734
26
    Value *Stride = EmitScalarExpr(E->getArg(2));
3735
3736
26
    const auto *MatrixTy = E->getArg(0)->getType()->getAs<ConstantMatrixType>();
3737
26
    auto *PtrTy = E->getArg(1)->getType()->getAs<PointerType>();
3738
26
    assert(PtrTy && "arg1 must be of pointer type");
3739
26
    bool IsVolatile = PtrTy->getPointeeType().isVolatileQualified();
3740
3741
26
    EmitNonNullArgCheck(RValue::get(Dst.getPointer()), E->getArg(1)->getType(),
3742
26
                        E->getArg(1)->getExprLoc(), FD, 0);
3743
26
    Value *Result = MB.CreateColumnMajorStore(
3744
26
        Matrix, Dst.getPointer(), Align(Dst.getAlignment().getQuantity()),
3745
26
        Stride, IsVolatile, MatrixTy->getNumRows(), MatrixTy->getNumColumns());
3746
26
    return RValue::get(Result);
3747
26
  }
3748
3749
9
  case Builtin::BI__builtin_isinf_sign: {
3750
    // isinf_sign(x) -> fabs(x) == infinity ? (signbit(x) ? -1 : 1) : 0
3751
9
    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3752
    // FIXME: for strictfp/IEEE-754 we need to not trap on SNaN here.
3753
9
    Value *Arg = EmitScalarExpr(E->getArg(0));
3754
9
    Value *AbsArg = EmitFAbs(*this, Arg);
3755
9
    Value *IsInf = Builder.CreateFCmpOEQ(
3756
9
        AbsArg, ConstantFP::getInfinity(Arg->getType()), "isinf");
3757
9
    Value *IsNeg = EmitSignBit(*this, Arg);
3758
3759
9
    llvm::Type *IntTy = ConvertType(E->getType());
3760
9
    Value *Zero = Constant::getNullValue(IntTy);
3761
9
    Value *One = ConstantInt::get(IntTy, 1);
3762
9
    Value *NegativeOne = ConstantInt::get(IntTy, -1);
3763
9
    Value *SignResult = Builder.CreateSelect(IsNeg, NegativeOne, One);
3764
9
    Value *Result = Builder.CreateSelect(IsInf, SignResult, Zero);
3765
9
    return RValue::get(Result);
3766
26
  }
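The comment at the top of this case gives the exact formula being lowered; as a minimal C++ reference of that behaviour (editor's illustrative sketch, not part of CGBuiltin.cpp, using only <cmath>):
#include <cmath>
// Mirrors the emitted select chain: fabs(x) == inf ? (signbit(x) ? -1 : 1) : 0.
static int isinf_sign_reference(double x) {
  if (std::fabs(x) != INFINITY)
    return 0;                         // finite or NaN
  return std::signbit(x) ? -1 : 1;    // -inf -> -1, +inf -> +1
}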
3767
3768
2
  case Builtin::BI__builtin_flt_rounds: {
3769
2
    Function *F = CGM.getIntrinsic(Intrinsic::get_rounding);
3770
3771
2
    llvm::Type *ResultType = ConvertType(E->getType());
3772
2
    Value *Result = Builder.CreateCall(F);
3773
2
    if (Result->getType() != ResultType)
3774
0
      Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
3775
0
                                     "cast");
3776
2
    return RValue::get(Result);
3777
26
  }
3778
3779
4
  case Builtin::BI__builtin_set_flt_rounds: {
3780
4
    Function *F = CGM.getIntrinsic(Intrinsic::set_rounding);
3781
3782
4
    Value *V = EmitScalarExpr(E->getArg(0));
3783
4
    Builder.CreateCall(F, V);
3784
4
    return RValue::get(nullptr);
3785
26
  }
3786
3787
1
  case Builtin::BI__builtin_fpclassify: {
3788
1
    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3789
    // FIXME: for strictfp/IEEE-754 we need to not trap on SNaN here.
3790
1
    Value *V = EmitScalarExpr(E->getArg(5));
3791
1
    llvm::Type *Ty = ConvertType(E->getArg(5)->getType());
3792
3793
    // Create Result
3794
1
    BasicBlock *Begin = Builder.GetInsertBlock();
3795
1
    BasicBlock *End = createBasicBlock("fpclassify_end", this->CurFn);
3796
1
    Builder.SetInsertPoint(End);
3797
1
    PHINode *Result =
3798
1
      Builder.CreatePHI(ConvertType(E->getArg(0)->getType()), 4,
3799
1
                        "fpclassify_result");
3800
3801
    // if (V==0) return FP_ZERO
3802
1
    Builder.SetInsertPoint(Begin);
3803
1
    Value *IsZero = Builder.CreateFCmpOEQ(V, Constant::getNullValue(Ty),
3804
1
                                          "iszero");
3805
1
    Value *ZeroLiteral = EmitScalarExpr(E->getArg(4));
3806
1
    BasicBlock *NotZero = createBasicBlock("fpclassify_not_zero", this->CurFn);
3807
1
    Builder.CreateCondBr(IsZero, End, NotZero);
3808
1
    Result->addIncoming(ZeroLiteral, Begin);
3809
3810
    // if (V != V) return FP_NAN
3811
1
    Builder.SetInsertPoint(NotZero);
3812
1
    Value *IsNan = Builder.CreateFCmpUNO(V, V, "cmp");
3813
1
    Value *NanLiteral = EmitScalarExpr(E->getArg(0));
3814
1
    BasicBlock *NotNan = createBasicBlock("fpclassify_not_nan", this->CurFn);
3815
1
    Builder.CreateCondBr(IsNan, End, NotNan);
3816
1
    Result->addIncoming(NanLiteral, NotZero);
3817
3818
    // if (fabs(V) == infinity) return FP_INFINITY
3819
1
    Builder.SetInsertPoint(NotNan);
3820
1
    Value *VAbs = EmitFAbs(*this, V);
3821
1
    Value *IsInf =
3822
1
      Builder.CreateFCmpOEQ(VAbs, ConstantFP::getInfinity(V->getType()),
3823
1
                            "isinf");
3824
1
    Value *InfLiteral = EmitScalarExpr(E->getArg(1));
3825
1
    BasicBlock *NotInf = createBasicBlock("fpclassify_not_inf", this->CurFn);
3826
1
    Builder.CreateCondBr(IsInf, End, NotInf);
3827
1
    Result->addIncoming(InfLiteral, NotNan);
3828
3829
    // if (fabs(V) >= MIN_NORMAL) return FP_NORMAL else FP_SUBNORMAL
3830
1
    Builder.SetInsertPoint(NotInf);
3831
1
    APFloat Smallest = APFloat::getSmallestNormalized(
3832
1
        getContext().getFloatTypeSemantics(E->getArg(5)->getType()));
3833
1
    Value *IsNormal =
3834
1
      Builder.CreateFCmpUGE(VAbs, ConstantFP::get(V->getContext(), Smallest),
3835
1
                            "isnormal");
3836
1
    Value *NormalResult =
3837
1
      Builder.CreateSelect(IsNormal, EmitScalarExpr(E->getArg(2)),
3838
1
                           EmitScalarExpr(E->getArg(3)));
3839
1
    Builder.CreateBr(End);
3840
1
    Result->addIncoming(NormalResult, NotInf);
3841
3842
    // return Result
3843
1
    Builder.SetInsertPoint(End);
3844
1
    return RValue::get(Result);
3845
26
  }
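A hedged C++ reference of the branch chain above (zero, NaN, infinity, then the normal/subnormal split); the FP_* classification values are whatever the caller passed as arguments 0-4, modelled here as plain ints (editor's sketch, not part of CGBuiltin.cpp):
#include <cfloat>
#include <cmath>
// Same decision order as the fpclassify_* basic blocks above, for a double arg 5.
static int fpclassify_reference(int nan_v, int inf_v, int normal_v,
                                int subnormal_v, int zero_v, double v) {
  if (v == 0.0)                 return zero_v;   // "if (V==0) return FP_ZERO"
  if (v != v)                   return nan_v;    // unordered self-compare
  if (std::fabs(v) == INFINITY) return inf_v;
  return std::fabs(v) >= DBL_MIN ? normal_v : subnormal_v;  // MIN_NORMAL split
}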
3846
3847
  // An alloca will always return a pointer to the alloca (stack) address
3848
  // space. This address space need not be the same as the AST / Language
3849
  // default (e.g. in C / C++ auto vars are in the generic address space). At
3850
  // the AST level this is handled within CreateTempAlloca et al., but for the
3851
  // builtin / dynamic alloca we have to handle it here. We use an explicit cast
3852
  // instead of passing an AS to CreateAlloca so as to not inhibit optimisation.
3853
5
  case Builtin::BIalloca:
3854
6
  case Builtin::BI_alloca:
3855
12
  case Builtin::BI__builtin_alloca_uninitialized:
3856
32
  case Builtin::BI__builtin_alloca: {
3857
32
    Value *Size = EmitScalarExpr(E->getArg(0));
3858
32
    const TargetInfo &TI = getContext().getTargetInfo();
3859
    // The alignment of the alloca should correspond to __BIGGEST_ALIGNMENT__.
3860
32
    const Align SuitableAlignmentInBytes =
3861
32
        CGM.getContext()
3862
32
            .toCharUnitsFromBits(TI.getSuitableAlign())
3863
32
            .getAsAlign();
3864
32
    AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size);
3865
32
    AI->setAlignment(SuitableAlignmentInBytes);
3866
32
    if (BuiltinID != Builtin::BI__builtin_alloca_uninitialized)
3867
26
      initializeAlloca(*this, AI, Size, SuitableAlignmentInBytes);
3868
32
    LangAS AAS = getASTAllocaAddressSpace();
3869
32
    LangAS EAS = E->getType()->getPointeeType().getAddressSpace();
3870
32
    if (AAS != EAS) {
3871
6
      llvm::Type *Ty = CGM.getTypes().ConvertType(E->getType());
3872
6
      return RValue::get(getTargetHooks().performAddrSpaceCast(*this, AI, AAS,
3873
6
                                                               EAS, Ty));
3874
6
    }
3875
26
    return RValue::get(AI);
3876
32
  }
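A small usage sketch of what the address-space handling above buys: the builtin's result is always usable as an ordinary pointer, even on targets whose alloca address space is not the generic one (editor's illustrative sketch; the 64-byte size is arbitrary):
// Both forms return generic pointers; the cast above hides the alloca AS.
void use_alloca(void) {
  void *scratch = __builtin_alloca(64);             // aligned to __BIGGEST_ALIGNMENT__
  void *raw = __builtin_alloca_uninitialized(64);   // no -ftrivial-auto-var-init fill
  (void)scratch;
  (void)raw;
}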
3877
3878
6
  case Builtin::BI__builtin_alloca_with_align_uninitialized:
3879
13
  case Builtin::BI__builtin_alloca_with_align: {
3880
13
    Value *Size = EmitScalarExpr(E->getArg(0));
3881
13
    Value *AlignmentInBitsValue = EmitScalarExpr(E->getArg(1));
3882
13
    auto *AlignmentInBitsCI = cast<ConstantInt>(AlignmentInBitsValue);
3883
13
    unsigned AlignmentInBits = AlignmentInBitsCI->getZExtValue();
3884
13
    const Align AlignmentInBytes =
3885
13
        CGM.getContext().toCharUnitsFromBits(AlignmentInBits).getAsAlign();
3886
13
    AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size);
3887
13
    AI->setAlignment(AlignmentInBytes);
3888
13
    if (BuiltinID != Builtin::BI__builtin_alloca_with_align_uninitialized)
3889
7
      initializeAlloca(*this, AI, Size, AlignmentInBytes);
3890
13
    LangAS AAS = getASTAllocaAddressSpace();
3891
13
    LangAS EAS = E->getType()->getPointeeType().getAddressSpace();
3892
13
    if (AAS != EAS) {
3893
6
      llvm::Type *Ty = CGM.getTypes().ConvertType(E->getType());
3894
6
      return RValue::get(getTargetHooks().performAddrSpaceCast(*this, AI, AAS,
3895
6
                                                               EAS, Ty));
3896
6
    }
3897
7
    return RValue::get(AI);
3898
13
  }
3899
3900
1
  case Builtin::BIbzero:
3901
6
  case Builtin::BI__builtin_bzero: {
3902
6
    Address Dest = EmitPointerWithAlignment(E->getArg(0));
3903
6
    Value *SizeVal = EmitScalarExpr(E->getArg(1));
3904
6
    EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
3905
6
                        E->getArg(0)->getExprLoc(), FD, 0);
3906
6
    Builder.CreateMemSet(Dest, Builder.getInt8(0), SizeVal, false);
3907
6
    return RValue::get(nullptr);
3908
1
  }
3909
3910
0
  case Builtin::BIbcopy:
3911
0
  case Builtin::BI__builtin_bcopy: {
3912
0
    Address Src = EmitPointerWithAlignment(E->getArg(0));
3913
0
    Address Dest = EmitPointerWithAlignment(E->getArg(1));
3914
0
    Value *SizeVal = EmitScalarExpr(E->getArg(2));
3915
0
    EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(0)->getType(),
3916
0
                        E->getArg(0)->getExprLoc(), FD, 0);
3917
0
    EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(1)->getType(),
3918
0
                        E->getArg(1)->getExprLoc(), FD, 0);
3919
0
    Builder.CreateMemMove(Dest, Src, SizeVal, false);
3920
0
    return RValue::get(Dest.getPointer());
3921
0
  }
3922
3923
53
  case Builtin::BImemcpy:
3924
136
  case Builtin::BI__builtin_memcpy:
3925
137
  case Builtin::BImempcpy:
3926
137
  case Builtin::BI__builtin_mempcpy: {
3927
137
    Address Dest = EmitPointerWithAlignment(E->getArg(0));
3928
137
    Address Src = EmitPointerWithAlignment(E->getArg(1));
3929
137
    Value *SizeVal = EmitScalarExpr(E->getArg(2));
3930
137
    EmitArgCheck(TCK_Store, Dest, E->getArg(0), 0);
3931
137
    EmitArgCheck(TCK_Load, Src, E->getArg(1), 1);
3932
137
    Builder.CreateMemCpy(Dest, Src, SizeVal, false);
3933
137
    if (BuiltinID == Builtin::BImempcpy ||
3934
137
        BuiltinID == Builtin::BI__builtin_mempcpy)
3935
1
      return RValue::get(Builder.CreateInBoundsGEP(Dest.getElementType(),
3936
1
                                                   Dest.getPointer(), SizeVal));
3937
136
    else
3938
136
      return RValue::get(Dest.getPointer());
3939
137
  }
3940
3941
8
  case Builtin::BI__builtin_memcpy_inline: {
3942
8
    Address Dest = EmitPointerWithAlignment(E->getArg(0));
3943
8
    Address Src = EmitPointerWithAlignment(E->getArg(1));
3944
8
    uint64_t Size =
3945
8
        E->getArg(2)->EvaluateKnownConstInt(getContext()).getZExtValue();
3946
8
    EmitArgCheck(TCK_Store, Dest, E->getArg(0), 0);
3947
8
    EmitArgCheck(TCK_Load, Src, E->getArg(1), 1);
3948
8
    Builder.CreateMemCpyInline(Dest, Src, Size);
3949
8
    return RValue::get(nullptr);
3950
137
  }
3951
3952
1
  case Builtin::BI__builtin_char_memchr:
3953
1
    BuiltinID = Builtin::BI__builtin_memchr;
3954
1
    break;
3955
3956
5
  case Builtin::BI__builtin___memcpy_chk: {
3957
    // fold __builtin_memcpy_chk(x, y, cst1, cst2) to memcpy iff cst1<=cst2.
3958
5
    Expr::EvalResult SizeResult, DstSizeResult;
3959
5
    if (!E->getArg(2)->EvaluateAsInt(SizeResult, CGM.getContext()) ||
3960
5
        !E->getArg(3)->EvaluateAsInt(DstSizeResult, CGM.getContext()))
3961
3
      break;
3962
2
    llvm::APSInt Size = SizeResult.Val.getInt();
3963
2
    llvm::APSInt DstSize = DstSizeResult.Val.getInt();
3964
2
    if (Size.ugt(DstSize))
3965
0
      break;
3966
2
    Address Dest = EmitPointerWithAlignment(E->getArg(0));
3967
2
    Address Src = EmitPointerWithAlignment(E->getArg(1));
3968
2
    Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
3969
2
    Builder.CreateMemCpy(Dest, Src, SizeVal, false);
3970
2
    return RValue::get(Dest.getPointer());
3971
2
  }
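An illustrative call site for the fold described in the comment above; the 16-byte count is made up for the example (editor's sketch):
char dst_buf[32];
// cst1 (16) <= cst2 (sizeof dst_buf == 32), so the checked call folds straight
// to a plain 16-byte memcpy at compile time.
void copy16(const char *src) {
  __builtin___memcpy_chk(dst_buf, src, 16, sizeof dst_buf);
}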
3972
3973
1
  case Builtin::BI__builtin_objc_memmove_collectable: {
3974
1
    Address DestAddr = EmitPointerWithAlignment(E->getArg(0));
3975
1
    Address SrcAddr = EmitPointerWithAlignment(E->getArg(1));
3976
1
    Value *SizeVal = EmitScalarExpr(E->getArg(2));
3977
1
    CGM.getObjCRuntime().EmitGCMemmoveCollectable(*this,
3978
1
                                                  DestAddr, SrcAddr, SizeVal);
3979
1
    return RValue::get(DestAddr.getPointer());
3980
2
  }
3981
3982
3
  case Builtin::BI__builtin___memmove_chk: {
3983
    // fold __builtin_memmove_chk(x, y, cst1, cst2) to memmove iff cst1<=cst2.
3984
3
    Expr::EvalResult SizeResult, DstSizeResult;
3985
3
    if (!E->getArg(2)->EvaluateAsInt(SizeResult, CGM.getContext()) ||
3986
3
        !E->getArg(3)->EvaluateAsInt(DstSizeResult, CGM.getContext()))
3987
2
      break;
3988
1
    llvm::APSInt Size = SizeResult.Val.getInt();
3989
1
    llvm::APSInt DstSize = DstSizeResult.Val.getInt();
3990
1
    if (Size.ugt(DstSize))
3991
0
      break;
3992
1
    Address Dest = EmitPointerWithAlignment(E->getArg(0));
3993
1
    Address Src = EmitPointerWithAlignment(E->getArg(1));
3994
1
    Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
3995
1
    Builder.CreateMemMove(Dest, Src, SizeVal, false);
3996
1
    return RValue::get(Dest.getPointer());
3997
1
  }
3998
3999
15
  case Builtin::BImemmove:
4000
70
  case Builtin::BI__builtin_memmove: {
4001
70
    Address Dest = EmitPointerWithAlignment(E->getArg(0));
4002
70
    Address Src = EmitPointerWithAlignment(E->getArg(1));
4003
70
    Value *SizeVal = EmitScalarExpr(E->getArg(2));
4004
70
    EmitArgCheck(TCK_Store, Dest, E->getArg(0), 0);
4005
70
    EmitArgCheck(TCK_Load, Src, E->getArg(1), 1);
4006
70
    Builder.CreateMemMove(Dest, Src, SizeVal, false);
4007
70
    return RValue::get(Dest.getPointer());
4008
15
  }
4009
26
  case Builtin::BImemset:
4010
34
  case Builtin::BI__builtin_memset: {
4011
34
    Address Dest = EmitPointerWithAlignment(E->getArg(0));
4012
34
    Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
4013
34
                                         Builder.getInt8Ty());
4014
34
    Value *SizeVal = EmitScalarExpr(E->getArg(2));
4015
34
    EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
4016
34
                        E->getArg(0)->getExprLoc(), FD, 0);
4017
34
    Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
4018
34
    return RValue::get(Dest.getPointer());
4019
26
  }
4020
3
  case Builtin::BI__builtin_memset_inline: {
4021
3
    Address Dest = EmitPointerWithAlignment(E->getArg(0));
4022
3
    Value *ByteVal =
4023
3
        Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)), Builder.getInt8Ty());
4024
3
    uint64_t Size =
4025
3
        E->getArg(2)->EvaluateKnownConstInt(getContext()).getZExtValue();
4026
3
    EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
4027
3
                        E->getArg(0)->getExprLoc(), FD, 0);
4028
3
    Builder.CreateMemSetInline(Dest, ByteVal, Size);
4029
3
    return RValue::get(nullptr);
4030
26
  }
4031
3
  case Builtin::BI__builtin___memset_chk: {
4032
    // fold __builtin_memset_chk(x, y, cst1, cst2) to memset iff cst1<=cst2.
4033
3
    Expr::EvalResult SizeResult, DstSizeResult;
4034
3
    if (!E->getArg(2)->EvaluateAsInt(SizeResult, CGM.getContext()) ||
4035
3
        !E->getArg(3)->EvaluateAsInt(DstSizeResult, CGM.getContext()))
4036
2
      break;
4037
1
    llvm::APSInt Size = SizeResult.Val.getInt();
4038
1
    llvm::APSInt DstSize = DstSizeResult.Val.getInt();
4039
1
    if (Size.ugt(DstSize))
4040
0
      break;
4041
1
    Address Dest = EmitPointerWithAlignment(E->getArg(0));
4042
1
    Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
4043
1
                                         Builder.getInt8Ty());
4044
1
    Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
4045
1
    Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
4046
1
    return RValue::get(Dest.getPointer());
4047
1
  }
4048
2
  case Builtin::BI__builtin_wmemchr: {
4049
    // The MSVC runtime library does not provide a definition of wmemchr, so we
4050
    // need an inline implementation.
4051
2
    if (!getTarget().getTriple().isOSMSVCRT())
4052
1
      break;
4053
4054
1
    llvm::Type *WCharTy = ConvertType(getContext().WCharTy);
4055
1
    Value *Str = EmitScalarExpr(E->getArg(0));
4056
1
    Value *Chr = EmitScalarExpr(E->getArg(1));
4057
1
    Value *Size = EmitScalarExpr(E->getArg(2));
4058
4059
1
    BasicBlock *Entry = Builder.GetInsertBlock();
4060
1
    BasicBlock *CmpEq = createBasicBlock("wmemchr.eq");
4061
1
    BasicBlock *Next = createBasicBlock("wmemchr.next");
4062
1
    BasicBlock *Exit = createBasicBlock("wmemchr.exit");
4063
1
    Value *SizeEq0 = Builder.CreateICmpEQ(Size, ConstantInt::get(SizeTy, 0));
4064
1
    Builder.CreateCondBr(SizeEq0, Exit, CmpEq);
4065
4066
1
    EmitBlock(CmpEq);
4067
1
    PHINode *StrPhi = Builder.CreatePHI(Str->getType(), 2);
4068
1
    StrPhi->addIncoming(Str, Entry);
4069
1
    PHINode *SizePhi = Builder.CreatePHI(SizeTy, 2);
4070
1
    SizePhi->addIncoming(Size, Entry);
4071
1
    CharUnits WCharAlign =
4072
1
        getContext().getTypeAlignInChars(getContext().WCharTy);
4073
1
    Value *StrCh = Builder.CreateAlignedLoad(WCharTy, StrPhi, WCharAlign);
4074
1
    Value *FoundChr = Builder.CreateConstInBoundsGEP1_32(WCharTy, StrPhi, 0);
4075
1
    Value *StrEqChr = Builder.CreateICmpEQ(StrCh, Chr);
4076
1
    Builder.CreateCondBr(StrEqChr, Exit, Next);
4077
4078
1
    EmitBlock(Next);
4079
1
    Value *NextStr = Builder.CreateConstInBoundsGEP1_32(WCharTy, StrPhi, 1);
4080
1
    Value *NextSize = Builder.CreateSub(SizePhi, ConstantInt::get(SizeTy, 1));
4081
1
    Value *NextSizeEq0 =
4082
1
        Builder.CreateICmpEQ(NextSize, ConstantInt::get(SizeTy, 0));
4083
1
    Builder.CreateCondBr(NextSizeEq0, Exit, CmpEq);
4084
1
    StrPhi->addIncoming(NextStr, Next);
4085
1
    SizePhi->addIncoming(NextSize, Next);
4086
4087
1
    EmitBlock(Exit);
4088
1
    PHINode *Ret = Builder.CreatePHI(Str->getType(), 3);
4089
1
    Ret->addIncoming(llvm::Constant::getNullValue(Str->getType()), Entry);
4090
1
    Ret->addIncoming(llvm::Constant::getNullValue(Str->getType()), Next);
4091
1
    Ret->addIncoming(FoundChr, CmpEq);
4092
1
    return RValue::get(Ret);
4093
2
  }
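Since the case above only open-codes wmemchr for MSVC targets, a plain C++ reference of the same loop may help when reading the block structure; this is an editor's sketch, not the emitted IR:
#include <cstddef>
// Same control flow as the wmemchr.eq / wmemchr.next / wmemchr.exit blocks.
static const wchar_t *wmemchr_reference(const wchar_t *s, wchar_t c,
                                        std::size_t n) {
  for (; n != 0; --n, ++s)
    if (*s == c)
      return s;     // FoundChr
  return nullptr;   // count exhausted
}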
4094
2
  case Builtin::BI__builtin_wmemcmp: {
4095
    // The MSVC runtime library does not provide a definition of wmemcmp, so we
4096
    // need an inline implementation.
4097
2
    if (!getTarget().getTriple().isOSMSVCRT())
4098
1
      break;
4099
4100
1
    llvm::Type *WCharTy = ConvertType(getContext().WCharTy);
4101
4102
1
    Value *Dst = EmitScalarExpr(E->getArg(0));
4103
1
    Value *Src = EmitScalarExpr(E->getArg(1));
4104
1
    Value *Size = EmitScalarExpr(E->getArg(2));
4105
4106
1
    BasicBlock *Entry = Builder.GetInsertBlock();
4107
1
    BasicBlock *CmpGT = createBasicBlock("wmemcmp.gt");
4108
1
    BasicBlock *CmpLT = createBasicBlock("wmemcmp.lt");
4109
1
    BasicBlock *Next = createBasicBlock("wmemcmp.next");
4110
1
    BasicBlock *Exit = createBasicBlock("wmemcmp.exit");
4111
1
    Value *SizeEq0 = Builder.CreateICmpEQ(Size, ConstantInt::get(SizeTy, 0));
4112
1
    Builder.CreateCondBr(SizeEq0, Exit, CmpGT);
4113
4114
1
    EmitBlock(CmpGT);
4115
1
    PHINode *DstPhi = Builder.CreatePHI(Dst->getType(), 2);
4116
1
    DstPhi->addIncoming(Dst, Entry);
4117
1
    PHINode *SrcPhi = Builder.CreatePHI(Src->getType(), 2);
4118
1
    SrcPhi->addIncoming(Src, Entry);
4119
1
    PHINode *SizePhi = Builder.CreatePHI(SizeTy, 2);
4120
1
    SizePhi->addIncoming(Size, Entry);
4121
1
    CharUnits WCharAlign =
4122
1
        getContext().getTypeAlignInChars(getContext().WCharTy);
4123
1
    Value *DstCh = Builder.CreateAlignedLoad(WCharTy, DstPhi, WCharAlign);
4124
1
    Value *SrcCh = Builder.CreateAlignedLoad(WCharTy, SrcPhi, WCharAlign);
4125
1
    Value *DstGtSrc = Builder.CreateICmpUGT(DstCh, SrcCh);
4126
1
    Builder.CreateCondBr(DstGtSrc, Exit, CmpLT);
4127
4128
1
    EmitBlock(CmpLT);
4129
1
    Value *DstLtSrc = Builder.CreateICmpULT(DstCh, SrcCh);
4130
1
    Builder.CreateCondBr(DstLtSrc, Exit, Next);
4131
4132
1
    EmitBlock(Next);
4133
1
    Value *NextDst = Builder.CreateConstInBoundsGEP1_32(WCharTy, DstPhi, 1);
4134
1
    Value *NextSrc = Builder.CreateConstInBoundsGEP1_32(WCharTy, SrcPhi, 1);
4135
1
    Value *NextSize = Builder.CreateSub(SizePhi, ConstantInt::get(SizeTy, 1));
4136
1
    Value *NextSizeEq0 =
4137
1
        Builder.CreateICmpEQ(NextSize, ConstantInt::get(SizeTy, 0));
4138
1
    Builder.CreateCondBr(NextSizeEq0, Exit, CmpGT);
4139
1
    DstPhi->addIncoming(NextDst, Next);
4140
1
    SrcPhi->addIncoming(NextSrc, Next);
4141
1
    SizePhi->addIncoming(NextSize, Next);
4142
4143
1
    EmitBlock(Exit);
4144
1
    PHINode *Ret = Builder.CreatePHI(IntTy, 4);
4145
1
    Ret->addIncoming(ConstantInt::get(IntTy, 0), Entry);
4146
1
    Ret->addIncoming(ConstantInt::get(IntTy, 1), CmpGT);
4147
1
    Ret->addIncoming(ConstantInt::get(IntTy, -1), CmpLT);
4148
1
    Ret->addIncoming(ConstantInt::get(IntTy, 0), Next);
4149
1
    return RValue::get(Ret);
4150
2
  }
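The matching reference loop for the wmemcmp lowering above; note that the emitted comparisons are unsigned (ICmpUGT / ICmpULT), which the casts below make explicit (editor's sketch):
#include <cstddef>
#include <cstdint>
// Mirrors the wmemcmp.gt / wmemcmp.lt / wmemcmp.next blocks above.
static int wmemcmp_reference(const wchar_t *a, const wchar_t *b, std::size_t n) {
  for (; n != 0; --n, ++a, ++b) {
    if ((std::uint32_t)*a > (std::uint32_t)*b) return 1;
    if ((std::uint32_t)*a < (std::uint32_t)*b) return -1;
  }
  return 0;
}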
4151
0
  case Builtin::BI__builtin_dwarf_cfa: {
4152
    // The offset in bytes from the first argument to the CFA.
4153
    //
4154
    // Why on earth is this in the frontend?  Is there any reason at
4155
    // all that the backend can't reasonably determine this while
4156
    // lowering llvm.eh.dwarf.cfa()?
4157
    //
4158
    // TODO: If there's a satisfactory reason, add a target hook for
4159
    // this instead of hard-coding 0, which is correct for most targets.
4160
0
    int32_t Offset = 0;
4161
4162
0
    Function *F = CGM.getIntrinsic(Intrinsic::eh_dwarf_cfa);
4163
0
    return RValue::get(Builder.CreateCall(F,
4164
0
                                      llvm::ConstantInt::get(Int32Ty, Offset)));
4165
2
  }
4166
3
  case Builtin::BI__builtin_return_address: {
4167
3
    Value *Depth = ConstantEmitter(*this).emitAbstract(E->getArg(0),
4168
3
                                                   getContext().UnsignedIntTy);
4169
3
    Function *F = CGM.getIntrinsic(Intrinsic::returnaddress);
4170
3
    return RValue::get(Builder.CreateCall(F, Depth));
4171
2
  }
4172
4
  case Builtin::BI_ReturnAddress: {
4173
4
    Function *F = CGM.getIntrinsic(Intrinsic::returnaddress);
4174
4
    return RValue::get(Builder.CreateCall(F, Builder.getInt32(0)));
4175
2
  }
4176
7
  case Builtin::BI__builtin_frame_address: {
4177
7
    Value *Depth = ConstantEmitter(*this).emitAbstract(E->getArg(0),
4178
7
                                                   getContext().UnsignedIntTy);
4179
7
    Function *F = CGM.getIntrinsic(Intrinsic::frameaddress, AllocaInt8PtrTy);
4180
7
    return RValue::get(Builder.CreateCall(F, Depth));
4181
2
  }
4182
2
  case Builtin::BI__builtin_extract_return_addr: {
4183
2
    Value *Address = EmitScalarExpr(E->getArg(0));
4184
2
    Value *Result = getTargetHooks().decodeReturnAddress(*this, Address);
4185
2
    return RValue::get(Result);
4186
2
  }
4187
0
  case Builtin::BI__builtin_frob_return_addr: {
4188
0
    Value *Address = EmitScalarExpr(E->getArg(0));
4189
0
    Value *Result = getTargetHooks().encodeReturnAddress(*this, Address);
4190
0
    return RValue::get(Result);
4191
2
  }
4192
5
  case Builtin::BI__builtin_dwarf_sp_column: {
4193
5
    llvm::IntegerType *Ty
4194
5
      = cast<llvm::IntegerType>(ConvertType(E->getType()));
4195
5
    int Column = getTargetHooks().getDwarfEHStackPointer(CGM);
4196
5
    if (Column == -1) {
4197
0
      CGM.ErrorUnsupported(E, "__builtin_dwarf_sp_column");
4198
0
      return RValue::get(llvm::UndefValue::get(Ty));
4199
0
    }
4200
5
    return RValue::get(llvm::ConstantInt::get(Ty, Column, true));
4201
5
  }
4202
5
  case Builtin::BI__builtin_init_dwarf_reg_size_table: {
4203
5
    Value *Address = EmitScalarExpr(E->getArg(0));
4204
5
    if (getTargetHooks().initDwarfEHRegSizeTable(*this, Address))
4205
0
      CGM.ErrorUnsupported(E, "__builtin_init_dwarf_reg_size_table");
4206
5
    return RValue::get(llvm::UndefValue::get(ConvertType(E->getType())));
4207
5
  }
4208
0
  case Builtin::BI__builtin_eh_return: {
4209
0
    Value *Int = EmitScalarExpr(E->getArg(0));
4210
0
    Value *Ptr = EmitScalarExpr(E->getArg(1));
4211
4212
0
    llvm::IntegerType *IntTy = cast<llvm::IntegerType>(Int->getType());
4213
0
    assert((IntTy->getBitWidth() == 32 || IntTy->getBitWidth() == 64) &&
4214
0
           "LLVM's __builtin_eh_return only supports 32- and 64-bit variants");
4215
0
    Function *F =
4216
0
        CGM.getIntrinsic(IntTy->getBitWidth() == 32 ? Intrinsic::eh_return_i32
4217
0
                                                    : Intrinsic::eh_return_i64);
4218
0
    Builder.CreateCall(F, {Int, Ptr});
4219
0
    Builder.CreateUnreachable();
4220
4221
    // We do need to preserve an insertion point.
4222
0
    EmitBlock(createBasicBlock("builtin_eh_return.cont"));
4223
4224
0
    return RValue::get(nullptr);
4225
0
  }
4226
2
  case Builtin::BI__builtin_unwind_init: {
4227
2
    Function *F = CGM.getIntrinsic(Intrinsic::eh_unwind_init);
4228
2
    Builder.CreateCall(F);
4229
2
    return RValue::get(nullptr);
4230
0
  }
4231
0
  case Builtin::BI__builtin_extend_pointer: {
4232
    // Extends a pointer to the size of an _Unwind_Word, which is
4233
    // uint64_t on all platforms.  Generally this gets poked into a
4234
    // register and eventually used as an address, so if the
4235
    // addressing registers are wider than pointers and the platform
4236
    // doesn't implicitly ignore high-order bits when doing
4237
    // addressing, we need to make sure we zext / sext based on
4238
    // the platform's expectations.
4239
    //
4240
    // See: http://gcc.gnu.org/ml/gcc-bugs/2002-02/msg00237.html
4241
4242
    // Cast the pointer to intptr_t.
4243
0
    Value *Ptr = EmitScalarExpr(E->getArg(0));
4244
0
    Value *Result = Builder.CreatePtrToInt(Ptr, IntPtrTy, "extend.cast");
4245
4246
    // If that's 64 bits, we're done.
4247
0
    if (IntPtrTy->getBitWidth() == 64)
4248
0
      return RValue::get(Result);
4249
4250
    // Otherwise, ask the codegen data what to do.
4251
0
    if (getTargetHooks().extendPointerWithSExt())
4252
0
      return RValue::get(Builder.CreateSExt(Result, Int64Ty, "extend.sext"));
4253
0
    else
4254
0
      return RValue::get(Builder.CreateZExt(Result, Int64Ty, "extend.zext"));
4255
0
  }
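For the 32-bit case the comment above is about, a reference of what the ptrtoint-plus-extension computes; whether sign or zero extension is used is a target hook, so it is just a parameter here (editor's sketch):
#include <cstdint>
// extendPointerWithSExt() picks between the two branches above.
static std::uint64_t extend_pointer_reference(void *p, bool target_uses_sext) {
  std::uint32_t raw = (std::uint32_t)(std::uintptr_t)p;  // ptrtoint, 32-bit target
  return target_uses_sext
             ? (std::uint64_t)(std::int64_t)(std::int32_t)raw  // extend.sext
             : (std::uint64_t)raw;                             // extend.zext
}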
4256
6
  case Builtin::BI__builtin_setjmp: {
4257
    // Buffer is a void**.
4258
6
    Address Buf = EmitPointerWithAlignment(E->getArg(0));
4259
4260
    // Store the frame pointer to the setjmp buffer.
4261
6
    Value *FrameAddr = Builder.CreateCall(
4262
6
        CGM.getIntrinsic(Intrinsic::frameaddress, AllocaInt8PtrTy),
4263
6
        ConstantInt::get(Int32Ty, 0));
4264
6
    Builder.CreateStore(FrameAddr, Buf);
4265
4266
    // Store the stack pointer to the setjmp buffer.
4267
6
    Value *StackAddr = Builder.CreateStackSave();
4268
6
    assert(Buf.getPointer()->getType() == StackAddr->getType());
4269
4270
6
    Address StackSaveSlot = Builder.CreateConstInBoundsGEP(Buf, 2);
4271
6
    Builder.CreateStore(StackAddr, StackSaveSlot);
4272
4273
    // Call LLVM's EH setjmp, which is lightweight.
4274
6
    Function *F = CGM.getIntrinsic(Intrinsic::eh_sjlj_setjmp);
4275
6
    return RValue::get(Builder.CreateCall(F, Buf.getPointer()));
4276
6
  }
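An editor's reading of the buffer layout the stores above assume; only the two slots written here are certain from this code, the rest is left to the llvm.eh.sjlj intrinsics:
// __builtin_setjmp conventionally takes a five-word buffer; slots written above:
//   buf[0] = llvm.frameaddress(0)   (frame pointer)
//   buf[2] = llvm.stacksave()       (stack pointer, via the GEP to slot 2)
// llvm.eh.sjlj.setjmp(buf) / llvm.eh.sjlj.longjmp(buf) manage the other slots.
void *example_jmpbuf[5];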
4277
8
  case Builtin::BI__builtin_longjmp: {
4278
8
    Value *Buf = EmitScalarExpr(E->getArg(0));
4279
4280
    // Call LLVM's EH longjmp, which is lightweight.
4281
8
    Builder.CreateCall(CGM.getIntrinsic(Intrinsic::eh_sjlj_longjmp), Buf);
4282
4283
    // longjmp doesn't return; mark this as unreachable.
4284
8
    Builder.CreateUnreachable();
4285
4286
    // We do need to preserve an insertion point.
4287
8
    EmitBlock(createBasicBlock("longjmp.cont"));
4288
4289
8
    return RValue::get(nullptr);
4290
6
  }
4291
52
  case Builtin::BI__builtin_launder: {
4292
52
    const Expr *Arg = E->getArg(0);
4293
52
    QualType ArgTy = Arg->getType()->getPointeeType();
4294
52
    Value *Ptr = EmitScalarExpr(Arg);
4295
52
    if (TypeRequiresBuiltinLaunder(CGM, ArgTy))
4296
11
      Ptr = Builder.CreateLaunderInvariantGroup(Ptr);
4297
4298
52
    return RValue::get(Ptr);
4299
6
  }
4300
0
  case Builtin::BI__sync_fetch_and_add:
4301
0
  case Builtin::BI__sync_fetch_and_sub:
4302
0
  case Builtin::BI__sync_fetch_and_or:
4303
0
  case Builtin::BI__sync_fetch_and_and:
4304
0
  case Builtin::BI__sync_fetch_and_xor:
4305
0
  case Builtin::BI__sync_fetch_and_nand:
4306
0
  case Builtin::BI__sync_add_and_fetch:
4307
0
  case Builtin::BI__sync_sub_and_fetch:
4308
0
  case Builtin::BI__sync_and_and_fetch:
4309
0
  case Builtin::BI__sync_or_and_fetch:
4310
0
  case Builtin::BI__sync_xor_and_fetch:
4311
0
  case Builtin::BI__sync_nand_and_fetch:
4312
0
  case Builtin::BI__sync_val_compare_and_swap:
4313
0
  case Builtin::BI__sync_bool_compare_and_swap:
4314
0
  case Builtin::BI__sync_lock_test_and_set:
4315
0
  case Builtin::BI__sync_lock_release:
4316
0
  case Builtin::BI__sync_swap:
4317
0
    llvm_unreachable("Shouldn't make it through sema");
4318
4
  case Builtin::BI__sync_fetch_and_add_1:
4319
8
  case Builtin::BI__sync_fetch_and_add_2:
4320
14
  case Builtin::BI__sync_fetch_and_add_4:
4321
19
  case Builtin::BI__sync_fetch_and_add_8:
4322
22
  case Builtin::BI__sync_fetch_and_add_16:
4323
22
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Add, E);
4324
5
  case Builtin::BI__sync_fetch_and_sub_1:
4325
9
  case Builtin::BI__sync_fetch_and_sub_2:
4326
13
  case Builtin::BI__sync_fetch_and_sub_4:
4327
18
  case Builtin::BI__sync_fetch_and_sub_8:
4328
19
  case Builtin::BI__sync_fetch_and_sub_16:
4329
19
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Sub, E);
4330
4
  case Builtin::BI__sync_fetch_and_or_1:
4331
8
  case Builtin::BI__sync_fetch_and_or_2:
4332
13
  case Builtin::BI__sync_fetch_and_or_4:
4333
17
  case Builtin::BI__sync_fetch_and_or_8:
4334
18
  case Builtin::BI__sync_fetch_and_or_16:
4335
18
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Or, E);
4336
4
  case Builtin::BI__sync_fetch_and_and_1:
4337
8
  case Builtin::BI__sync_fetch_and_and_2:
4338
13
  case Builtin::BI__sync_fetch_and_and_4:
4339
17
  case Builtin::BI__sync_fetch_and_and_8:
4340
18
  case Builtin::BI__sync_fetch_and_and_16:
4341
18
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::And, E);
4342
4
  case Builtin::BI__sync_fetch_and_xor_1:
4343
8
  case Builtin::BI__sync_fetch_and_xor_2:
4344
13
  case Builtin::BI__sync_fetch_and_xor_4:
4345
17
  case Builtin::BI__sync_fetch_and_xor_8:
4346
20
  case Builtin::BI__sync_fetch_and_xor_16:
4347
20
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xor, E);
4348
4
  case Builtin::BI__sync_fetch_and_nand_1:
4349
8
  case Builtin::BI__sync_fetch_and_nand_2:
4350
13
  case Builtin::BI__sync_fetch_and_nand_4:
4351
17
  case Builtin::BI__sync_fetch_and_nand_8:
4352
18
  case Builtin::BI__sync_fetch_and_nand_16:
4353
18
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Nand, E);
4354
4355
  // Clang extensions: not overloaded yet.
4356
1
  case Builtin::BI__sync_fetch_and_min:
4357
1
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Min, E);
4358
1
  case Builtin::BI__sync_fetch_and_max:
4359
1
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Max, E);
4360
1
  case Builtin::BI__sync_fetch_and_umin:
4361
1
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMin, E);
4362
1
  case Builtin::BI__sync_fetch_and_umax:
4363
1
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMax, E);
4364
4365
2
  case Builtin::BI__sync_add_and_fetch_1:
4366
4
  case Builtin::BI__sync_add_and_fetch_2:
4367
7
  case Builtin::BI__sync_add_and_fetch_4:
4368
9
  case Builtin::BI__sync_add_and_fetch_8:
4369
10
  case Builtin::BI__sync_add_and_fetch_16:
4370
10
    return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Add, E,
4371
10
                                llvm::Instruction::Add);
4372
2
  case Builtin::BI__sync_sub_and_fetch_1:
4373
4
  case Builtin::BI__sync_sub_and_fetch_2:
4374
7
  case Builtin::BI__sync_sub_and_fetch_4:
4375
9
  case Builtin::BI__sync_sub_and_fetch_8:
4376
11
  case Builtin::BI__sync_sub_and_fetch_16:
4377
11
    return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Sub, E,
4378
11
                                llvm::Instruction::Sub);
4379
3
  case Builtin::BI__sync_and_and_fetch_1:
4380
5
  case Builtin::BI__sync_and_and_fetch_2:
4381
7
  case Builtin::BI__sync_and_and_fetch_4:
4382
9
  case Builtin::BI__sync_and_and_fetch_8:
4383
10
  case Builtin::BI__sync_and_and_fetch_16:
4384
10
    return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::And, E,
4385
10
                                llvm::Instruction::And);
4386
3
  case Builtin::BI__sync_or_and_fetch_1:
4387
5
  case Builtin::BI__sync_or_and_fetch_2:
4388
7
  case Builtin::BI__sync_or_and_fetch_4:
4389
9
  case Builtin::BI__sync_or_and_fetch_8:
4390
10
  case Builtin::BI__sync_or_and_fetch_16:
4391
10
    return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Or, E,
4392
10
                                llvm::Instruction::Or);
4393
3
  case Builtin::BI__sync_xor_and_fetch_1:
4394
5
  case Builtin::BI__sync_xor_and_fetch_2:
4395
8
  case Builtin::BI__sync_xor_and_fetch_4:
4396
10
  case Builtin::BI__sync_xor_and_fetch_8:
4397
11
  case Builtin::BI__sync_xor_and_fetch_16:
4398
11
    return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Xor, E,
4399
11
                                llvm::Instruction::Xor);
4400
3
  case Builtin::BI__sync_nand_and_fetch_1:
4401
5
  case Builtin::BI__sync_nand_and_fetch_2:
4402
7
  case Builtin::BI__sync_nand_and_fetch_4:
4403
9
  case Builtin::BI__sync_nand_and_fetch_8:
4404
12
  case Builtin::BI__sync_nand_and_fetch_16:
4405
12
    return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Nand, E,
4406
12
                                llvm::Instruction::And, true);
4407
4408
3
  case Builtin::BI__sync_val_compare_and_swap_1:
4409
5
  case Builtin::BI__sync_val_compare_and_swap_2:
4410
10
  case Builtin::BI__sync_val_compare_and_swap_4:
4411
13
  case Builtin::BI__sync_val_compare_and_swap_8:
4412
15
  case Builtin::BI__sync_val_compare_and_swap_16:
4413
15
    return RValue::get(MakeAtomicCmpXchgValue(*this, E, false));
4414
4415
2
  case Builtin::BI__sync_bool_compare_and_swap_1:
4416
4
  case Builtin::BI__sync_bool_compare_and_swap_2:
4417
9
  case Builtin::BI__sync_bool_compare_and_swap_4:
4418
12
  case Builtin::BI__sync_bool_compare_and_swap_8:
4419
14
  case Builtin::BI__sync_bool_compare_and_swap_16:
4420
14
    return RValue::get(MakeAtomicCmpXchgValue(*this, E, true));
4421
4422
0
  case Builtin::BI__sync_swap_1:
4423
0
  case Builtin::BI__sync_swap_2:
4424
1
  case Builtin::BI__sync_swap_4:
4425
1
  case Builtin::BI__sync_swap_8:
4426
2
  case Builtin::BI__sync_swap_16:
4427
2
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
4428
4429
2
  case Builtin::BI__sync_lock_test_and_set_1:
4430
4
  case Builtin::BI__sync_lock_test_and_set_2:
4431
8
  case Builtin::BI__sync_lock_test_and_set_4:
4432
12
  case Builtin::BI__sync_lock_test_and_set_8:
4433
13
  case Builtin::BI__sync_lock_test_and_set_16:
4434
13
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
4435
4436
2
  case Builtin::BI__sync_lock_release_1:
4437
4
  case Builtin::BI__sync_lock_release_2:
4438
9
  case Builtin::BI__sync_lock_release_4:
4439
11
  case Builtin::BI__sync_lock_release_8:
4440
13
  case Builtin::BI__sync_lock_release_16: {
4441
13
    Value *Ptr = CheckAtomicAlignment(*this, E);
4442
13
    QualType ElTy = E->getArg(0)->getType()->getPointeeType();
4443
13
    CharUnits StoreSize = getContext().getTypeSizeInChars(ElTy);
4444
13
    llvm::Type *ITy =
4445
13
        llvm::IntegerType::get(getLLVMContext(), StoreSize.getQuantity() * 8);
4446
13
    llvm::StoreInst *Store =
4447
13
      Builder.CreateAlignedStore(llvm::Constant::getNullValue(ITy), Ptr,
4448
13
                                 StoreSize);
4449
13
    Store->setAtomic(llvm::AtomicOrdering::Release);
4450
13
    return RValue::get(nullptr);
4451
11
  }
4452
4453
2
  case Builtin::BI__sync_synchronize: {
4454
    // We assume this is supposed to correspond to a C++0x-style
4455
    // sequentially-consistent fence (i.e. this is only usable for
4456
    // synchronization, not device I/O or anything like that). This intrinsic
4457
    // is really badly designed in the sense that in theory, there isn't
4458
    // any way to safely use it... but in practice, it mostly works
4459
    // to use it with non-atomic loads and stores to get acquire/release
4460
    // semantics.
4461
2
    Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent);
4462
2
    return RValue::get(nullptr);
4463
11
  }
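The fence emitted above is the same instruction a C++11 sequentially consistent thread fence lowers to, so a modern spelling of the legacy builtin is simply (editor's note):
#include <atomic>
// Equivalent full barrier for what __sync_synchronize() emits above.
inline void full_barrier() {
  std::atomic_thread_fence(std::memory_order_seq_cst);
}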
4464
4465
35
  case Builtin::BI__builtin_nontemporal_load:
4466
35
    return RValue::get(EmitNontemporalLoad(*this, E));
4467
84
  case Builtin::BI__builtin_nontemporal_store:
4468
84
    return RValue::get(EmitNontemporalStore(*this, E));
4469
11
  case Builtin::BI__c11_atomic_is_lock_free:
4470
27
  case Builtin::BI__atomic_is_lock_free: {
4471
    // Call "bool __atomic_is_lock_free(size_t size, void *ptr)". For the
4472
    // __c11 builtin, ptr is 0 (indicating a properly-aligned object), since
4473
    // _Atomic(T) is always properly-aligned.
4474
27
    const char *LibCallName = "__atomic_is_lock_free";
4475
27
    CallArgList Args;
4476
27
    Args.add(RValue::get(EmitScalarExpr(E->getArg(0))),
4477
27
             getContext().getSizeType());
4478
27
    if (BuiltinID == Builtin::BI__atomic_is_lock_free)
4479
16
      Args.add(RValue::get(EmitScalarExpr(E->getArg(1))),
4480
16
               getContext().VoidPtrTy);
4481
11
    else
4482
11
      Args.add(RValue::get(llvm::Constant::getNullValue(VoidPtrTy)),
4483
11
               getContext().VoidPtrTy);
4484
27
    const CGFunctionInfo &FuncInfo =
4485
27
        CGM.getTypes().arrangeBuiltinFunctionCall(E->getType(), Args);
4486
27
    llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FuncInfo);
4487
27
    llvm::FunctionCallee Func = CGM.CreateRuntimeFunction(FTy, LibCallName);
4488
27
    return EmitCall(FuncInfo, CGCallee::forDirect(Func),
4489
27
                    ReturnValueSlot(), Args);
4490
11
  }
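Call-site shapes for the two branches above; both end in the same __atomic_is_lock_free libcall, the __c11 form simply passing a null pointer (editor's illustrative sketch):
#include <cstddef>
// The runtime call is bool __atomic_is_lock_free(size_t size, void *ptr).
bool query_lock_freedom(int *obj) {
  bool with_ptr = __atomic_is_lock_free(sizeof(*obj), obj);      // ptr forwarded
  bool ptr_less = __atomic_is_lock_free(sizeof(*obj), nullptr);  // the __c11 shape
  return with_ptr && ptr_less;
}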
4491
4492
8
  case Builtin::BI__atomic_test_and_set: {
4493
    // Look at the argument type to determine whether this is a volatile
4494
    // operation. The parameter type is always volatile.
4495
8
    QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
4496
8
    bool Volatile =
4497
8
        PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
4498
4499
8
    Value *Ptr = EmitScalarExpr(E->getArg(0));
4500
8
    Value *NewVal = Builder.getInt8(1);
4501
8
    Value *Order = EmitScalarExpr(E->getArg(1));
4502
8
    if (isa<llvm::ConstantInt>(Order)) {
4503
8
      int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
4504
8
      AtomicRMWInst *Result = nullptr;
4505
8
      switch (ord) {
4506
0
      case 0:  // memory_order_relaxed
4507
0
      default: // invalid order
4508
0
        Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
4509
0
                                         llvm::AtomicOrdering::Monotonic);
4510
0
        break;
4511
0
      case 1: // memory_order_consume
4512
4
      case 2: // memory_order_acquire
4513
4
        Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
4514
4
                                         llvm::AtomicOrdering::Acquire);
4515
4
        break;
4516
0
      case 3: // memory_order_release
4517
0
        Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
4518
0
                                         llvm::AtomicOrdering::Release);
4519
0
        break;
4520
0
      case 4: // memory_order_acq_rel
4521
4522
0
        Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
4523
0
                                         llvm::AtomicOrdering::AcquireRelease);
4524
0
        break;
4525
4
      case 5: // memory_order_seq_cst
4526
4
        Result = Builder.CreateAtomicRMW(
4527
4
            llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
4528
4
            llvm::AtomicOrdering::SequentiallyConsistent);
4529
4
        break;
4530
8
      }
4531
8
      Result->setVolatile(Volatile);
4532
8
      return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
4533
8
    }
4534
4535
0
    llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
4536
4537
0
    llvm::BasicBlock *BBs[5] = {
4538
0
      createBasicBlock("monotonic", CurFn),
4539
0
      createBasicBlock("acquire", CurFn),
4540
0
      createBasicBlock("release", CurFn),
4541
0
      createBasicBlock("acqrel", CurFn),
4542
0
      createBasicBlock("seqcst", CurFn)
4543
0
    };
4544
0
    llvm::AtomicOrdering Orders[5] = {
4545
0
        llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Acquire,
4546
0
        llvm::AtomicOrdering::Release, llvm::AtomicOrdering::AcquireRelease,
4547
0
        llvm::AtomicOrdering::SequentiallyConsistent};
4548
4549
0
    Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
4550
0
    llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);
4551
4552
0
    Builder.SetInsertPoint(ContBB);
4553
0
    PHINode *Result = Builder.CreatePHI(Int8Ty, 5, "was_set");
4554
4555
0
    for (unsigned i = 0; i < 5; ++i) {
4556
0
      Builder.SetInsertPoint(BBs[i]);
4557
0
      AtomicRMWInst *RMW = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
4558
0
                                                   Ptr, NewVal, Orders[i]);
4559
0
      RMW->setVolatile(Volatile);
4560
0
      Result->addIncoming(RMW, BBs[i]);
4561
0
      Builder.CreateBr(ContBB);
4562
0
    }
4563
4564
0
    SI->addCase(Builder.getInt32(0), BBs[0]);
4565
0
    SI->addCase(Builder.getInt32(1), BBs[1]);
4566
0
    SI->addCase(Builder.getInt32(2), BBs[1]);
4567
0
    SI->addCase(Builder.getInt32(3), BBs[2]);
4568
0
    SI->addCase(Builder.getInt32(4), BBs[3]);
4569
0
    SI->addCase(Builder.getInt32(5), BBs[4]);
4570
4571
0
    Builder.SetInsertPoint(ContBB);
4572
0
    return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
4573
8
  }
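The canonical use of this builtin pair (test_and_set here, clear in the case below) is a byte spinlock; the acquire exchange and the release store match the constant-order fast paths in both cases (editor's illustrative sketch):
// Minimal spinlock built on the two builtins handled in this switch.
static volatile unsigned char lock_byte;   // volatile pointee, as detected above
inline void spin_lock() {
  while (__atomic_test_and_set(&lock_byte, __ATOMIC_ACQUIRE)) {
    // spin until the previous value was clear
  }
}
inline void spin_unlock() { __atomic_clear(&lock_byte, __ATOMIC_RELEASE); }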
4574
4575
8
  case Builtin::BI__atomic_clear: {
4576
8
    QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
4577
8
    bool Volatile =
4578
8
        PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
4579
4580
8
    Address Ptr = EmitPointerWithAlignment(E->getArg(0));
4581
8
    Ptr = Ptr.withElementType(Int8Ty);
4582
8
    Value *NewVal = Builder.getInt8(0);
4583
8
    Value *Order = EmitScalarExpr(E->getArg(1));
4584
8
    if (isa<llvm::ConstantInt>(Order)) {
4585
8
      int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
4586
8
      StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
4587
8
      switch (ord) {
4588
0
      case 0:  // memory_order_relaxed
4589
0
      default: // invalid order
4590
0
        Store->setOrdering(llvm::AtomicOrdering::Monotonic);
4591
0
        break;
4592
4
      case 3:  // memory_order_release
4593
4
        Store->setOrdering(llvm::AtomicOrdering::Release);
4594
4
        break;
4595
4
      case 5:  // memory_order_seq_cst
4596
4
        Store->setOrdering(llvm::AtomicOrdering::SequentiallyConsistent);
4597
4
        break;
4598
8
      }
4599
8
      return RValue::get(nullptr);
4600
8
    }
4601
4602
0
    llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
4603
4604
0
    llvm::BasicBlock *BBs[3] = {
4605
0
      createBasicBlock("monotonic", CurFn),
4606
0
      createBasicBlock("release", CurFn),
4607
0
      createBasicBlock("seqcst", CurFn)
4608
0
    };
4609
0
    llvm::AtomicOrdering Orders[3] = {
4610
0
        llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Release,
4611
0
        llvm::AtomicOrdering::SequentiallyConsistent};
4612
4613
0
    Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
4614
0
    llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);
4615
4616
0
    for (unsigned i = 0; i < 3; ++i) {
4617
0
      Builder.SetInsertPoint(BBs[i]);
4618
0
      StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
4619
0
      Store->setOrdering(Orders[i]);
4620
0
      Builder.CreateBr(ContBB);
4621
0
    }
4622
4623
0
    SI->addCase(Builder.getInt32(0), BBs[0]);
4624
0
    SI->addCase(Builder.getInt32(3), BBs[1]);
4625
0
    SI->addCase(Builder.getInt32(5), BBs[2]);
4626
4627
0
    Builder.SetInsertPoint(ContBB);
4628
0
    return RValue::get(nullptr);
4629
8
  }
4630
4631
0
  case Builtin::BI__atomic_thread_fence:
4632
0
  case Builtin::BI__atomic_signal_fence:
4633
0
  case Builtin::BI__c11_atomic_thread_fence:
4634
0
  case Builtin::BI__c11_atomic_signal_fence: {
4635
0
    llvm::SyncScope::ID SSID;
4636
0
    if (BuiltinID == Builtin::BI__atomic_signal_fence ||
4637
0
        BuiltinID == Builtin::BI__c11_atomic_signal_fence)
4638
0
      SSID = llvm::SyncScope::SingleThread;
4639
0
    else
4640
0
      SSID = llvm::SyncScope::System;
4641
0
    Value *Order = EmitScalarExpr(E->getArg(0));
4642
0
    if (isa<llvm::ConstantInt>(Order)) {
4643
0
      int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
4644
0
      switch (ord) {
4645
0
      case 0:  // memory_order_relaxed
4646
0
      default: // invalid order
4647
0
        break;
4648
0
      case 1:  // memory_order_consume
4649
0
      case 2:  // memory_order_acquire
4650
0
        Builder.CreateFence(llvm::AtomicOrdering::Acquire, SSID);
4651
0
        break;
4652
0
      case 3:  // memory_order_release
4653
0
        Builder.CreateFence(llvm::AtomicOrdering::Release, SSID);
4654
0
        break;
4655
0
      case 4:  // memory_order_acq_rel
4656
0
        Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, SSID);
4657
0
        break;
4658
0
      case 5:  // memory_order_seq_cst
4659
0
        Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, SSID);
4660
0
        break;
4661
0
      }
4662
0
      return RValue::get(nullptr);
4663
0
    }
4664
4665
0
    llvm::BasicBlock *AcquireBB, *ReleaseBB, *AcqRelBB, *SeqCstBB;
4666
0
    AcquireBB = createBasicBlock("acquire", CurFn);
4667
0
    ReleaseBB = createBasicBlock("release", CurFn);
4668
0
    AcqRelBB = createBasicBlock("acqrel", CurFn);
4669
0
    SeqCstBB = createBasicBlock("seqcst", CurFn);
4670
0
    llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
4671
4672
0
    Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
4673
0
    llvm::SwitchInst *SI = Builder.CreateSwitch(Order, ContBB);
4674
4675
0
    Builder.SetInsertPoint(AcquireBB);
4676
0
    Builder.CreateFence(llvm::AtomicOrdering::Acquire, SSID);
4677
0
    Builder.CreateBr(ContBB);
4678
0
    SI->addCase(Builder.getInt32(1), AcquireBB);
4679
0
    SI->addCase(Builder.getInt32(2), AcquireBB);
4680
4681
0
    Builder.SetInsertPoint(ReleaseBB);
4682
0
    Builder.CreateFence(llvm::AtomicOrdering::Release, SSID);
4683
0
    Builder.CreateBr(ContBB);
4684
0
    SI->addCase(Builder.getInt32(3), ReleaseBB);
4685
4686
0
    Builder.SetInsertPoint(AcqRelBB);
4687
0
    Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, SSID);
4688
0
    Builder.CreateBr(ContBB);
4689
0
    SI->addCase(Builder.getInt32(4), AcqRelBB);
4690
4691
0
    Builder.SetInsertPoint(SeqCstBB);
4692
0
    Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, SSID);
4693
0
    Builder.CreateBr(ContBB);
4694
0
    SI->addCase(Builder.getInt32(5), SeqCstBB);
4695
4696
0
    Builder.SetInsertPoint(ContBB);
4697
0
    return RValue::get(nullptr);
4698
0
  }
4699
4700
12
  case Builtin::BI__builtin_signbit:
4701
22
  case Builtin::BI__builtin_signbitf:
4702
30
  case Builtin::BI__builtin_signbitl: {
4703
30
    return RValue::get(
4704
30
        Builder.CreateZExt(EmitSignBit(*this, EmitScalarExpr(E->getArg(0))),
4705
30
                           ConvertType(E->getType())));
4706
22
  }
4707
2
  case Builtin::BI__warn_memset_zero_len:
4708
2
    return RValue::getIgnored();
4709
10
  case Builtin::BI__annotation: {
4710
    // Re-encode each wide string to UTF8 and make an MDString.
4711
10
    SmallVector<Metadata *, 1> Strings;
4712
16
    for (const Expr *Arg : E->arguments()) {
4713
16
      const auto *Str = cast<StringLiteral>(Arg->IgnoreParenCasts());
4714
16
      assert(Str->getCharByteWidth() == 2);
4715
16
      StringRef WideBytes = Str->getBytes();
4716
16
      std::string StrUtf8;
4717
16
      if (!convertUTF16ToUTF8String(
4718
16
              ArrayRef(WideBytes.data(), WideBytes.size()), StrUtf8)) {
4719
0
        CGM.ErrorUnsupported(E, "non-UTF16 __annotation argument");
4720
0
        continue;
4721
0
      }
4722
16
      Strings.push_back(llvm::MDString::get(getLLVMContext(), StrUtf8));
4723
16
    }
4724
4725
    // Build an MDTuple of MDStrings and emit the intrinsic call.
4726
10
    llvm::Function *F =
4727
10
        CGM.getIntrinsic(llvm::Intrinsic::codeview_annotation, {});
4728
10
    MDTuple *StrTuple = MDTuple::get(getLLVMContext(), Strings);
4729
10
    Builder.CreateCall(F, MetadataAsValue::get(getLLVMContext(), StrTuple));
4730
10
    return RValue::getIgnored();
4731
10
  }
4732
7
  case Builtin::BI__builtin_annotation: {
4733
7
    llvm::Value *AnnVal = EmitScalarExpr(E->getArg(0));
4734
7
    llvm::Function *F =
4735
7
        CGM.getIntrinsic(llvm::Intrinsic::annotation,
4736
7
                         {AnnVal->getType(), CGM.ConstGlobalsPtrTy});
4737
4738
    // Get the annotation string, going through casts. Sema requires this to be a
4739
    // non-wide string literal, potentially cast, so the cast<> is safe.
4740
7
    const Expr *AnnotationStrExpr = E->getArg(1)->IgnoreParenCasts();
4741
7
    StringRef Str = cast<StringLiteral>(AnnotationStrExpr)->getString();
4742
7
    return RValue::get(
4743
7
        EmitAnnotationCall(F, AnnVal, Str, E->getExprLoc(), nullptr));
4744
10
  }
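A minimal caller-side sketch of the builtin handled above (the value and the
annotation string are illustrative; as noted, the second argument must be a
string literal):

    int v = 42;
    int tagged = __builtin_annotation(v, "my.annotation");  // returns v; emits a call to llvm.annotation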
4745
3
  case Builtin::BI__builtin_addcb:
4746
6
  case Builtin::BI__builtin_addcs:
4747
9
  case Builtin::BI__builtin_addc:
4748
12
  case Builtin::BI__builtin_addcl:
4749
15
  case Builtin::BI__builtin_addcll:
4750
18
  case Builtin::BI__builtin_subcb:
4751
21
  case Builtin::BI__builtin_subcs:
4752
24
  case Builtin::BI__builtin_subc:
4753
27
  case Builtin::BI__builtin_subcl:
4754
30
  case Builtin::BI__builtin_subcll: {
4755
4756
    // We translate all of these builtins from expressions of the form:
4757
    //   int x = ..., y = ..., carryin = ..., carryout, result;
4758
    //   result = __builtin_addc(x, y, carryin, &carryout);
4759
    //
4760
    // to LLVM IR of the form:
4761
    //
4762
    //   %tmp1 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)
4763
    //   %tmpsum1 = extractvalue {i32, i1} %tmp1, 0
4764
    //   %carry1 = extractvalue {i32, i1} %tmp1, 1
4765
    //   %tmp2 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %tmpsum1,
4766
    //                                                       i32 %carryin)
4767
    //   %result = extractvalue {i32, i1} %tmp2, 0
4768
    //   %carry2 = extractvalue {i32, i1} %tmp2, 1
4769
    //   %tmp3 = or i1 %carry1, %carry2
4770
    //   %tmp4 = zext i1 %tmp3 to i32
4771
    //   store i32 %tmp4, i32* %carryout
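As a caller-side companion to the translation above, a hedged sketch of the
intended use: chaining the carry through a two-word addition (assumes 32-bit
unsigned int; the operand names and values are illustrative):

    unsigned a_lo = 0xffffffffu, a_hi = 1u;   // low/high words of the first operand
    unsigned b_lo = 1u,          b_hi = 2u;   // low/high words of the second operand
    unsigned carry, lo, hi;
    lo = __builtin_addc(a_lo, b_lo, 0u, &carry);     // carry-out of the low word
    hi = __builtin_addc(a_hi, b_hi, carry, &carry);  // carry-in chained into the high word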
4772
4773
    // Scalarize our inputs.
4774
30
    llvm::Value *X = EmitScalarExpr(E->getArg(0));
4775
30
    llvm::Value *Y = EmitScalarExpr(E->getArg(1));
4776
30
    llvm::Value *Carryin = EmitScalarExpr(E->getArg(2));
4777
30
    Address CarryOutPtr = EmitPointerWithAlignment(E->getArg(3));
4778
4779
    // Decide if we are lowering to a uadd.with.overflow or usub.with.overflow.
4780
30
    llvm::Intrinsic::ID IntrinsicId;
4781
30
    switch (BuiltinID) {
4782
0
    default: llvm_unreachable("Unknown multiprecision builtin id.");
4783
3
    case Builtin::BI__builtin_addcb:
4784
6
    case Builtin::BI__builtin_addcs:
4785
9
    case Builtin::BI__builtin_addc:
4786
12
    case Builtin::BI__builtin_addcl:
4787
15
    case Builtin::BI__builtin_addcll:
4788
15
      IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
4789
15
      break;
4790
3
    case Builtin::BI__builtin_subcb:
4791
6
    case Builtin::BI__builtin_subcs:
4792
9
    case Builtin::BI__builtin_subc:
4793
12
    case Builtin::BI__builtin_subcl:
4794
15
    case Builtin::BI__builtin_subcll:
4795
15
      IntrinsicId = llvm::Intrinsic::usub_with_overflow;
4796
15
      break;
4797
30
    }
4798
4799
    // Construct our resulting LLVM IR expression.
4800
30
    llvm::Value *Carry1;
4801
30
    llvm::Value *Sum1 = EmitOverflowIntrinsic(*this, IntrinsicId,
4802
30
                                              X, Y, Carry1);
4803
30
    llvm::Value *Carry2;
4804
30
    llvm::Value *Sum2 = EmitOverflowIntrinsic(*this, IntrinsicId,
4805
30
                                              Sum1, Carryin, Carry2);
4806
30
    llvm::Value *CarryOut = Builder.CreateZExt(Builder.CreateOr(Carry1, Carry2),
4807
30
                                               X->getType());
4808
30
    Builder.CreateStore(CarryOut, CarryOutPtr);
4809
30
    return RValue::get(Sum2);
4810
30
  }
4811
4812
29
  case Builtin::BI__builtin_add_overflow:
4813
43
  case Builtin::BI__builtin_sub_overflow:
4814
105
  case Builtin::BI__builtin_mul_overflow: {
4815
105
    const clang::Expr *LeftArg = E->getArg(0);
4816
105
    const clang::Expr *RightArg = E->getArg(1);
4817
105
    const clang::Expr *ResultArg = E->getArg(2);
4818
4819
105
    clang::QualType ResultQTy =
4820
105
        ResultArg->getType()->castAs<PointerType>()->getPointeeType();
4821
4822
105
    WidthAndSignedness LeftInfo =
4823
105
        getIntegerWidthAndSignedness(CGM.getContext(), LeftArg->getType());
4824
105
    WidthAndSignedness RightInfo =
4825
105
        getIntegerWidthAndSignedness(CGM.getContext(), RightArg->getType());
4826
105
    WidthAndSignedness ResultInfo =
4827
105
        getIntegerWidthAndSignedness(CGM.getContext(), ResultQTy);
4828
4829
    // Handle mixed-sign multiplication as a special case, because adding
4830
    // runtime or backend support for our generic irgen would be too expensive.
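A hedged example of a call that can take this special path (the exact
conditions live in isSpecialMixedSignMultiply; the key point is a
__builtin_mul_overflow with one signed and one unsigned operand):

    long a = -3;
    unsigned long b = 5;
    long result;
    _Bool overflowed = __builtin_mul_overflow(a, b, &result);  // result = -15, no overflow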
4831
105
    if (isSpecialMixedSignMultiply(BuiltinID, LeftInfo, RightInfo, ResultInfo))
4832
27
      return EmitCheckedMixedSignMultiply(*this, LeftArg, LeftInfo, RightArg,
4833
27
                                          RightInfo, ResultArg, ResultQTy,
4834
27
                                          ResultInfo);
4835
4836
78
    if (isSpecialUnsignedMultiplySignedResult(BuiltinID, LeftInfo, RightInfo,
4837
78
                                              ResultInfo))
4838
9
      return EmitCheckedUnsignedMultiplySignedResult(
4839
9
          *this, LeftArg, LeftInfo, RightArg, RightInfo, ResultArg, ResultQTy,
4840
9
          ResultInfo);
4841
4842
69
    WidthAndSignedness EncompassingInfo =
4843
69
        EncompassingIntegerType({LeftInfo, RightInfo, ResultInfo});
4844
4845
69
    llvm::Type *EncompassingLLVMTy =
4846
69
        llvm::IntegerType::get(CGM.getLLVMContext(), EncompassingInfo.Width);
4847
4848
69
    llvm::Type *ResultLLVMTy = CGM.getTypes().ConvertType(ResultQTy);
4849
4850
69
    llvm::Intrinsic::ID IntrinsicId;
4851
69
    switch (BuiltinID) {
4852
0
    default:
4853
0
      llvm_unreachable("Unknown overflow builtin id.");
4854
29
    case Builtin::BI__builtin_add_overflow:
4855
29
      IntrinsicId = EncompassingInfo.Signed
4856
29
                        ? llvm::Intrinsic::sadd_with_overflow
4857
29
                        : llvm::Intrinsic::uadd_with_overflow;
4858
29
      break;
4859
14
    case Builtin::BI__builtin_sub_overflow:
4860
14
      IntrinsicId = EncompassingInfo.Signed
4861
14
                        ? 
llvm::Intrinsic::ssub_with_overflow11
4862
14
                        : 
llvm::Intrinsic::usub_with_overflow3
;
4863
14
      break;
4864
26
    case Builtin::BI__builtin_mul_overflow:
4865
26
      IntrinsicId = EncompassingInfo.Signed
4866
26
                        ? 
llvm::Intrinsic::smul_with_overflow23
4867
26
                        : 
llvm::Intrinsic::umul_with_overflow3
;
4868
26
      break;
4869
69
    }
4870
4871
69
    llvm::Value *Left = EmitScalarExpr(LeftArg);
4872
69
    llvm::Value *Right = EmitScalarExpr(RightArg);
4873
69
    Address ResultPtr = EmitPointerWithAlignment(ResultArg);
4874
4875
    // Extend each operand to the encompassing type.
4876
69
    Left = Builder.CreateIntCast(Left, EncompassingLLVMTy, LeftInfo.Signed);
4877
69
    Right = Builder.CreateIntCast(Right, EncompassingLLVMTy, RightInfo.Signed);
4878
4879
    // Perform the operation on the extended values.
4880
69
    llvm::Value *Overflow, *Result;
4881
69
    Result = EmitOverflowIntrinsic(*this, IntrinsicId, Left, Right, Overflow);
4882
4883
69
    if (EncompassingInfo.Width > ResultInfo.Width) {
4884
      // The encompassing type is wider than the result type, so we need to
4885
      // truncate it.
4886
9
      llvm::Value *ResultTrunc = Builder.CreateTrunc(Result, ResultLLVMTy);
4887
4888
      // To see if the truncation caused an overflow, we will extend
4889
      // the result and then compare it to the original result.
4890
9
      llvm::Value *ResultTruncExt = Builder.CreateIntCast(
4891
9
          ResultTrunc, EncompassingLLVMTy, ResultInfo.Signed);
4892
9
      llvm::Value *TruncationOverflow =
4893
9
          Builder.CreateICmpNE(Result, ResultTruncExt);
4894
4895
9
      Overflow = Builder.CreateOr(Overflow, TruncationOverflow);
4896
9
      Result = ResultTrunc;
4897
9
    }
4898
4899
    // Finally, store the result using the pointer.
4900
69
    bool isVolatile =
4901
69
      ResultArg->getType()->getPointeeType().isVolatileQualified();
4902
69
    Builder.CreateStore(EmitToMemory(Result, ResultQTy), ResultPtr, isVolatile);
4903
4904
69
    return RValue::get(Overflow);
4905
69
  }
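A hedged illustration of the truncation handling in this case: with int
operands and a narrower result type (assuming 32-bit int and 16-bit short),
the arithmetic is performed in the encompassing type and the extra comparison
reports overflow when the truncated value differs:

    int a = 70000, b = 70000;
    short r;
    _Bool ov = __builtin_add_overflow(a, b, &r);  // true: 140000 does not fit in short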
4906
4907
3
  case Builtin::BI__builtin_uadd_overflow:
4908
6
  case Builtin::BI__builtin_uaddl_overflow:
4909
9
  case Builtin::BI__builtin_uaddll_overflow:
4910
12
  case Builtin::BI__builtin_usub_overflow:
4911
15
  case Builtin::BI__builtin_usubl_overflow:
4912
18
  case Builtin::BI__builtin_usubll_overflow:
4913
21
  case Builtin::BI__builtin_umul_overflow:
4914
24
  case Builtin::BI__builtin_umull_overflow:
4915
27
  case Builtin::BI__builtin_umulll_overflow:
4916
31
  case Builtin::BI__builtin_sadd_overflow:
4917
34
  case Builtin::BI__builtin_saddl_overflow:
4918
37
  case Builtin::BI__builtin_saddll_overflow:
4919
40
  case Builtin::BI__builtin_ssub_overflow:
4920
43
  case Builtin::BI__builtin_ssubl_overflow:
4921
46
  case Builtin::BI__builtin_ssubll_overflow:
4922
49
  case Builtin::BI__builtin_smul_overflow:
4923
52
  case Builtin::BI__builtin_smull_overflow:
4924
55
  case Builtin::BI__builtin_smulll_overflow: {
4925
4926
    // We translate all of these builtins directly to the relevant LLVM IR node.
4927
4928
    // Scalarize our inputs.
4929
55
    llvm::Value *X = EmitScalarExpr(E->getArg(0));
4930
55
    llvm::Value *Y = EmitScalarExpr(E->getArg(1));
4931
55
    Address SumOutPtr = EmitPointerWithAlignment(E->getArg(2));
4932
4933
    // Decide which of the overflow intrinsics we are lowering to:
4934
55
    llvm::Intrinsic::ID IntrinsicId;
4935
55
    switch (BuiltinID) {
4936
0
    default: llvm_unreachable("Unknown overflow builtin id.");
4937
3
    case Builtin::BI__builtin_uadd_overflow:
4938
6
    case Builtin::BI__builtin_uaddl_overflow:
4939
9
    case Builtin::BI__builtin_uaddll_overflow:
4940
9
      IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
4941
9
      break;
4942
3
    case Builtin::BI__builtin_usub_overflow:
4943
6
    case Builtin::BI__builtin_usubl_overflow:
4944
9
    case Builtin::BI__builtin_usubll_overflow:
4945
9
      IntrinsicId = llvm::Intrinsic::usub_with_overflow;
4946
9
      break;
4947
3
    case Builtin::BI__builtin_umul_overflow:
4948
6
    case Builtin::BI__builtin_umull_overflow:
4949
9
    case Builtin::BI__builtin_umulll_overflow:
4950
9
      IntrinsicId = llvm::Intrinsic::umul_with_overflow;
4951
9
      break;
4952
4
    case Builtin::BI__builtin_sadd_overflow:
4953
7
    case Builtin::BI__builtin_saddl_overflow:
4954
10
    case Builtin::BI__builtin_saddll_overflow:
4955
10
      IntrinsicId = llvm::Intrinsic::sadd_with_overflow;
4956
10
      break;
4957
3
    case Builtin::BI__builtin_ssub_overflow:
4958
6
    case Builtin::BI__builtin_ssubl_overflow:
4959
9
    case Builtin::BI__builtin_ssubll_overflow:
4960
9
      IntrinsicId = llvm::Intrinsic::ssub_with_overflow;
4961
9
      break;
4962
3
    case Builtin::BI__builtin_smul_overflow:
4963
6
    case Builtin::BI__builtin_smull_overflow:
4964
9
    case Builtin::BI__builtin_smulll_overflow:
4965
9
      IntrinsicId = llvm::Intrinsic::smul_with_overflow;
4966
9
      break;
4967
55
    }
4968
4969
4970
55
    llvm::Value *Carry;
4971
55
    llvm::Value *Sum = EmitOverflowIntrinsic(*this, IntrinsicId, X, Y, Carry);
4972
55
    Builder.CreateStore(Sum, SumOutPtr);
4973
4974
55
    return RValue::get(Carry);
4975
55
  }
4976
1.01k
  case Builtin::BIaddressof:
4977
1.01k
  case Builtin::BI__addressof:
4978
1.05k
  case Builtin::BI__builtin_addressof:
4979
1.05k
    return RValue::get(EmitLValue(E->getArg(0)).getPointer(*this));
4980
7
  case Builtin::BI__builtin_function_start:
4981
7
    return RValue::get(CGM.GetFunctionStart(
4982
7
        E->getArg(0)->getAsBuiltinConstantDeclRef(CGM.getContext())));
4983
237
  case Builtin::BI__builtin_operator_new:
4984
237
    return EmitBuiltinNewDeleteCall(
4985
237
        E->getCallee()->getType()->castAs<FunctionProtoType>(), E, false);
4986
252
  case Builtin::BI__builtin_operator_delete:
4987
252
    EmitBuiltinNewDeleteCall(
4988
252
        E->getCallee()->getType()->castAs<FunctionProtoType>(), E, true);
4989
252
    return RValue::get(nullptr);
4990
4991
5
  case Builtin::BI__builtin_is_aligned:
4992
5
    return EmitBuiltinIsAligned(E);
4993
7
  case Builtin::BI__builtin_align_up:
4994
7
    return EmitBuiltinAlignTo(E, true);
4995
6
  case Builtin::BI__builtin_align_down:
4996
6
    return EmitBuiltinAlignTo(E, false);
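For reference, a short usage sketch of the three alignment builtins handled
above (the buffer and the 64-byte alignment are illustrative; the alignment
must be a power of two):

    char buf[256];
    char *p    = buf + 3;
    char *up   = __builtin_align_up(p, 64);    // round p up to a 64-byte boundary
    char *down = __builtin_align_down(p, 64);  // round p down to a 64-byte boundary
    _Bool ok   = __builtin_is_aligned(p, 64);  // whether p is already 64-byte aligned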
4997
4998
7
  case Builtin::BI__noop:
4999
    // __noop always evaluates to an integer literal zero.
5000
7
    return RValue::get(ConstantInt::get(IntTy, 0));
5001
8
  case Builtin::BI__builtin_call_with_static_chain: {
5002
8
    const CallExpr *Call = cast<CallExpr>(E->getArg(0));
5003
8
    const Expr *Chain = E->getArg(1);
5004
8
    return EmitCall(Call->getCallee()->getType(),
5005
8
                    EmitCallee(Call->getCallee()), Call, ReturnValue,
5006
8
                    EmitScalarExpr(Chain));
5007
1.01k
  }
5008
4
  case Builtin::BI_InterlockedExchange8:
5009
8
  case Builtin::BI_InterlockedExchange16:
5010
22
  case Builtin::BI_InterlockedExchange:
5011
26
  case Builtin::BI_InterlockedExchangePointer:
5012
26
    return RValue::get(
5013
26
        EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E));
5014
4
  case Builtin::BI_InterlockedCompareExchangePointer:
5015
8
  case Builtin::BI_InterlockedCompareExchangePointer_nf: {
5016
8
    llvm::Type *RTy;
5017
8
    llvm::IntegerType *IntType = IntegerType::get(
5018
8
        getLLVMContext(), getContext().getTypeSize(E->getType()));
5019
5020
8
    llvm::Value *Destination = EmitScalarExpr(E->getArg(0));
5021
5022
8
    llvm::Value *Exchange = EmitScalarExpr(E->getArg(1));
5023
8
    RTy = Exchange->getType();
5024
8
    Exchange = Builder.CreatePtrToInt(Exchange, IntType);
5025
5026
8
    llvm::Value *Comparand =
5027
8
      Builder.CreatePtrToInt(EmitScalarExpr(E->getArg(2)), IntType);
5028
5029
8
    auto Ordering =
5030
8
      BuiltinID == Builtin::BI_InterlockedCompareExchangePointer_nf ?
5031
4
      AtomicOrdering::Monotonic : AtomicOrdering::SequentiallyConsistent;
5032
5033
8
    auto Result = Builder.CreateAtomicCmpXchg(Destination, Comparand, Exchange,
5034
8
                                              Ordering, Ordering);
5035
8
    Result->setVolatile(true);
5036
5037
8
    return RValue::get(Builder.CreateIntToPtr(Builder.CreateExtractValue(Result,
5038
8
                                                                         0),
5039
8
                                              RTy));
5040
4
  }
5041
4
  case Builtin::BI_InterlockedCompareExchange8:
5042
8
  case Builtin::BI_InterlockedCompareExchange16:
5043
22
  case Builtin::BI_InterlockedCompareExchange:
5044
26
  case Builtin::BI_InterlockedCompareExchange64:
5045
26
    return RValue::get(EmitAtomicCmpXchgForMSIntrin(*this, E));
5046
4
  case Builtin::BI_InterlockedIncrement16:
5047
18
  case Builtin::BI_InterlockedIncrement:
5048
18
    return RValue::get(
5049
18
        EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E));
5050
4
  case Builtin::BI_InterlockedDecrement16:
5051
18
  case Builtin::BI_InterlockedDecrement:
5052
18
    return RValue::get(
5053
18
        EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E));
5054
4
  case Builtin::BI_InterlockedAnd8:
5055
8
  case Builtin::BI_InterlockedAnd16:
5056
22
  case Builtin::BI_InterlockedAnd:
5057
22
    return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E));
5058
4
  case Builtin::BI_InterlockedExchangeAdd8:
5059
8
  case Builtin::BI_InterlockedExchangeAdd16:
5060
22
  case Builtin::BI_InterlockedExchangeAdd:
5061
22
    return RValue::get(
5062
22
        EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E));
5063
4
  case Builtin::BI_InterlockedExchangeSub8:
5064
8
  case Builtin::BI_InterlockedExchangeSub16:
5065
22
  case Builtin::BI_InterlockedExchangeSub:
5066
22
    return RValue::get(
5067
22
        EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E));
5068
4
  case Builtin::BI_InterlockedOr8:
5069
8
  case Builtin::BI_InterlockedOr16:
5070
22
  case Builtin::BI_InterlockedOr:
5071
22
    return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E));
5072
4
  case Builtin::BI_InterlockedXor8:
5073
8
  case Builtin::BI_InterlockedXor16:
5074
22
  case Builtin::BI_InterlockedXor:
5075
22
    return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E));
5076
5077
3
  case Builtin::BI_bittest64:
5078
6
  case Builtin::BI_bittest:
5079
9
  case Builtin::BI_bittestandcomplement64:
5080
12
  case Builtin::BI_bittestandcomplement:
5081
15
  case Builtin::BI_bittestandreset64:
5082
18
  case Builtin::BI_bittestandreset:
5083
21
  case Builtin::BI_bittestandset64:
5084
24
  case Builtin::BI_bittestandset:
5085
27
  case Builtin::BI_interlockedbittestandreset:
5086
30
  case Builtin::BI_interlockedbittestandreset64:
5087
33
  case Builtin::BI_interlockedbittestandset64:
5088
39
  case Builtin::BI_interlockedbittestandset:
5089
41
  case Builtin::BI_interlockedbittestandset_acq:
5090
43
  case Builtin::BI_interlockedbittestandset_rel:
5091
45
  case Builtin::BI_interlockedbittestandset_nf:
5092
47
  case Builtin::BI_interlockedbittestandreset_acq:
5093
49
  case Builtin::BI_interlockedbittestandreset_rel:
5094
51
  case Builtin::BI_interlockedbittestandreset_nf:
5095
51
    return RValue::get(EmitBitTestIntrinsic(*this, BuiltinID, E));
5096
5097
    // These builtins exist to emit regular volatile loads and stores not
5098
    // affected by the -fms-volatile setting.
5099
4
  case Builtin::BI__iso_volatile_load8:
5100
8
  case Builtin::BI__iso_volatile_load16:
5101
12
  case Builtin::BI__iso_volatile_load32:
5102
16
  case Builtin::BI__iso_volatile_load64:
5103
16
    return RValue::get(EmitISOVolatileLoad(*this, E));
5104
4
  case Builtin::BI__iso_volatile_store8:
5105
8
  case Builtin::BI__iso_volatile_store16:
5106
12
  case Builtin::BI__iso_volatile_store32:
5107
16
  case Builtin::BI__iso_volatile_store64:
5108
16
    return RValue::get(EmitISOVolatileStore(*this, E));
5109
5110
0
  case Builtin::BI__exception_code:
5111
18
  case Builtin::BI_exception_code:
5112
18
    return RValue::get(EmitSEHExceptionCode());
5113
0
  case Builtin::BI__exception_info:
5114
0
  case Builtin::BI_exception_info:
5115
0
    return RValue::get(EmitSEHExceptionInfo());
5116
3
  case Builtin::BI__abnormal_termination:
5117
5
  case Builtin::BI_abnormal_termination:
5118
5
    return RValue::get(EmitSEHAbnormalTermination());
5119
6
  case Builtin::BI_setjmpex:
5120
6
    if (getTarget().getTriple().isOSMSVCRT() && E->getNumArgs() == 1 &&
5121
6
        E->getArg(0)->getType()->isPointerType())
5122
6
      return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmpex, E);
5123
0
    break;
5124
11
  case Builtin::BI_setjmp:
5125
11
    if (getTarget().getTriple().isOSMSVCRT() && E->getNumArgs() == 1 &&
5126
11
        E->getArg(0)->getType()->isPointerType()) {
5127
6
      if (getTarget().getTriple().getArch() == llvm::Triple::x86)
5128
2
        return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmp3, E);
5129
4
      else if (getTarget().getTriple().getArch() == llvm::Triple::aarch64)
5130
2
        return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmpex, E);
5131
2
      return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmp, E);
5132
6
    }
5133
5
    break;
5134
5135
  // C++ std:: builtins.
5136
5.57k
  case Builtin::BImove:
5137
5.69k
  case Builtin::BImove_if_noexcept:
5138
15.6k
  case Builtin::BIforward:
5139
15.6k
  case Builtin::BIforward_like:
5140
15.6k
  case Builtin::BIas_const:
5141
15.6k
    return RValue::get(EmitLValue(E->getArg(0)).getPointer(*this));
5142
3
  case Builtin::BI__GetExceptionInfo: {
5143
3
    if (llvm::GlobalVariable *GV =
5144
3
            CGM.getCXXABI().getThrowInfo(FD->getParamDecl(0)->getType()))
5145
3
      return RValue::get(GV);
5146
0
    break;
5147
3
  }
5148
5149
4
  case Builtin::BI__fastfail:
5150
4
    return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::__fastfail, E));
5151
5152
3
  case Builtin::BI__builtin_coro_id:
5153
3
    return EmitCoroutineIntrinsic(E, Intrinsic::coro_id);
5154
28
  case Builtin::BI__builtin_coro_promise:
5155
28
    return EmitCoroutineIntrinsic(E, Intrinsic::coro_promise);
5156
34
  case Builtin::BI__builtin_coro_resume:
5157
34
    EmitCoroutineIntrinsic(E, Intrinsic::coro_resume);
5158
34
    return RValue::get(nullptr);
5159
420
  case Builtin::BI__builtin_coro_frame:
5160
420
    return EmitCoroutineIntrinsic(E, Intrinsic::coro_frame);
5161
4
  case Builtin::BI__builtin_coro_noop:
5162
4
    return EmitCoroutineIntrinsic(E, Intrinsic::coro_noop);
5163
136
  case Builtin::BI__builtin_coro_free:
5164
136
    return EmitCoroutineIntrinsic(E, Intrinsic::coro_free);
5165
14
  case Builtin::BI__builtin_coro_destroy:
5166
14
    EmitCoroutineIntrinsic(E, Intrinsic::coro_destroy);
5167
14
    return RValue::get(nullptr);
5168
5
  case Builtin::BI__builtin_coro_done:
5169
5
    return EmitCoroutineIntrinsic(E, Intrinsic::coro_done);
5170
2
  case Builtin::BI__builtin_coro_alloc:
5171
2
    return EmitCoroutineIntrinsic(E, Intrinsic::coro_alloc);
5172
2
  case Builtin::BI__builtin_coro_begin:
5173
2
    return EmitCoroutineIntrinsic(E, Intrinsic::coro_begin);
5174
1
  case Builtin::BI__builtin_coro_end:
5175
1
    return EmitCoroutineIntrinsic(E, Intrinsic::coro_end);
5176
1
  case Builtin::BI__builtin_coro_suspend:
5177
1
    return EmitCoroutineIntrinsic(E, Intrinsic::coro_suspend);
5178
117
  case Builtin::BI__builtin_coro_size:
5179
117
    return EmitCoroutineIntrinsic(E, Intrinsic::coro_size);
5180
21
  case Builtin::BI__builtin_coro_align:
5181
21
    return EmitCoroutineIntrinsic(E, Intrinsic::coro_align);
5182
5183
  // OpenCL v2.0 s6.13.16.2, Built-in pipe read and write functions
5184
13
  case Builtin::BIread_pipe:
5185
16
  case Builtin::BIwrite_pipe: {
5186
16
    Value *Arg0 = EmitScalarExpr(E->getArg(0)),
5187
16
          *Arg1 = EmitScalarExpr(E->getArg(1));
5188
16
    CGOpenCLRuntime OpenCLRT(CGM);
5189
16
    Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
5190
16
    Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
5191
5192
    // Type of the generic packet parameter.
5193
16
    unsigned GenericAS =
5194
16
        getContext().getTargetAddressSpace(LangAS::opencl_generic);
5195
16
    llvm::Type *I8PTy = llvm::PointerType::get(getLLVMContext(), GenericAS);
5196
5197
    // Determine which overloaded version we should generate the call for.
5198
16
    if (2U == E->getNumArgs()) {
5199
14
      const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_2"
5200
14
                                                             : "__write_pipe_2";
5201
      // Create a generic function type so the call works with any builtin or
5202
      // user-defined type.
5203
14
      llvm::Type *ArgTys[] = {Arg0->getType(), I8PTy, Int32Ty, Int32Ty};
5204
14
      llvm::FunctionType *FTy = llvm::FunctionType::get(
5205
14
          Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
5206
14
      Value *BCast = Builder.CreatePointerCast(Arg1, I8PTy);
5207
14
      return RValue::get(
5208
14
          EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
5209
14
                          {Arg0, BCast, PacketSize, PacketAlign}));
5210
14
    } else {
5211
2
      assert(4 == E->getNumArgs() &&
5212
2
             "Illegal number of parameters to pipe function");
5213
2
      const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_4"
5214
2
                                                             : "__write_pipe_4";
5215
5216
2
      llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, I8PTy,
5217
2
                              Int32Ty, Int32Ty};
5218
2
      Value *Arg2 = EmitScalarExpr(E->getArg(2)),
5219
2
            *Arg3 = EmitScalarExpr(E->getArg(3));
5220
2
      llvm::FunctionType *FTy = llvm::FunctionType::get(
5221
2
          Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
5222
2
      Value *BCast = Builder.CreatePointerCast(Arg3, I8PTy);
5223
      // We know the third argument is an integer type, but we may need to cast
5224
      // it to i32.
5225
2
      if (Arg2->getType() != Int32Ty)
5226
0
        Arg2 = Builder.CreateZExtOrTrunc(Arg2, Int32Ty);
5227
2
      return RValue::get(
5228
2
          EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
5229
2
                          {Arg0, Arg1, Arg2, BCast, PacketSize, PacketAlign}));
5230
2
    }
5231
16
  }
5232
  // OpenCL v2.0 s6.13.16, s9.17.3.5 - Built-in pipe reserve read and write
5233
  // functions
5234
1
  case Builtin::BIreserve_read_pipe:
5235
2
  case Builtin::BIreserve_write_pipe:
5236
3
  case Builtin::BIwork_group_reserve_read_pipe:
5237
4
  case Builtin::BIwork_group_reserve_write_pipe:
5238
5
  case Builtin::BIsub_group_reserve_read_pipe:
5239
6
  case Builtin::BIsub_group_reserve_write_pipe: {
5240
    // Composing the mangled name for the function.
5241
6
    const char *Name;
5242
6
    if (BuiltinID == Builtin::BIreserve_read_pipe)
5243
1
      Name = "__reserve_read_pipe";
5244
5
    else if (BuiltinID == Builtin::BIreserve_write_pipe)
5245
1
      Name = "__reserve_write_pipe";
5246
4
    else if (BuiltinID == Builtin::BIwork_group_reserve_read_pipe)
5247
1
      Name = "__work_group_reserve_read_pipe";
5248
3
    else if (BuiltinID == Builtin::BIwork_group_reserve_write_pipe)
5249
1
      Name = "__work_group_reserve_write_pipe";
5250
2
    else if (BuiltinID == Builtin::BIsub_group_reserve_read_pipe)
5251
1
      Name = "__sub_group_reserve_read_pipe";
5252
1
    else
5253
1
      Name = "__sub_group_reserve_write_pipe";
5254
5255
6
    Value *Arg0 = EmitScalarExpr(E->getArg(0)),
5256
6
          *Arg1 = EmitScalarExpr(E->getArg(1));
5257
6
    llvm::Type *ReservedIDTy = ConvertType(getContext().OCLReserveIDTy);
5258
6
    CGOpenCLRuntime OpenCLRT(CGM);
5259
6
    Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
5260
6
    Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
5261
5262
    // Building the generic function prototype.
5263
6
    llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty, Int32Ty, Int32Ty};
5264
6
    llvm::FunctionType *FTy = llvm::FunctionType::get(
5265
6
        ReservedIDTy, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
5266
    // We know the second argument is an integer type, but we may need to cast
5267
    // it to i32.
5268
6
    if (Arg1->getType() != Int32Ty)
5269
0
      Arg1 = Builder.CreateZExtOrTrunc(Arg1, Int32Ty);
5270
6
    return RValue::get(EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
5271
6
                                       {Arg0, Arg1, PacketSize, PacketAlign}));
5272
5
  }
5273
  // OpenCL v2.0 s6.13.16, s9.17.3.5 - Built-in pipe commit read and write
5274
  // functions
5275
1
  case Builtin::BIcommit_read_pipe:
5276
2
  case Builtin::BIcommit_write_pipe:
5277
3
  case Builtin::BIwork_group_commit_read_pipe:
5278
4
  case Builtin::BIwork_group_commit_write_pipe:
5279
5
  case Builtin::BIsub_group_commit_read_pipe:
5280
6
  case Builtin::BIsub_group_commit_write_pipe: {
5281
6
    const char *Name;
5282
6
    if (BuiltinID == Builtin::BIcommit_read_pipe)
5283
1
      Name = "__commit_read_pipe";
5284
5
    else if (BuiltinID == Builtin::BIcommit_write_pipe)
5285
1
      Name = "__commit_write_pipe";
5286
4
    else if (BuiltinID == Builtin::BIwork_group_commit_read_pipe)
5287
1
      Name = "__work_group_commit_read_pipe";
5288
3
    else if (BuiltinID == Builtin::BIwork_group_commit_write_pipe)
5289
1
      Name = "__work_group_commit_write_pipe";
5290
2
    else if (BuiltinID == Builtin::BIsub_group_commit_read_pipe)
5291
1
      Name = "__sub_group_commit_read_pipe";
5292
1
    else
5293
1
      Name = "__sub_group_commit_write_pipe";
5294
5295
6
    Value *Arg0 = EmitScalarExpr(E->getArg(0)),
5296
6
          *Arg1 = EmitScalarExpr(E->getArg(1));
5297
6
    CGOpenCLRuntime OpenCLRT(CGM);
5298
6
    Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
5299
6
    Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
5300
5301
    // Building the generic function prototype.
5302
6
    llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, Int32Ty};
5303
6
    llvm::FunctionType *FTy =
5304
6
        llvm::FunctionType::get(llvm::Type::getVoidTy(getLLVMContext()),
5305
6
                                llvm::ArrayRef<llvm::Type *>(ArgTys), false);
5306
5307
6
    return RValue::get(EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
5308
6
                                       {Arg0, Arg1, PacketSize, PacketAlign}));
5309
5
  }
5310
  // OpenCL v2.0 s6.13.16.4 Built-in pipe query functions
5311
4
  case Builtin::BIget_pipe_num_packets:
5312
8
  case Builtin::BIget_pipe_max_packets: {
5313
8
    const char *BaseName;
5314
8
    const auto *PipeTy = E->getArg(0)->getType()->castAs<PipeType>();
5315
8
    if (BuiltinID == Builtin::BIget_pipe_num_packets)
5316
4
      BaseName = "__get_pipe_num_packets";
5317
4
    else
5318
4
      BaseName = "__get_pipe_max_packets";
5319
8
    std::string Name = std::string(BaseName) +
5320
8
                       std::string(PipeTy->isReadOnly() ? "_ro" : "_wo");
5321
5322
    // Building the generic function prototype.
5323
8
    Value *Arg0 = EmitScalarExpr(E->getArg(0));
5324
8
    CGOpenCLRuntime OpenCLRT(CGM);
5325
8
    Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
5326
8
    Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
5327
8
    llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty, Int32Ty};
5328
8
    llvm::FunctionType *FTy = llvm::FunctionType::get(
5329
8
        Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
5330
5331
8
    return RValue::get(EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
5332
8
                                       {Arg0, PacketSize, PacketAlign}));
5333
4
  }
5334
5335
  // OpenCL v2.0 s6.13.9 - Address space qualifier functions.
5336
19
  case Builtin::BIto_global:
5337
32
  case Builtin::BIto_local:
5338
45
  case Builtin::BIto_private: {
5339
45
    auto Arg0 = EmitScalarExpr(E->getArg(0));
5340
45
    auto NewArgT = llvm::PointerType::get(
5341
45
        getLLVMContext(),
5342
45
        CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic));
5343
45
    auto NewRetT = llvm::PointerType::get(
5344
45
        getLLVMContext(),
5345
45
        CGM.getContext().getTargetAddressSpace(
5346
45
            E->getType()->getPointeeType().getAddressSpace()));
5347
45
    auto FTy = llvm::FunctionType::get(NewRetT, {NewArgT}, false);
5348
45
    llvm::Value *NewArg;
5349
45
    if (Arg0->getType()->getPointerAddressSpace() !=
5350
45
        NewArgT->getPointerAddressSpace())
5351
30
      NewArg = Builder.CreateAddrSpaceCast(Arg0, NewArgT);
5352
15
    else
5353
15
      NewArg = Builder.CreateBitOrPointerCast(Arg0, NewArgT);
5354
45
    auto NewName = std::string("__") + E->getDirectCallee()->getName().str();
5355
45
    auto NewCall =
5356
45
        EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, NewName), {NewArg});
5357
45
    return RValue::get(Builder.CreateBitOrPointerCast(NewCall,
5358
45
      ConvertType(E->getType())));
5359
32
  }
5360
5361
  // OpenCL v2.0, s6.13.17 - Enqueue kernel function.
5362
  // It contains four different overload formats specified in Table 6.13.17.1.
5363
125
  case Builtin::BIenqueue_kernel: {
5364
125
    StringRef Name; // Generated function call name
5365
125
    unsigned NumArgs = E->getNumArgs();
5366
5367
125
    llvm::Type *QueueTy = ConvertType(getContext().OCLQueueTy);
5368
125
    llvm::Type *GenericVoidPtrTy = Builder.getPtrTy(
5369
125
        getContext().getTargetAddressSpace(LangAS::opencl_generic));
5370
5371
125
    llvm::Value *Queue = EmitScalarExpr(E->getArg(0));
5372
125
    llvm::Value *Flags = EmitScalarExpr(E->getArg(1));
5373
125
    LValue NDRangeL = EmitAggExprToLValue(E->getArg(2));
5374
125
    llvm::Value *Range = NDRangeL.getAddress(*this).getPointer();
5375
125
    llvm::Type *RangeTy = NDRangeL.getAddress(*this).getType();
5376
5377
125
    if (NumArgs == 4) {
5378
      // The most basic form of the call with parameters:
5379
      // queue_t, kernel_enqueue_flags_t, ndrange_t, block(void)
5380
38
      Name = "__enqueue_kernel_basic";
5381
38
      llvm::Type *ArgTys[] = {QueueTy, Int32Ty, RangeTy, GenericVoidPtrTy,
5382
38
                              GenericVoidPtrTy};
5383
38
      llvm::FunctionType *FTy = llvm::FunctionType::get(
5384
38
          Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
5385
5386
38
      auto Info =
5387
38
          CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(3));
5388
38
      llvm::Value *Kernel =
5389
38
          Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy);
5390
38
      llvm::Value *Block =
5391
38
          Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
5392
5393
38
      AttrBuilder B(Builder.getContext());
5394
38
      B.addByValAttr(NDRangeL.getAddress(*this).getElementType());
5395
38
      llvm::AttributeList ByValAttrSet =
5396
38
          llvm::AttributeList::get(CGM.getModule().getContext(), 3U, B);
5397
5398
38
      auto RTCall =
5399
38
          EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name, ByValAttrSet),
5400
38
                          {Queue, Flags, Range, Kernel, Block});
5401
38
      RTCall->setAttributes(ByValAttrSet);
5402
38
      return RValue::get(RTCall);
5403
38
    }
5404
87
    assert(NumArgs >= 5 && "Invalid enqueue_kernel signature");
5405
5406
    // Create a temporary array to hold the sizes of local pointer arguments
5407
    // for the block. \p First is the position of the first size argument.
5408
87
    auto CreateArrayForSizeVar = [=](unsigned First)
5409
87
        -> std::tuple<llvm::Value *, llvm::Value *, llvm::Value *> {
5410
69
      llvm::APInt ArraySize(32, NumArgs - First);
5411
69
      QualType SizeArrayTy = getContext().getConstantArrayType(
5412
69
          getContext().getSizeType(), ArraySize, nullptr,
5413
69
          ArraySizeModifier::Normal,
5414
69
          /*IndexTypeQuals=*/0);
5415
69
      auto Tmp = CreateMemTemp(SizeArrayTy, "block_sizes");
5416
69
      llvm::Value *TmpPtr = Tmp.getPointer();
5417
69
      llvm::Value *TmpSize = EmitLifetimeStart(
5418
69
          CGM.getDataLayout().getTypeAllocSize(Tmp.getElementType()), TmpPtr);
5419
69
      llvm::Value *ElemPtr;
5420
      // Each of the following arguments specifies the size of the corresponding
5421
      // argument passed to the enqueued block.
5422
69
      auto *Zero = llvm::ConstantInt::get(IntTy, 0);
5423
156
      for (unsigned I = First; I < NumArgs; ++I) {
5424
87
        auto *Index = llvm::ConstantInt::get(IntTy, I - First);
5425
87
        auto *GEP = Builder.CreateGEP(Tmp.getElementType(), TmpPtr,
5426
87
                                      {Zero, Index});
5427
87
        if (I == First)
5428
69
          ElemPtr = GEP;
5429
87
        auto *V =
5430
87
            Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(I)), SizeTy);
5431
87
        Builder.CreateAlignedStore(
5432
87
            V, GEP, CGM.getDataLayout().getPrefTypeAlign(SizeTy));
5433
87
      }
5434
69
      return std::tie(ElemPtr, TmpSize, TmpPtr);
5435
69
    };
5436
5437
    // Could have events and/or varargs.
5438
87
    if (E->getArg(3)->getType()->isBlockPointerType()) {
5439
      // No events passed, but has variadic arguments.
5440
51
      Name = "__enqueue_kernel_varargs";
5441
51
      auto Info =
5442
51
          CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(3));
5443
51
      llvm::Value *Kernel =
5444
51
          Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy);
5445
51
      auto *Block = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
5446
51
      llvm::Value *ElemPtr, *TmpSize, *TmpPtr;
5447
51
      std::tie(ElemPtr, TmpSize, TmpPtr) = CreateArrayForSizeVar(4);
5448
5449
      // Create a vector of the arguments, as well as a constant value to
5450
      // express to the runtime the number of variadic arguments.
5451
51
      llvm::Value *const Args[] = {Queue,  Flags,
5452
51
                                   Range,  Kernel,
5453
51
                                   Block,  ConstantInt::get(IntTy, NumArgs - 4),
5454
51
                                   ElemPtr};
5455
51
      llvm::Type *const ArgTys[] = {
5456
51
          QueueTy,          IntTy, RangeTy,           GenericVoidPtrTy,
5457
51
          GenericVoidPtrTy, IntTy, ElemPtr->getType()};
5458
5459
51
      llvm::FunctionType *FTy = llvm::FunctionType::get(Int32Ty, ArgTys, false);
5460
51
      auto Call = RValue::get(
5461
51
          EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Args));
5462
51
      if (TmpSize)
5463
16
        EmitLifetimeEnd(TmpSize, TmpPtr);
5464
51
      return Call;
5465
51
    }
5466
    // All remaining overloads take event arguments.
5467
36
    if (NumArgs >= 7) {
5468
36
      llvm::PointerType *PtrTy = llvm::PointerType::get(
5469
36
          CGM.getLLVMContext(),
5470
36
          CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic));
5471
5472
36
      llvm::Value *NumEvents =
5473
36
          Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(3)), Int32Ty);
5474
5475
      // Since SemaOpenCLBuiltinEnqueueKernel allows the fifth and sixth arguments
5476
      // to be a null pointer constant (including a `0` literal), we can detect
5477
      // that case and emit a null pointer directly.
5478
36
      llvm::Value *EventWaitList = nullptr;
5479
36
      if (E->getArg(4)->isNullPointerConstant(
5480
36
              getContext(), Expr::NPC_ValueDependentIsNotNull)) {
5481
9
        EventWaitList = llvm::ConstantPointerNull::get(PtrTy);
5482
27
      } else {
5483
27
        EventWaitList = E->getArg(4)->getType()->isArrayType()
5484
27
                        ? 
EmitArrayToPointerDecay(E->getArg(4)).getPointer()18
5485
27
                        : 
EmitScalarExpr(E->getArg(4))9
;
5486
        // Convert to generic address space.
5487
27
        EventWaitList = Builder.CreatePointerCast(EventWaitList, PtrTy);
5488
27
      }
5489
36
      llvm::Value *EventRet = nullptr;
5490
36
      if (E->getArg(5)->isNullPointerConstant(
5491
36
              getContext(), Expr::NPC_ValueDependentIsNotNull)) {
5492
9
        EventRet = llvm::ConstantPointerNull::get(PtrTy);
5493
27
      } else {
5494
27
        EventRet =
5495
27
            Builder.CreatePointerCast(EmitScalarExpr(E->getArg(5)), PtrTy);
5496
27
      }
5497
5498
36
      auto Info =
5499
36
          CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(6));
5500
36
      llvm::Value *Kernel =
5501
36
          Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy);
5502
36
      llvm::Value *Block =
5503
36
          Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
5504
5505
36
      std::vector<llvm::Type *> ArgTys = {
5506
36
          QueueTy, Int32Ty, RangeTy,          Int32Ty,
5507
36
          PtrTy,   PtrTy,   GenericVoidPtrTy, GenericVoidPtrTy};
5508
5509
36
      std::vector<llvm::Value *> Args = {Queue,     Flags,         Range,
5510
36
                                         NumEvents, EventWaitList, EventRet,
5511
36
                                         Kernel,    Block};
5512
5513
36
      if (NumArgs == 7) {
5514
        // Has events but no variadics.
5515
18
        Name = "__enqueue_kernel_basic_events";
5516
18
        llvm::FunctionType *FTy = llvm::FunctionType::get(
5517
18
            Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
5518
18
        return RValue::get(
5519
18
            EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
5520
18
                            llvm::ArrayRef<llvm::Value *>(Args)));
5521
18
      }
5522
      // Has event info and variadics
5523
      // Pass the number of variadics to the runtime function too.
5524
18
      Args.push_back(ConstantInt::get(Int32Ty, NumArgs - 7));
5525
18
      ArgTys.push_back(Int32Ty);
5526
18
      Name = "__enqueue_kernel_events_varargs";
5527
5528
18
      llvm::Value *ElemPtr, *TmpSize, *TmpPtr;
5529
18
      std::tie(ElemPtr, TmpSize, TmpPtr) = CreateArrayForSizeVar(7);
5530
18
      Args.push_back(ElemPtr);
5531
18
      ArgTys.push_back(ElemPtr->getType());
5532
5533
18
      llvm::FunctionType *FTy = llvm::FunctionType::get(
5534
18
          Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
5535
18
      auto Call =
5536
18
          RValue::get(EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
5537
18
                                      llvm::ArrayRef<llvm::Value *>(Args)));
5538
18
      if (TmpSize)
5539
6
        EmitLifetimeEnd(TmpSize, TmpPtr);
5540
18
      return Call;
5541
36
    }
5542
36
    [[fallthrough]];
5543
0
  }
5544
  // OpenCL v2.0 s6.13.17.6 - Kernel query functions need bitcast of block
5545
  // parameter.
5546
19
  case Builtin::BIget_kernel_work_group_size: {
5547
19
    llvm::Type *GenericVoidPtrTy = Builder.getPtrTy(
5548
19
        getContext().getTargetAddressSpace(LangAS::opencl_generic));
5549
19
    auto Info =
5550
19
        CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(0));
5551
19
    Value *Kernel =
5552
19
        Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy);
5553
19
    Value *Arg = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
5554
19
    return RValue::get(EmitRuntimeCall(
5555
19
        CGM.CreateRuntimeFunction(
5556
19
            llvm::FunctionType::get(IntTy, {GenericVoidPtrTy, GenericVoidPtrTy},
5557
19
                                    false),
5558
19
            "__get_kernel_work_group_size_impl"),
5559
19
        {Kernel, Arg}));
5560
0
  }
5561
28
  case Builtin::BIget_kernel_preferred_work_group_size_multiple: {
5562
28
    llvm::Type *GenericVoidPtrTy = Builder.getPtrTy(
5563
28
        getContext().getTargetAddressSpace(LangAS::opencl_generic));
5564
28
    auto Info =
5565
28
        CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(0));
5566
28
    Value *Kernel =
5567
28
        Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy);
5568
28
    Value *Arg = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
5569
28
    return RValue::get(EmitRuntimeCall(
5570
28
        CGM.CreateRuntimeFunction(
5571
28
            llvm::FunctionType::get(IntTy, {GenericVoidPtrTy, GenericVoidPtrTy},
5572
28
                                    false),
5573
28
            "__get_kernel_preferred_work_group_size_multiple_impl"),
5574
28
        {Kernel, Arg}));
5575
0
  }
5576
9
  case Builtin::BIget_kernel_max_sub_group_size_for_ndrange:
5577
18
  case Builtin::BIget_kernel_sub_group_count_for_ndrange: {
5578
18
    llvm::Type *GenericVoidPtrTy = Builder.getPtrTy(
5579
18
        getContext().getTargetAddressSpace(LangAS::opencl_generic));
5580
18
    LValue NDRangeL = EmitAggExprToLValue(E->getArg(0));
5581
18
    llvm::Value *NDRange = NDRangeL.getAddress(*this).getPointer();
5582
18
    auto Info =
5583
18
        CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(1));
5584
18
    Value *Kernel =
5585
18
        Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy);
5586
18
    Value *Block = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
5587
18
    const char *Name =
5588
18
        BuiltinID == Builtin::BIget_kernel_max_sub_group_size_for_ndrange
5589
18
            ? "__get_kernel_max_sub_group_size_for_ndrange_impl"
5590
18
            : "__get_kernel_sub_group_count_for_ndrange_impl";
5591
18
    return RValue::get(EmitRuntimeCall(
5592
18
        CGM.CreateRuntimeFunction(
5593
18
            llvm::FunctionType::get(
5594
18
                IntTy, {NDRange->getType(), GenericVoidPtrTy, GenericVoidPtrTy},
5595
18
                false),
5596
18
            Name),
5597
18
        {NDRange, Kernel, Block}));
5598
9
  }
5599
5600
3
  case Builtin::BI__builtin_store_half:
5601
6
  case Builtin::BI__builtin_store_halff: {
5602
6
    Value *Val = EmitScalarExpr(E->getArg(0));
5603
6
    Address Address = EmitPointerWithAlignment(E->getArg(1));
5604
6
    Value *HalfVal = Builder.CreateFPTrunc(Val, Builder.getHalfTy());
5605
6
    Builder.CreateStore(HalfVal, Address);
5606
6
    return RValue::get(nullptr);
5607
3
  }
5608
3
  case Builtin::BI__builtin_load_half: {
5609
3
    Address Address = EmitPointerWithAlignment(E->getArg(0));
5610
3
    Value *HalfVal = Builder.CreateLoad(Address);
5611
3
    return RValue::get(Builder.CreateFPExt(HalfVal, Builder.getDoubleTy()));
5612
3
  }
5613
3
  case Builtin::BI__builtin_load_halff: {
5614
3
    Address Address = EmitPointerWithAlignment(E->getArg(0));
5615
3
    Value *HalfVal = Builder.CreateLoad(Address);
5616
3
    return RValue::get(Builder.CreateFPExt(HalfVal, Builder.getFloatTy()));
5617
3
  }
5618
3.59k
  case Builtin::BIprintf:
5619
3.59k
    if (getTarget().getTriple().isNVPTX() ||
5620
3.59k
        getTarget().getTriple().isAMDGCN()) {
5621
18
      if (getLangOpts().OpenMPIsTargetDevice)
5622
6
        return EmitOpenMPDevicePrintfCallExpr(E);
5623
12
      if (getTarget().getTriple().isNVPTX())
5624
0
        return EmitNVPTXDevicePrintfCallExpr(E);
5625
12
      if (getTarget().getTriple().isAMDGCN() && getLangOpts().HIP)
5626
12
        return EmitAMDGPUDevicePrintfCallExpr(E);
5627
12
    }
5628
5629
3.57k
    break;
5630
3.57k
  case Builtin::BI__builtin_canonicalize:
5631
4
  case Builtin::BI__builtin_canonicalizef:
5632
5
  case Builtin::BI__builtin_canonicalizef16:
5633
7
  case Builtin::BI__builtin_canonicalizel:
5634
7
    return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::canonicalize));
5635
5636
4
  case Builtin::BI__builtin_thread_pointer: {
5637
4
    if (!getContext().getTargetInfo().isTLSSupported())
5638
0
      CGM.ErrorUnsupported(E, "__builtin_thread_pointer");
5639
    // Fall through - it's already mapped to the intrinsic by ClangBuiltin.
5640
4
    break;
5641
5
  }
5642
59
  case Builtin::BI__builtin_os_log_format:
5643
59
    return emitBuiltinOSLogFormat(*E);
5644
5645
17
  case Builtin::BI__xray_customevent: {
5646
17
    if (!ShouldXRayInstrumentFunction())
5647
0
      return RValue::getIgnored();
5648
5649
17
    if (!CGM.getCodeGenOpts().XRayInstrumentationBundle.has(
5650
17
            XRayInstrKind::Custom))
5651
7
      return RValue::getIgnored();
5652
5653
10
    if (const auto *XRayAttr = CurFuncDecl->getAttr<XRayInstrumentAttr>())
5654
10
      if (XRayAttr->neverXRayInstrument() && !AlwaysEmitXRayCustomEvents())
5655
1
        return RValue::getIgnored();
5656
5657
9
    Function *F = CGM.getIntrinsic(Intrinsic::xray_customevent);
5658
9
    auto FTy = F->getFunctionType();
5659
9
    auto Arg0 = E->getArg(0);
5660
9
    auto Arg0Val = EmitScalarExpr(Arg0);
5661
9
    auto Arg0Ty = Arg0->getType();
5662
9
    auto PTy0 = FTy->getParamType(0);
5663
9
    if (PTy0 != Arg0Val->getType()) {
5664
0
      if (Arg0Ty->isArrayType())
5665
0
        Arg0Val = EmitArrayToPointerDecay(Arg0).getPointer();
5666
0
      else
5667
0
        Arg0Val = Builder.CreatePointerCast(Arg0Val, PTy0);
5668
0
    }
5669
9
    auto Arg1 = EmitScalarExpr(E->getArg(1));
5670
9
    auto PTy1 = FTy->getParamType(1);
5671
9
    if (PTy1 != Arg1->getType())
5672
0
      Arg1 = Builder.CreateTruncOrBitCast(Arg1, PTy1);
5673
9
    return RValue::get(Builder.CreateCall(F, {Arg0Val, Arg1}));
5674
10
  }
5675
5676
17
  case Builtin::BI__xray_typedevent: {
5677
    // TODO: There should be a way to always emit events even if the current
5678
    // function is not instrumented. Losing events in a stream can cripple
5679
    // a trace.
5680
17
    if (!ShouldXRayInstrumentFunction())
5681
0
      return RValue::getIgnored();
5682
5683
17
    if (!CGM.getCodeGenOpts().XRayInstrumentationBundle.has(
5684
17
            XRayInstrKind::Typed))
5685
7
      return RValue::getIgnored();
5686
5687
10
    if (const auto *XRayAttr = CurFuncDecl->getAttr<XRayInstrumentAttr>())
5688
10
      if (XRayAttr->neverXRayInstrument() && !AlwaysEmitXRayTypedEvents())
5689
1
        return RValue::getIgnored();
5690
5691
9
    Function *F = CGM.getIntrinsic(Intrinsic::xray_typedevent);
5692
9
    auto FTy = F->getFunctionType();
5693
9
    auto Arg0 = EmitScalarExpr(E->getArg(0));
5694
9
    auto PTy0 = FTy->getParamType(0);
5695
9
    if (PTy0 != Arg0->getType())
5696
0
      Arg0 = Builder.CreateTruncOrBitCast(Arg0, PTy0);
5697
9
    auto Arg1 = E->getArg(1);
5698
9
    auto Arg1Val = EmitScalarExpr(Arg1);
5699
9
    auto Arg1Ty = Arg1->getType();
5700
9
    auto PTy1 = FTy->getParamType(1);
5701
9
    if (PTy1 != Arg1Val->getType()) {
5702
0
      if (Arg1Ty->isArrayType())
5703
0
        Arg1Val = EmitArrayToPointerDecay(Arg1).getPointer();
5704
0
      else
5705
0
        Arg1Val = Builder.CreatePointerCast(Arg1Val, PTy1);
5706
0
    }
5707
9
    auto Arg2 = EmitScalarExpr(E->getArg(2));
5708
9
    auto PTy2 = FTy->getParamType(2);
5709
9
    if (PTy2 != Arg2->getType())
5710
0
      Arg2 = Builder.CreateTruncOrBitCast(Arg2, PTy2);
5711
9
    return RValue::get(Builder.CreateCall(F, {Arg0, Arg1Val, Arg2}));
5712
10
  }
5713
5714
8
  case Builtin::BI__builtin_ms_va_start:
5715
16
  case Builtin::BI__builtin_ms_va_end:
5716
16
    return RValue::get(
5717
16
        EmitVAStartEnd(EmitMSVAListRef(E->getArg(0)).getPointer(),
5718
16
                       BuiltinID == Builtin::BI__builtin_ms_va_start));
5719
5720
6
  case Builtin::BI__builtin_ms_va_copy: {
5721
    // Lower this manually. We can't reliably determine whether or not any
5722
    // given va_copy() is for a Win64 va_list from the calling convention
5723
    // alone, because it's legal to do this from a System V ABI function.
5724
    // With opaque pointer types, we won't have enough information in LLVM
5725
    // IR to determine this from the argument types, either. Best to do it
5726
    // now, while we have enough information.
5727
6
    Address DestAddr = EmitMSVAListRef(E->getArg(0));
5728
6
    Address SrcAddr = EmitMSVAListRef(E->getArg(1));
5729
5730
6
    llvm::Type *BPP = Int8PtrPtrTy;
5731
5732
6
    DestAddr = Address(Builder.CreateBitCast(DestAddr.getPointer(), BPP, "cp"),
5733
6
                       Int8PtrTy, DestAddr.getAlignment());
5734
6
    SrcAddr = Address(Builder.CreateBitCast(SrcAddr.getPointer(), BPP, "ap"),
5735
6
                      Int8PtrTy, SrcAddr.getAlignment());
5736
5737
6
    Value *ArgPtr = Builder.CreateLoad(SrcAddr, "ap.val");
5738
6
    return RValue::get(Builder.CreateStore(ArgPtr, DestAddr));
5739
8
  }
5740
5741
4
  case Builtin::BI__builtin_get_device_side_mangled_name: {
5742
4
    auto Name = CGM.getCUDARuntime().getDeviceSideName(
5743
4
        cast<DeclRefExpr>(E->getArg(0)->IgnoreImpCasts())->getDecl());
5744
4
    auto Str = CGM.GetAddrOfConstantCString(Name, "");
5745
4
    llvm::Constant *Zeros[] = {llvm::ConstantInt::get(SizeTy, 0),
5746
4
                               llvm::ConstantInt::get(SizeTy, 0)};
5747
4
    auto *Ptr = llvm::ConstantExpr::getGetElementPtr(Str.getElementType(),
5748
4
                                                     Str.getPointer(), Zeros);
5749
4
    return RValue::get(Ptr);
5750
8
  }
5751
112k
  }
5752
5753
  // If this is an alias for a lib function (e.g. __builtin_sin), emit
5754
  // the call using the normal call path, but using the unmangled
5755
  // version of the function name.
5756
88.7k
  if (getContext().BuiltinInfo.isLibFunction(BuiltinID))
5757
1.81k
    return emitLibraryCall(*this, FD, E,
5758
1.81k
                           CGM.getBuiltinLibFunction(FD, BuiltinID));
5759
5760
  // If this is a predefined lib function (e.g. malloc), emit the call
5761
  // using exactly the normal call path.
5762
86.9k
  if (getContext().BuiltinInfo.isPredefinedLibFunction(BuiltinID))
5763
11.8k
    return emitLibraryCall(*this, FD, E,
5764
11.8k
                      cast<llvm::Constant>(EmitScalarExpr(E->getCallee())));
5765
5766
  // Check that a call to a target specific builtin has the correct target
5767
  // features.
5768
  // This is done down here so the check is skipped for non-target-specific
5769
  // builtins; however, if generic builtins start to require generic target
5770
  // features then we can move this up to the beginning of the function.
5771
75.1k
  checkTargetFeatures(E, FD);
5772
5773
75.1k
  if (unsigned VectorWidth = getContext().BuiltinInfo.getRequiredVectorWidth(BuiltinID))
5774
13.4k
    LargestVectorWidth = std::max(LargestVectorWidth, VectorWidth);
5775
5776
  // See if we have a target specific intrinsic.
5777
75.1k
  StringRef Name = getContext().BuiltinInfo.getName(BuiltinID);
5778
75.1k
  Intrinsic::ID IntrinsicID = Intrinsic::not_intrinsic;
5779
75.1k
  StringRef Prefix =
5780
75.1k
      llvm::Triple::getArchTypePrefix(getTarget().getTriple().getArch());
5781
75.1k
  if (!Prefix.empty()) {
5782
75.1k
    IntrinsicID = Intrinsic::getIntrinsicForClangBuiltin(Prefix.data(), Name);
5783
    // NOTE: we don't need to perform a compatibility flag check here, since the
5784
    // intrinsics are declared in Builtins*.def via LANGBUILTIN, which filters
5785
    // the MS builtins via ALL_MS_LANGUAGES earlier.
5786
75.1k
    if (IntrinsicID == Intrinsic::not_intrinsic)
5787
66.0k
      IntrinsicID = Intrinsic::getIntrinsicForMSBuiltin(Prefix.data(), Name);
5788
75.1k
  }
5789
5790
75.1k
  if (IntrinsicID != Intrinsic::not_intrinsic) {
5791
9.03k
    SmallVector<Value*, 16> Args;
5792
5793
    // Find out if any arguments are required to be integer constant
5794
    // expressions.
5795
9.03k
    unsigned ICEArguments = 0;
5796
9.03k
    ASTContext::GetBuiltinTypeError Error;
5797
9.03k
    getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
5798
9.03k
    assert(Error == ASTContext::GE_None && "Should not codegen an error");
5799
5800
9.03k
    Function *F = CGM.getIntrinsic(IntrinsicID);
5801
9.03k
    llvm::FunctionType *FTy = F->getFunctionType();
5802
5803
31.8k
    for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) {
5804
22.7k
      Value *ArgValue = EmitScalarOrConstFoldImmArg(ICEArguments, i, E);
5805
      // If the intrinsic arg type is different from the builtin arg type
5806
      // we need to do a bit cast.
5807
22.7k
      llvm::Type *PTy = FTy->getParamType(i);
5808
22.7k
      if (PTy != ArgValue->getType()) {
5809
        // XXX - vector of pointers?
5810
731
        if (auto *PtrTy = dyn_cast<llvm::PointerType>(PTy)) {
5811
0
          if (PtrTy->getAddressSpace() !=
5812
0
              ArgValue->getType()->getPointerAddressSpace()) {
5813
0
            ArgValue = Builder.CreateAddrSpaceCast(
5814
0
                ArgValue, llvm::PointerType::get(getLLVMContext(),
5815
0
                                                 PtrTy->getAddressSpace()));
5816
0
          }
5817
0
        }
5818
5819
731
        assert(ArgValue->getType()->canLosslesslyBitCastTo(PTy) &&
5820
731
               "Must be able to losslessly bit cast to param");
5821
        // Cast vector type (e.g., v256i32) to x86_amx; this only happens
5822
        // in amx intrinsics.
5823
731
        if (PTy->isX86_AMXTy())
5824
25
          ArgValue = Builder.CreateIntrinsic(Intrinsic::x86_cast_vector_to_tile,
5825
25
                                             {ArgValue->getType()}, {ArgValue});
5826
706
        else
5827
706
          ArgValue = Builder.CreateBitCast(ArgValue, PTy);
5828
731
      }
5829
5830
22.7k
      Args.push_back(ArgValue);
5831
22.7k
    }
5832
5833
9.03k
    Value *V = Builder.CreateCall(F, Args);
5834
9.03k
    QualType BuiltinRetType = E->getType();
5835
5836
9.03k
    llvm::Type *RetTy = VoidTy;
5837
9.03k
    if (!BuiltinRetType->isVoidType())
5838
8.40k
      RetTy = ConvertType(BuiltinRetType);
5839
5840
9.03k
    if (RetTy != V->getType()) {
5841
      // XXX - vector of pointers?
5842
467
      if (auto *PtrTy = dyn_cast<llvm::PointerType>(RetTy)) {
5843
0
        if (PtrTy->getAddressSpace() != V->getType()->getPointerAddressSpace()) {
5844
0
          V = Builder.CreateAddrSpaceCast(
5845
0
              V, llvm::PointerType::get(getLLVMContext(),
5846
0
                                        PtrTy->getAddressSpace()));
5847
0
        }
5848
0
      }
5849
5850
467
      assert(V->getType()->canLosslesslyBitCastTo(RetTy) &&
5851
467
             "Must be able to losslessly bit cast result type");
5852
      // Cast x86_amx to vector type (e.g., v256i32); this only happens
5853
      // in amx intrinsics.
5854
467
      if (V->getType()->isX86_AMXTy())
5855
11
        V = Builder.CreateIntrinsic(Intrinsic::x86_cast_tile_to_vector, {RetTy},
5856
11
                                    {V});
5857
456
      else
5858
456
        V = Builder.CreateBitCast(V, RetTy);
5859
467
    }
5860
5861
9.03k
    if (RetTy->isVoidTy())
5862
636
      return RValue::get(nullptr);
5863
5864
8.40k
    return RValue::get(V);
5865
9.03k
  }
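  // Illustrative IR sketch of the AMX special case above: x86_amx is not
  // bitcast-compatible with ordinary vector types, so <256 x i32> values are
  // moved through the dedicated cast intrinsics instead, roughly:
  //   %t  = call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> %v)
  //   %v2 = call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx %t)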
5866
5867
  // Some target-specific builtins can have aggregate return values, e.g.
5868
  // __builtin_arm_mve_vld2q_u32. So if the result is an aggregate, force
5869
  // ReturnValue to be non-null, so that the target-specific emission code can
5870
  // always just emit into it.
5871
66.0k
  TypeEvaluationKind EvalKind = getEvaluationKind(E->getType());
5872
66.0k
  if (EvalKind == TEK_Aggregate && ReturnValue.isNull()) {
5873
2
    Address DestPtr = CreateMemTemp(E->getType(), "agg.tmp");
5874
2
    ReturnValue = ReturnValueSlot(DestPtr, false);
5875
2
  }
5876
5877
  // Now see if we can emit a target-specific builtin.
5878
66.0k
  if (Value *V = EmitTargetBuiltinExpr(BuiltinID, E, ReturnValue)) {
5879
66.0k
    switch (EvalKind) {
5880
66.0k
    case TEK_Scalar:
5881
66.0k
      if (V->getType()->isVoidTy())
5882
5.95k
        return RValue::get(nullptr);
5883
60.1k
      return RValue::get(V);
5884
8
    case TEK_Aggregate:
5885
8
      return RValue::getAggregate(ReturnValue.getValue(),
5886
8
                                  ReturnValue.isVolatile());
5887
0
    case TEK_Complex:
5888
0
      llvm_unreachable("No current target builtin returns complex");
5889
66.0k
    }
5890
0
    llvm_unreachable("Bad evaluation kind in EmitBuiltinExpr");
5891
0
  }
5892
5893
1
  if (getLangOpts().HIPStdPar && getLangOpts().CUDAIsDevice)
5894
1
    return EmitHipStdParUnsupportedBuiltin(this, FD);
5895
5896
0
  ErrorUnsupported(E, "builtin function");
5897
5898
  // Unknown builtin, for now just dump it out and return undef.
5899
0
  return GetUndefRValue(E->getType());
5900
1
}
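For orientation, a minimal sketch (illustrative only, not from this file) of how the first two fallback paths above surface in user code; the exact IR depends on the target, language options, and optimization level. Builtins that survive both checks continue into the arch-prefix intrinsic lookup and, failing that, the target-specific emitters below.

#include <cstdlib>

// isLibFunction path: __builtin_sin is normally emitted as an ordinary call
// to the unmangled library function "sin" rather than a mangled builtin.
double lib_alias(double x) { return __builtin_sin(x); }

// isPredefinedLibFunction path: a call to malloc goes through exactly the
// normal call path, with the callee taken from the CallExpr itself.
void *predefined(std::size_t n) { return std::malloc(n); }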
5901
5902
static Value *EmitTargetArchBuiltinExpr(CodeGenFunction *CGF,
5903
                                        unsigned BuiltinID, const CallExpr *E,
5904
                                        ReturnValueSlot ReturnValue,
5905
66.0k
                                        llvm::Triple::ArchType Arch) {
5906
  // When compiling in HipStdPar mode we have to be conservative in rejecting
5907
  // target specific features in the FE, and defer the possible error to the
5908
  // AcceleratorCodeSelection pass, wherein iff an unsupported target builtin is
5909
  // referenced by an accelerator executable function, we emit an error.
5910
  // Returning nullptr here leads to the builtin being handled in
5911
  // EmitHipStdParUnsupportedBuiltin.
5912
66.0k
  if (CGF->getLangOpts().HIPStdPar && CGF->getLangOpts().CUDAIsDevice &&
5913
66.0k
      Arch != CGF->getTarget().getTriple().getArch())
5914
1
    return nullptr;
5915
5916
66.0k
  switch (Arch) {
5917
640
  case llvm::Triple::arm:
5918
640
  case llvm::Triple::armeb:
5919
6.39k
  case llvm::Triple::thumb:
5920
6.67k
  case llvm::Triple::thumbeb:
5921
6.67k
    return CGF->EmitARMBuiltinExpr(BuiltinID, E, ReturnValue, Arch);
5922
47.8k
  case llvm::Triple::aarch64:
5923
47.9k
  case llvm::Triple::aarch64_32:
5924
47.9k
  case llvm::Triple::aarch64_be:
5925
47.9k
    return CGF->EmitAArch64BuiltinExpr(BuiltinID, E, Arch);
5926
0
  case llvm::Triple::bpfeb:
5927
0
  case llvm::Triple::bpfel:
5928
0
    return CGF->EmitBPFBuiltinExpr(BuiltinID, E);
5929
959
  case llvm::Triple::x86:
5930
9.30k
  case llvm::Triple::x86_64:
5931
9.30k
    return CGF->EmitX86BuiltinExpr(BuiltinID, E);
5932
5
  case llvm::Triple::ppc:
5933
5
  case llvm::Triple::ppcle:
5934
131
  case llvm::Triple::ppc64:
5935
269
  case llvm::Triple::ppc64le:
5936
269
    return CGF->EmitPPCBuiltinExpr(BuiltinID, E);
5937
4
  case llvm::Triple::r600:
5938
439
  case llvm::Triple::amdgcn:
5939
439
    return CGF->EmitAMDGPUBuiltinExpr(BuiltinID, E);
5940
0
  case llvm::Triple::systemz:
5941
0
    return CGF->EmitSystemZBuiltinExpr(BuiltinID, E);
5942
463
  case llvm::Triple::nvptx:
5943
1.09k
  case llvm::Triple::nvptx64:
5944
1.09k
    return CGF->EmitNVPTXBuiltinExpr(BuiltinID, E);
5945
127
  case llvm::Triple::wasm32:
5946
283
  case llvm::Triple::wasm64:
5947
283
    return CGF->EmitWebAssemblyBuiltinExpr(BuiltinID, E);
5948
0
  case llvm::Triple::hexagon:
5949
0
    return CGF->EmitHexagonBuiltinExpr(BuiltinID, E);
5950
23
  case llvm::Triple::riscv32:
5951
52
  case llvm::Triple::riscv64:
5952
52
    return CGF->EmitRISCVBuiltinExpr(BuiltinID, E, ReturnValue);
5953
0
  default:
5954
0
    return nullptr;
5955
66.0k
  }
5956
66.0k
}
5957
5958
Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID,
5959
                                              const CallExpr *E,
5960
66.0k
                                              ReturnValueSlot ReturnValue) {
5961
66.0k
  if (getContext().BuiltinInfo.isAuxBuiltinID(BuiltinID)) {
5962
1
    assert(getContext().getAuxTargetInfo() && "Missing aux target info");
5963
1
    return EmitTargetArchBuiltinExpr(
5964
1
        this, getContext().BuiltinInfo.getAuxBuiltinID(BuiltinID), E,
5965
1
        ReturnValue, getContext().getAuxTargetInfo()->getTriple().getArch());
5966
1
  }
5967
5968
66.0k
  return EmitTargetArchBuiltinExpr(this, BuiltinID, E, ReturnValue,
5969
66.0k
                                   getTarget().getTriple().getArch());
5970
66.0k
}
5971
5972
static llvm::FixedVectorType *GetNeonType(CodeGenFunction *CGF,
5973
                                          NeonTypeFlags TypeFlags,
5974
                                          bool HasLegalHalfType = true,
5975
                                          bool V1Ty = false,
5976
7.47k
                                          bool AllowBFloatArgsAndRet = true) {
5977
7.47k
  int IsQuad = TypeFlags.isQuad();
5978
7.47k
  switch (TypeFlags.getEltType()) {
5979
1.15k
  case NeonTypeFlags::Int8:
5980
1.43k
  case NeonTypeFlags::Poly8:
5981
1.43k
    return llvm::FixedVectorType::get(CGF->Int8Ty, V1Ty ? 1 : (8 << IsQuad));
5982
1.58k
  case NeonTypeFlags::Int16:
5983
1.80k
  case NeonTypeFlags::Poly16:
5984
1.80k
    return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
5985
210
  case NeonTypeFlags::BFloat16:
5986
210
    if (AllowBFloatArgsAndRet)
5987
205
      return llvm::FixedVectorType::get(CGF->BFloatTy, V1Ty ? 1 : (4 << IsQuad));
5988
5
    else
5989
5
      return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
5990
477
  case NeonTypeFlags::Float16:
5991
477
    if (HasLegalHalfType)
5992
441
      return llvm::FixedVectorType::get(CGF->HalfTy, V1Ty ? 1 : (4 << IsQuad));
5993
36
    else
5994
36
      return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
5995
1.74k
  case NeonTypeFlags::Int32:
5996
1.74k
    return llvm::FixedVectorType::get(CGF->Int32Ty, V1Ty ? 1 : (2 << IsQuad));
5997
770
  case NeonTypeFlags::Int64:
5998
862
  case NeonTypeFlags::Poly64:
5999
862
    return llvm::FixedVectorType::get(CGF->Int64Ty, V1Ty ? 1 : (1 << IsQuad));
6000
12
  case NeonTypeFlags::Poly128:
6001
    // FIXME: i128 and f128 don't get full support in Clang and LLVM.
6002
    // A lot of the i128 and f128 API is missing,
6003
    // so we use v16i8 to represent poly128 and get it pattern matched.
6004
12
    return llvm::FixedVectorType::get(CGF->Int8Ty, 16);
6005
627
  case NeonTypeFlags::Float32:
6006
627
    return llvm::FixedVectorType::get(CGF->FloatTy, V1Ty ? 1 : (2 << IsQuad));
6007
300
  case NeonTypeFlags::Float64:
6008
300
    return llvm::FixedVectorType::get(CGF->DoubleTy, V1Ty ? 1 : (1 << IsQuad));
6009
7.47k
  }
6010
0
  llvm_unreachable("Unknown vector element type!");
6011
0
}
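All of the lane counts above follow a single rule: a 64-bit D register holds 64/EltBits lanes and the Q (quad) forms double that, with the V1Ty case forcing a single lane and Poly128 modeled as v16i8. A standalone sketch of that arithmetic (assumed helper name, not part of this file):

#include <cstdio>

// Lane count of a NEON vector: 64-bit base width, doubled for Q registers.
static unsigned neonLaneCount(unsigned EltBits, bool IsQuad) {
  return (64 / EltBits) << (IsQuad ? 1 : 0);
}

int main() {
  std::printf("%u\n", neonLaneCount(8, true));   // 16 -> v16i8
  std::printf("%u\n", neonLaneCount(16, false)); //  4 -> v4i16 / v4f16
  std::printf("%u\n", neonLaneCount(32, true));  //  4 -> v4i32 / v4f32
  std::printf("%u\n", neonLaneCount(64, false)); //  1 -> v1i64 / v1f64
  return 0;
}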
6012
6013
static llvm::VectorType *GetFloatNeonType(CodeGenFunction *CGF,
6014
142
                                          NeonTypeFlags IntTypeFlags) {
6015
142
  int IsQuad = IntTypeFlags.isQuad();
6016
142
  switch (IntTypeFlags.getEltType()) {
6017
54
  case NeonTypeFlags::Int16:
6018
54
    return llvm::FixedVectorType::get(CGF->HalfTy, (4 << IsQuad));
6019
56
  case NeonTypeFlags::Int32:
6020
56
    return llvm::FixedVectorType::get(CGF->FloatTy, (2 << IsQuad));
6021
32
  case NeonTypeFlags::Int64:
6022
32
    return llvm::FixedVectorType::get(CGF->DoubleTy, (1 << IsQuad));
6023
0
  default:
6024
0
    llvm_unreachable("Type can't be converted to floating-point!");
6025
142
  }
6026
142
}
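Put differently, the integer element flags map to the floating-point element type of the same width with the lane count preserved. A hedged usage-level example (compiles only when targeting AArch64/ARM with NEON available):

#include <arm_neon.h>

// 2 x i32 -> 2 x f32: same lane count, same element width.
float32x2_t to_float(int32x2_t v) { return vcvt_f32_s32(v); }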
6027
6028
Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C,
6029
536
                                      const ElementCount &Count) {
6030
536
  Value *SV = llvm::ConstantVector::getSplat(Count, C);
6031
536
  return Builder.CreateShuffleVector(V, V, SV, "lane");
6032
536
}
6033
6034
75
Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) {
6035
75
  ElementCount EC = cast<llvm::VectorType>(V->getType())->getElementCount();
6036
75
  return EmitNeonSplat(V, C, EC);
6037
75
}
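A hedged illustration of what EmitNeonSplat builds: the constant C is the lane index, ConstantVector::getSplat repeats it Count times, and the shuffle broadcasts that lane across the result. For lane 1 of a <4 x float> the emitted IR looks roughly like:

//   %lane = shufflevector <4 x float> %v, <4 x float> %v,
//                         <4 x i32> <i32 1, i32 1, i32 1, i32 1>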
6038
6039
Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value*> &Ops,
6040
                                     const char *name,
6041
2.73k
                                     unsigned shift, bool rightshift) {
6042
2.73k
  unsigned j = 0;
6043
2.73k
  for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
6044
9.39k
       ai != ae; ++ai, ++j) {
6045
6.65k
    if (F->isConstrainedFPIntrinsic())
6046
172
      if (ai->getType()->isMetadataTy())
6047
92
        continue;
6048
6.56k
    if (shift > 0 && shift == j)
6049
121
      Ops[j] = EmitNeonShiftVector(Ops[j], ai->getType(), rightshift);
6050
6.44k
    else
6051
6.44k
      Ops[j] = Builder.CreateBitCast(Ops[j], ai->getType(), name);
6052
6.56k
  }
6053
6054
2.73k
  if (F->isConstrainedFPIntrinsic())
6055
56
    return Builder.CreateConstrainedFPCall(F, Ops, name);
6056
2.68k
  else
6057
2.68k
    return Builder.CreateCall(F, Ops, name);
6058
2.73k
}
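A hedged aside on the constrained-FP branch above: constrained intrinsics carry trailing metadata operands for the rounding mode and exception behavior, which is why the loop skips metadata-typed parameters and why the call is built with CreateConstrainedFPCall. The resulting call looks roughly like:

//   %r = call <4 x float> @llvm.experimental.constrained.fadd.v4f32(
//            <4 x float> %a, <4 x float> %b,
//            metadata !"round.tonearest", metadata !"fpexcept.strict")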
6059
6060
Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty,
6061
338
                                            bool neg) {
6062
338
  int SV = cast<ConstantInt>(V)->getSExtValue();
6063
338
  return ConstantInt::get(Ty, neg ? -SV : SV);
6064
338
}
6065
6066
// Right-shift a vector by a constant.
6067
Value *CodeGenFunction::EmitNeonRShiftImm(Value *Vec, Value *Shift,
6068
                                          llvm::Type *Ty, bool usgn,
6069
76
                                          const char *name) {
6070
76
  llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
6071
6072
76
  int ShiftAmt = cast<ConstantInt>(Shift)->getSExtValue();
6073
76
  int EltSize = VTy->getScalarSizeInBits();
6074
6075
76
  Vec = Builder.CreateBitCast(Vec, Ty);
6076
6077
  // lshr/ashr are undefined when the shift amount is equal to the vector
6078
  // element size.
6079
76
  if (ShiftAmt == EltSize) {
6080
8
    if (usgn) {
6081
      // Right-shifting an unsigned value by its size yields 0.
6082
4
      return llvm::ConstantAggregateZero::get(VTy);
6083
4
    } else {
6084
      // Right-shifting a signed value by its size is equivalent
6085
      // to a shift of size-1.
6086
4
      --ShiftAmt;
6087
4
      Shift = ConstantInt::get(VTy->getElementType(), ShiftAmt);
6088
4
    }
6089
8
  }
6090
6091
72
  Shift = EmitNeonShiftVector(Shift, Ty, false);
6092
72
  if (usgn)
6093
36
    return Builder.CreateLShr(Vec, Shift, name);
6094
36
  else
6095
36
    return Builder.CreateAShr(Vec, Shift, name);
6096
72
}
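The scalar equivalent of that edge case, as a self-contained sketch (assumed helper names, not from this file): a right shift by the full element width is not a valid lshr/ashr, but the NEON immediate forms allow it, so unsigned data folds to zero and signed data clamps the amount to size-1, which yields the same sign-fill result.

#include <cstdint>

// NEON-style immediate right shifts, modeled on one 32-bit lane.
static uint32_t ushr_imm32(uint32_t x, unsigned amt) {
  return amt >= 32 ? 0u : x >> amt;   // full-width unsigned shift is zero
}
static int32_t sshr_imm32(int32_t x, unsigned amt) {
  if (amt >= 32) amt = 31;            // clamp: identical sign-fill result
  return x >> amt;
}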
6097
6098
enum {
6099
  AddRetType = (1 << 0),
6100
  Add1ArgType = (1 << 1),
6101
  Add2ArgTypes = (1 << 2),
6102
6103
  VectorizeRetType = (1 << 3),
6104
  VectorizeArgTypes = (1 << 4),
6105
6106
  InventFloatType = (1 << 5),
6107
  UnsignedAlts = (1 << 6),
6108
6109
  Use64BitVectors = (1 << 7),
6110
  Use128BitVectors = (1 << 8),
6111
6112
  Vectorize1ArgType = Add1ArgType | VectorizeArgTypes,
6113
  VectorRet = AddRetType | VectorizeRetType,
6114
  VectorRetGetArgs01 =
6115
      AddRetType | Add2ArgTypes | VectorizeRetType | VectorizeArgTypes,
6116
  FpCmpzModifiers =
6117
      AddRetType | VectorizeRetType | Add1ArgType | InventFloatType
6118
};
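These are plain bit flags, so the composites are simply ORs of their parts and individual behaviours are tested with bit masks (illustrative):

static_assert(FpCmpzModifiers == (AddRetType | VectorizeRetType |
                                  Add1ArgType | InventFloatType),
              "FpCmpzModifiers is the OR of its parts");
// e.g.: bool WantsRetType = (TypeModifier & AddRetType) != 0;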
6119
6120
namespace {
6121
struct ARMVectorIntrinsicInfo {
6122
  const char *NameHint;
6123
  unsigned BuiltinID;
6124
  unsigned LLVMIntrinsic;
6125
  unsigned AltLLVMIntrinsic;
6126
  uint64_t TypeModifier;
6127
6128
579k
  bool operator<(unsigned RHSBuiltinID) const {
6129
579k
    return BuiltinID < RHSBuiltinID;
6130
579k
  }
6131
12.2M
  bool operator<(const ARMVectorIntrinsicInfo &TE) const {
6132
12.2M
    return BuiltinID < TE.BuiltinID;
6133
12.2M
  }
6134
};
6135
} // end anonymous namespace
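The two operator< overloads exist so the sorted tables below can be searched with std::lower_bound; a hedged sketch of such a lookup (the function name and call pattern here are assumptions, not code from this section):

#include <algorithm>

static const ARMVectorIntrinsicInfo *
lookupNeonIntrinsicInfo(llvm::ArrayRef<ARMVectorIntrinsicInfo> Map,
                        unsigned BuiltinID) {
  // Requires Map to be sorted by BuiltinID; the element-vs-value comparison
  // resolves to the member operator<(unsigned) defined above.
  auto It = std::lower_bound(Map.begin(), Map.end(), BuiltinID);
  if (It != Map.end() && It->BuiltinID == BuiltinID)
    return &*It;
  return nullptr;
}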
6136
6137
#define NEONMAP0(NameBase) \
6138
  { #NameBase, NEON::BI__builtin_neon_ ## NameBase, 0, 0, 0 }
6139
6140
#define NEONMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
6141
  { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \
6142
      Intrinsic::LLVMIntrinsic, 0, TypeModifier }
6143
6144
#define NEONMAP2(NameBase, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier) \
6145
  { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \
6146
      Intrinsic::LLVMIntrinsic, Intrinsic::AltLLVMIntrinsic, \
6147
      TypeModifier }
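One way to read the table entries that follow is to expand a macro by hand (illustrative expansion, not additional source):

// NEONMAP1(vabs_v, arm_neon_vabs, 0) pastes tokens into the initializer
//   { "vabs_v", NEON::BI__builtin_neon_vabs_v, Intrinsic::arm_neon_vabs, 0, 0 }
// i.e. { NameHint, BuiltinID, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier };
// NEONMAP2 additionally fills the AltLLVMIntrinsic slot, and NEONMAP0 leaves
// both intrinsic slots zero.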
6148
6149
static const ARMVectorIntrinsicInfo ARMSIMDIntrinsicMap [] = {
6150
  NEONMAP1(__a32_vcvt_bf16_f32, arm_neon_vcvtfp2bf, 0),
6151
  NEONMAP0(splat_lane_v),
6152
  NEONMAP0(splat_laneq_v),
6153
  NEONMAP0(splatq_lane_v),
6154
  NEONMAP0(splatq_laneq_v),
6155
  NEONMAP2(vabd_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
6156
  NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
6157
  NEONMAP1(vabs_v, arm_neon_vabs, 0),
6158
  NEONMAP1(vabsq_v, arm_neon_vabs, 0),
6159
  NEONMAP0(vadd_v),
6160
  NEONMAP0(vaddhn_v),
6161
  NEONMAP0(vaddq_v),
6162
  NEONMAP1(vaesdq_u8, arm_neon_aesd, 0),
6163
  NEONMAP1(vaeseq_u8, arm_neon_aese, 0),
6164
  NEONMAP1(vaesimcq_u8, arm_neon_aesimc, 0),
6165
  NEONMAP1(vaesmcq_u8, arm_neon_aesmc, 0),
6166
  NEONMAP1(vbfdot_f32, arm_neon_bfdot, 0),
6167
  NEONMAP1(vbfdotq_f32, arm_neon_bfdot, 0),
6168
  NEONMAP1(vbfmlalbq_f32, arm_neon_bfmlalb, 0),
6169
  NEONMAP1(vbfmlaltq_f32, arm_neon_bfmlalt, 0),
6170
  NEONMAP1(vbfmmlaq_f32, arm_neon_bfmmla, 0),
6171
  NEONMAP1(vbsl_v, arm_neon_vbsl, AddRetType),
6172
  NEONMAP1(vbslq_v, arm_neon_vbsl, AddRetType),
6173
  NEONMAP1(vcadd_rot270_f16, arm_neon_vcadd_rot270, Add1ArgType),
6174
  NEONMAP1(vcadd_rot270_f32, arm_neon_vcadd_rot270, Add1ArgType),
6175
  NEONMAP1(vcadd_rot90_f16, arm_neon_vcadd_rot90, Add1ArgType),
6176
  NEONMAP1(vcadd_rot90_f32, arm_neon_vcadd_rot90, Add1ArgType),
6177
  NEONMAP1(vcaddq_rot270_f16, arm_neon_vcadd_rot270, Add1ArgType),
6178
  NEONMAP1(vcaddq_rot270_f32, arm_neon_vcadd_rot270, Add1ArgType),
6179
  NEONMAP1(vcaddq_rot270_f64, arm_neon_vcadd_rot270, Add1ArgType),
6180
  NEONMAP1(vcaddq_rot90_f16, arm_neon_vcadd_rot90, Add1ArgType),
6181
  NEONMAP1(vcaddq_rot90_f32, arm_neon_vcadd_rot90, Add1ArgType),
6182
  NEONMAP1(vcaddq_rot90_f64, arm_neon_vcadd_rot90, Add1ArgType),
6183
  NEONMAP1(vcage_v, arm_neon_vacge, 0),
6184
  NEONMAP1(vcageq_v, arm_neon_vacge, 0),
6185
  NEONMAP1(vcagt_v, arm_neon_vacgt, 0),
6186
  NEONMAP1(vcagtq_v, arm_neon_vacgt, 0),
6187
  NEONMAP1(vcale_v, arm_neon_vacge, 0),
6188
  NEONMAP1(vcaleq_v, arm_neon_vacge, 0),
6189
  NEONMAP1(vcalt_v, arm_neon_vacgt, 0),
6190
  NEONMAP1(vcaltq_v, arm_neon_vacgt, 0),
6191
  NEONMAP0(vceqz_v),
6192
  NEONMAP0(vceqzq_v),
6193
  NEONMAP0(vcgez_v),
6194
  NEONMAP0(vcgezq_v),
6195
  NEONMAP0(vcgtz_v),
6196
  NEONMAP0(vcgtzq_v),
6197
  NEONMAP0(vclez_v),
6198
  NEONMAP0(vclezq_v),
6199
  NEONMAP1(vcls_v, arm_neon_vcls, Add1ArgType),
6200
  NEONMAP1(vclsq_v, arm_neon_vcls, Add1ArgType),
6201
  NEONMAP0(vcltz_v),
6202
  NEONMAP0(vcltzq_v),
6203
  NEONMAP1(vclz_v, ctlz, Add1ArgType),
6204
  NEONMAP1(vclzq_v, ctlz, Add1ArgType),
6205
  NEONMAP1(vcnt_v, ctpop, Add1ArgType),
6206
  NEONMAP1(vcntq_v, ctpop, Add1ArgType),
6207
  NEONMAP1(vcvt_f16_f32, arm_neon_vcvtfp2hf, 0),
6208
  NEONMAP0(vcvt_f16_s16),
6209
  NEONMAP0(vcvt_f16_u16),
6210
  NEONMAP1(vcvt_f32_f16, arm_neon_vcvthf2fp, 0),
6211
  NEONMAP0(vcvt_f32_v),
6212
  NEONMAP1(vcvt_n_f16_s16, arm_neon_vcvtfxs2fp, 0),
6213
  NEONMAP1(vcvt_n_f16_u16, arm_neon_vcvtfxu2fp, 0),
6214
  NEONMAP2(vcvt_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
6215
  NEONMAP1(vcvt_n_s16_f16, arm_neon_vcvtfp2fxs, 0),
6216
  NEONMAP1(vcvt_n_s32_v, arm_neon_vcvtfp2fxs, 0),
6217
  NEONMAP1(vcvt_n_s64_v, arm_neon_vcvtfp2fxs, 0),
6218
  NEONMAP1(vcvt_n_u16_f16, arm_neon_vcvtfp2fxu, 0),
6219
  NEONMAP1(vcvt_n_u32_v, arm_neon_vcvtfp2fxu, 0),
6220
  NEONMAP1(vcvt_n_u64_v, arm_neon_vcvtfp2fxu, 0),
6221
  NEONMAP0(vcvt_s16_f16),
6222
  NEONMAP0(vcvt_s32_v),
6223
  NEONMAP0(vcvt_s64_v),
6224
  NEONMAP0(vcvt_u16_f16),
6225
  NEONMAP0(vcvt_u32_v),
6226
  NEONMAP0(vcvt_u64_v),
6227
  NEONMAP1(vcvta_s16_f16, arm_neon_vcvtas, 0),
6228
  NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0),
6229
  NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0),
6230
  NEONMAP1(vcvta_u16_f16, arm_neon_vcvtau, 0),
6231
  NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0),
6232
  NEONMAP1(vcvta_u64_v, arm_neon_vcvtau, 0),
6233
  NEONMAP1(vcvtaq_s16_f16, arm_neon_vcvtas, 0),
6234
  NEONMAP1(vcvtaq_s32_v, arm_neon_vcvtas, 0),
6235
  NEONMAP1(vcvtaq_s64_v, arm_neon_vcvtas, 0),
6236
  NEONMAP1(vcvtaq_u16_f16, arm_neon_vcvtau, 0),
6237
  NEONMAP1(vcvtaq_u32_v, arm_neon_vcvtau, 0),
6238
  NEONMAP1(vcvtaq_u64_v, arm_neon_vcvtau, 0),
6239
  NEONMAP1(vcvth_bf16_f32, arm_neon_vcvtbfp2bf, 0),
6240
  NEONMAP1(vcvtm_s16_f16, arm_neon_vcvtms, 0),
6241
  NEONMAP1(vcvtm_s32_v, arm_neon_vcvtms, 0),
6242
  NEONMAP1(vcvtm_s64_v, arm_neon_vcvtms, 0),
6243
  NEONMAP1(vcvtm_u16_f16, arm_neon_vcvtmu, 0),
6244
  NEONMAP1(vcvtm_u32_v, arm_neon_vcvtmu, 0),
6245
  NEONMAP1(vcvtm_u64_v, arm_neon_vcvtmu, 0),
6246
  NEONMAP1(vcvtmq_s16_f16, arm_neon_vcvtms, 0),
6247
  NEONMAP1(vcvtmq_s32_v, arm_neon_vcvtms, 0),
6248
  NEONMAP1(vcvtmq_s64_v, arm_neon_vcvtms, 0),
6249
  NEONMAP1(vcvtmq_u16_f16, arm_neon_vcvtmu, 0),
6250
  NEONMAP1(vcvtmq_u32_v, arm_neon_vcvtmu, 0),
6251
  NEONMAP1(vcvtmq_u64_v, arm_neon_vcvtmu, 0),
6252
  NEONMAP1(vcvtn_s16_f16, arm_neon_vcvtns, 0),
6253
  NEONMAP1(vcvtn_s32_v, arm_neon_vcvtns, 0),
6254
  NEONMAP1(vcvtn_s64_v, arm_neon_vcvtns, 0),
6255
  NEONMAP1(vcvtn_u16_f16, arm_neon_vcvtnu, 0),
6256
  NEONMAP1(vcvtn_u32_v, arm_neon_vcvtnu, 0),
6257
  NEONMAP1(vcvtn_u64_v, arm_neon_vcvtnu, 0),
6258
  NEONMAP1(vcvtnq_s16_f16, arm_neon_vcvtns, 0),
6259
  NEONMAP1(vcvtnq_s32_v, arm_neon_vcvtns, 0),
6260
  NEONMAP1(vcvtnq_s64_v, arm_neon_vcvtns, 0),
6261
  NEONMAP1(vcvtnq_u16_f16, arm_neon_vcvtnu, 0),
6262
  NEONMAP1(vcvtnq_u32_v, arm_neon_vcvtnu, 0),
6263
  NEONMAP1(vcvtnq_u64_v, arm_neon_vcvtnu, 0),
6264
  NEONMAP1(vcvtp_s16_f16, arm_neon_vcvtps, 0),
6265
  NEONMAP1(vcvtp_s32_v, arm_neon_vcvtps, 0),
6266
  NEONMAP1(vcvtp_s64_v, arm_neon_vcvtps, 0),
6267
  NEONMAP1(vcvtp_u16_f16, arm_neon_vcvtpu, 0),
6268
  NEONMAP1(vcvtp_u32_v, arm_neon_vcvtpu, 0),
6269
  NEONMAP1(vcvtp_u64_v, arm_neon_vcvtpu, 0),
6270
  NEONMAP1(vcvtpq_s16_f16, arm_neon_vcvtps, 0),
6271
  NEONMAP1(vcvtpq_s32_v, arm_neon_vcvtps, 0),
6272
  NEONMAP1(vcvtpq_s64_v, arm_neon_vcvtps, 0),
6273
  NEONMAP1(vcvtpq_u16_f16, arm_neon_vcvtpu, 0),
6274
  NEONMAP1(vcvtpq_u32_v, arm_neon_vcvtpu, 0),
6275
  NEONMAP1(vcvtpq_u64_v, arm_neon_vcvtpu, 0),
6276
  NEONMAP0(vcvtq_f16_s16),
6277
  NEONMAP0(vcvtq_f16_u16),
6278
  NEONMAP0(vcvtq_f32_v),
6279
  NEONMAP1(vcvtq_n_f16_s16, arm_neon_vcvtfxs2fp, 0),
6280
  NEONMAP1(vcvtq_n_f16_u16, arm_neon_vcvtfxu2fp, 0),
6281
  NEONMAP2(vcvtq_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
6282
  NEONMAP1(vcvtq_n_s16_f16, arm_neon_vcvtfp2fxs, 0),
6283
  NEONMAP1(vcvtq_n_s32_v, arm_neon_vcvtfp2fxs, 0),
6284
  NEONMAP1(vcvtq_n_s64_v, arm_neon_vcvtfp2fxs, 0),
6285
  NEONMAP1(vcvtq_n_u16_f16, arm_neon_vcvtfp2fxu, 0),
6286
  NEONMAP1(vcvtq_n_u32_v, arm_neon_vcvtfp2fxu, 0),
6287
  NEONMAP1(vcvtq_n_u64_v, arm_neon_vcvtfp2fxu, 0),
6288
  NEONMAP0(vcvtq_s16_f16),
6289
  NEONMAP0(vcvtq_s32_v),
6290
  NEONMAP0(vcvtq_s64_v),
6291
  NEONMAP0(vcvtq_u16_f16),
6292
  NEONMAP0(vcvtq_u32_v),
6293
  NEONMAP0(vcvtq_u64_v),
6294
  NEONMAP1(vdot_s32, arm_neon_sdot, 0),
6295
  NEONMAP1(vdot_u32, arm_neon_udot, 0),
6296
  NEONMAP1(vdotq_s32, arm_neon_sdot, 0),
6297
  NEONMAP1(vdotq_u32, arm_neon_udot, 0),
6298
  NEONMAP0(vext_v),
6299
  NEONMAP0(vextq_v),
6300
  NEONMAP0(vfma_v),
6301
  NEONMAP0(vfmaq_v),
6302
  NEONMAP2(vhadd_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
6303
  NEONMAP2(vhaddq_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
6304
  NEONMAP2(vhsub_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
6305
  NEONMAP2(vhsubq_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
6306
  NEONMAP0(vld1_dup_v),
6307
  NEONMAP1(vld1_v, arm_neon_vld1, 0),
6308
  NEONMAP1(vld1_x2_v, arm_neon_vld1x2, 0),
6309
  NEONMAP1(vld1_x3_v, arm_neon_vld1x3, 0),
6310
  NEONMAP1(vld1_x4_v, arm_neon_vld1x4, 0),
6311
  NEONMAP0(vld1q_dup_v),
6312
  NEONMAP1(vld1q_v, arm_neon_vld1, 0),
6313
  NEONMAP1(vld1q_x2_v, arm_neon_vld1x2, 0),
6314
  NEONMAP1(vld1q_x3_v, arm_neon_vld1x3, 0),
6315
  NEONMAP1(vld1q_x4_v, arm_neon_vld1x4, 0),
6316
  NEONMAP1(vld2_dup_v, arm_neon_vld2dup, 0),
6317
  NEONMAP1(vld2_lane_v, arm_neon_vld2lane, 0),
6318
  NEONMAP1(vld2_v, arm_neon_vld2, 0),
6319
  NEONMAP1(vld2q_dup_v, arm_neon_vld2dup, 0),
6320
  NEONMAP1(vld2q_lane_v, arm_neon_vld2lane, 0),
6321
  NEONMAP1(vld2q_v, arm_neon_vld2, 0),
6322
  NEONMAP1(vld3_dup_v, arm_neon_vld3dup, 0),
6323
  NEONMAP1(vld3_lane_v, arm_neon_vld3lane, 0),
6324
  NEONMAP1(vld3_v, arm_neon_vld3, 0),
6325
  NEONMAP1(vld3q_dup_v, arm_neon_vld3dup, 0),
6326
  NEONMAP1(vld3q_lane_v, arm_neon_vld3lane, 0),
6327
  NEONMAP1(vld3q_v, arm_neon_vld3, 0),
6328
  NEONMAP1(vld4_dup_v, arm_neon_vld4dup, 0),
6329
  NEONMAP1(vld4_lane_v, arm_neon_vld4lane, 0),
6330
  NEONMAP1(vld4_v, arm_neon_vld4, 0),
6331
  NEONMAP1(vld4q_dup_v, arm_neon_vld4dup, 0),
6332
  NEONMAP1(vld4q_lane_v, arm_neon_vld4lane, 0),
6333
  NEONMAP1(vld4q_v, arm_neon_vld4, 0),
6334
  NEONMAP2(vmax_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
6335
  NEONMAP1(vmaxnm_v, arm_neon_vmaxnm, Add1ArgType),
6336
  NEONMAP1(vmaxnmq_v, arm_neon_vmaxnm, Add1ArgType),
6337
  NEONMAP2(vmaxq_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
6338
  NEONMAP2(vmin_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
6339
  NEONMAP1(vminnm_v, arm_neon_vminnm, Add1ArgType),
6340
  NEONMAP1(vminnmq_v, arm_neon_vminnm, Add1ArgType),
6341
  NEONMAP2(vminq_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
6342
  NEONMAP1(vmmlaq_s32, arm_neon_smmla, 0),
6343
  NEONMAP1(vmmlaq_u32, arm_neon_ummla, 0),
6344
  NEONMAP0(vmovl_v),
6345
  NEONMAP0(vmovn_v),
6346
  NEONMAP1(vmul_v, arm_neon_vmulp, Add1ArgType),
6347
  NEONMAP0(vmull_v),
6348
  NEONMAP1(vmulq_v, arm_neon_vmulp, Add1ArgType),
6349
  NEONMAP2(vpadal_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
6350
  NEONMAP2(vpadalq_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
6351
  NEONMAP1(vpadd_v, arm_neon_vpadd, Add1ArgType),
6352
  NEONMAP2(vpaddl_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
6353
  NEONMAP2(vpaddlq_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
6354
  NEONMAP1(vpaddq_v, arm_neon_vpadd, Add1ArgType),
6355
  NEONMAP2(vpmax_v, arm_neon_vpmaxu, arm_neon_vpmaxs, Add1ArgType | UnsignedAlts),
6356
  NEONMAP2(vpmin_v, arm_neon_vpminu, arm_neon_vpmins, Add1ArgType | UnsignedAlts),
6357
  NEONMAP1(vqabs_v, arm_neon_vqabs, Add1ArgType),
6358
  NEONMAP1(vqabsq_v, arm_neon_vqabs, Add1ArgType),
6359
  NEONMAP2(vqadd_v, uadd_sat, sadd_sat, Add1ArgType | UnsignedAlts),
6360
  NEONMAP2(vqaddq_v, uadd_sat, sadd_sat, Add1ArgType | UnsignedAlts),
6361
  NEONMAP2(vqdmlal_v, arm_neon_vqdmull, sadd_sat, 0),
6362
  NEONMAP2(vqdmlsl_v, arm_neon_vqdmull, ssub_sat, 0),
6363
  NEONMAP1(vqdmulh_v, arm_neon_vqdmulh, Add1ArgType),
6364
  NEONMAP1(vqdmulhq_v, arm_neon_vqdmulh, Add1ArgType),
6365
  NEONMAP1(vqdmull_v, arm_neon_vqdmull, Add1ArgType),
6366
  NEONMAP2(vqmovn_v, arm_neon_vqmovnu, arm_neon_vqmovns, Add1ArgType | UnsignedAlts),
6367
  NEONMAP1(vqmovun_v, arm_neon_vqmovnsu, Add1ArgType),
6368
  NEONMAP1(vqneg_v, arm_neon_vqneg, Add1ArgType),
6369
  NEONMAP1(vqnegq_v, arm_neon_vqneg, Add1ArgType),
6370
  NEONMAP1(vqrdmlah_s16, arm_neon_vqrdmlah, Add1ArgType),
6371
  NEONMAP1(vqrdmlah_s32, arm_neon_vqrdmlah, Add1ArgType),
6372
  NEONMAP1(vqrdmlahq_s16, arm_neon_vqrdmlah, Add1ArgType),
6373
  NEONMAP1(vqrdmlahq_s32, arm_neon_vqrdmlah, Add1ArgType),
6374
  NEONMAP1(vqrdmlsh_s16, arm_neon_vqrdmlsh, Add1ArgType),
6375
  NEONMAP1(vqrdmlsh_s32, arm_neon_vqrdmlsh, Add1ArgType),
6376
  NEONMAP1(vqrdmlshq_s16, arm_neon_vqrdmlsh, Add1ArgType),
6377
  NEONMAP1(vqrdmlshq_s32, arm_neon_vqrdmlsh, Add1ArgType),
6378
  NEONMAP1(vqrdmulh_v, arm_neon_vqrdmulh, Add1ArgType),
6379
  NEONMAP1(vqrdmulhq_v, arm_neon_vqrdmulh, Add1ArgType),
6380
  NEONMAP2(vqrshl_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
6381
  NEONMAP2(vqrshlq_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
6382
  NEONMAP2(vqshl_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
6383
  NEONMAP2(vqshl_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
6384
  NEONMAP2(vqshlq_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
6385
  NEONMAP2(vqshlq_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
6386
  NEONMAP1(vqshlu_n_v, arm_neon_vqshiftsu, 0),
6387
  NEONMAP1(vqshluq_n_v, arm_neon_vqshiftsu, 0),
6388
  NEONMAP2(vqsub_v, usub_sat, ssub_sat, Add1ArgType | UnsignedAlts),
6389
  NEONMAP2(vqsubq_v, usub_sat, ssub_sat, Add1ArgType | UnsignedAlts),
6390
  NEONMAP1(vraddhn_v, arm_neon_vraddhn, Add1ArgType),
6391
  NEONMAP2(vrecpe_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
6392
  NEONMAP2(vrecpeq_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
6393
  NEONMAP1(vrecps_v, arm_neon_vrecps, Add1ArgType),
6394
  NEONMAP1(vrecpsq_v, arm_neon_vrecps, Add1ArgType),
6395
  NEONMAP2(vrhadd_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
6396
  NEONMAP2(vrhaddq_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
6397
  NEONMAP1(vrnd_v, arm_neon_vrintz, Add1ArgType),
6398
  NEONMAP1(vrnda_v, arm_neon_vrinta, Add1ArgType),
6399
  NEONMAP1(vrndaq_v, arm_neon_vrinta, Add1ArgType),
6400
  NEONMAP0(vrndi_v),
6401
  NEONMAP0(vrndiq_v),
6402
  NEONMAP1(vrndm_v, arm_neon_vrintm, Add1ArgType),
6403
  NEONMAP1(vrndmq_v, arm_neon_vrintm, Add1ArgType),
6404
  NEONMAP1(vrndn_v, arm_neon_vrintn, Add1ArgType),
6405
  NEONMAP1(vrndnq_v, arm_neon_vrintn, Add1ArgType),
6406
  NEONMAP1(vrndp_v, arm_neon_vrintp, Add1ArgType),
6407
  NEONMAP1(vrndpq_v, arm_neon_vrintp, Add1ArgType),
6408
  NEONMAP1(vrndq_v, arm_neon_vrintz, Add1ArgType),
6409
  NEONMAP1(vrndx_v, arm_neon_vrintx, Add1ArgType),
6410
  NEONMAP1(vrndxq_v, arm_neon_vrintx, Add1ArgType),
6411
  NEONMAP2(vrshl_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
6412
  NEONMAP2(vrshlq_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
6413
  NEONMAP2(vrshr_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
6414
  NEONMAP2(vrshrq_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
6415
  NEONMAP2(vrsqrte_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
6416
  NEONMAP2(vrsqrteq_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
6417
  NEONMAP1(vrsqrts_v, arm_neon_vrsqrts, Add1ArgType),
6418
  NEONMAP1(vrsqrtsq_v, arm_neon_vrsqrts, Add1ArgType),
6419
  NEONMAP1(vrsubhn_v, arm_neon_vrsubhn, Add1ArgType),
6420
  NEONMAP1(vsha1su0q_u32, arm_neon_sha1su0, 0),
6421
  NEONMAP1(vsha1su1q_u32, arm_neon_sha1su1, 0),
6422
  NEONMAP1(vsha256h2q_u32, arm_neon_sha256h2, 0),
6423
  NEONMAP1(vsha256hq_u32, arm_neon_sha256h, 0),
6424
  NEONMAP1(vsha256su0q_u32, arm_neon_sha256su0, 0),
6425
  NEONMAP1(vsha256su1q_u32, arm_neon_sha256su1, 0),
6426
  NEONMAP0(vshl_n_v),
6427
  NEONMAP2(vshl_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
6428
  NEONMAP0(vshll_n_v),
6429
  NEONMAP0(vshlq_n_v),
6430
  NEONMAP2(vshlq_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
6431
  NEONMAP0(vshr_n_v),
6432
  NEONMAP0(vshrn_n_v),
6433
  NEONMAP0(vshrq_n_v),
6434
  NEONMAP1(vst1_v, arm_neon_vst1, 0),
6435
  NEONMAP1(vst1_x2_v, arm_neon_vst1x2, 0),
6436
  NEONMAP1(vst1_x3_v, arm_neon_vst1x3, 0),
6437
  NEONMAP1(vst1_x4_v, arm_neon_vst1x4, 0),
6438
  NEONMAP1(vst1q_v, arm_neon_vst1, 0),
6439
  NEONMAP1(vst1q_x2_v, arm_neon_vst1x2, 0),
6440
  NEONMAP1(vst1q_x3_v, arm_neon_vst1x3, 0),
6441
  NEONMAP1(vst1q_x4_v, arm_neon_vst1x4, 0),
6442
  NEONMAP1(vst2_lane_v, arm_neon_vst2lane, 0),
6443
  NEONMAP1(vst2_v, arm_neon_vst2, 0),
6444
  NEONMAP1(vst2q_lane_v, arm_neon_vst2lane, 0),
6445
  NEONMAP1(vst2q_v, arm_neon_vst2, 0),
6446
  NEONMAP1(vst3_lane_v, arm_neon_vst3lane, 0),
6447
  NEONMAP1(vst3_v, arm_neon_vst3, 0),
6448
  NEONMAP1(vst3q_lane_v, arm_neon_vst3lane, 0),
6449
  NEONMAP1(vst3q_v, arm_neon_vst3, 0),
6450
  NEONMAP1(vst4_lane_v, arm_neon_vst4lane, 0),
6451
  NEONMAP1(vst4_v, arm_neon_vst4, 0),
6452
  NEONMAP1(vst4q_lane_v, arm_neon_vst4lane, 0),
6453
  NEONMAP1(vst4q_v, arm_neon_vst4, 0),
6454
  NEONMAP0(vsubhn_v),
6455
  NEONMAP0(vtrn_v),
6456
  NEONMAP0(vtrnq_v),
6457
  NEONMAP0(vtst_v),
6458
  NEONMAP0(vtstq_v),
6459
  NEONMAP1(vusdot_s32, arm_neon_usdot, 0),
6460
  NEONMAP1(vusdotq_s32, arm_neon_usdot, 0),
6461
  NEONMAP1(vusmmlaq_s32, arm_neon_usmmla, 0),
6462
  NEONMAP0(vuzp_v),
6463
  NEONMAP0(vuzpq_v),
6464
  NEONMAP0(vzip_v),
6465
  NEONMAP0(vzipq_v)
6466
};
6467
6468
static const ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
6469
  NEONMAP1(__a64_vcvtq_low_bf16_f32, aarch64_neon_bfcvtn, 0),
6470
  NEONMAP0(splat_lane_v),
6471
  NEONMAP0(splat_laneq_v),
6472
  NEONMAP0(splatq_lane_v),
6473
  NEONMAP0(splatq_laneq_v),
6474
  NEONMAP1(vabs_v, aarch64_neon_abs, 0),
6475
  NEONMAP1(vabsq_v, aarch64_neon_abs, 0),
6476
  NEONMAP0(vadd_v),
6477
  NEONMAP0(vaddhn_v),
6478
  NEONMAP0(vaddq_p128),
6479
  NEONMAP0(vaddq_v),
6480
  NEONMAP1(vaesdq_u8, aarch64_crypto_aesd, 0),
6481
  NEONMAP1(vaeseq_u8, aarch64_crypto_aese, 0),
6482
  NEONMAP1(vaesimcq_u8, aarch64_crypto_aesimc, 0),
6483
  NEONMAP1(vaesmcq_u8, aarch64_crypto_aesmc, 0),
6484
  NEONMAP2(vbcaxq_s16, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
6485
  NEONMAP2(vbcaxq_s32, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
6486
  NEONMAP2(vbcaxq_s64, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
6487
  NEONMAP2(vbcaxq_s8, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
6488
  NEONMAP2(vbcaxq_u16, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
6489
  NEONMAP2(vbcaxq_u32, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
6490
  NEONMAP2(vbcaxq_u64, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
6491
  NEONMAP2(vbcaxq_u8, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
6492
  NEONMAP1(vbfdot_f32, aarch64_neon_bfdot, 0),
6493
  NEONMAP1(vbfdotq_f32, aarch64_neon_bfdot, 0),
6494
  NEONMAP1(vbfmlalbq_f32, aarch64_neon_bfmlalb, 0),
6495
  NEONMAP1(vbfmlaltq_f32, aarch64_neon_bfmlalt, 0),
6496
  NEONMAP1(vbfmmlaq_f32, aarch64_neon_bfmmla, 0),
6497
  NEONMAP1(vcadd_rot270_f16, aarch64_neon_vcadd_rot270, Add1ArgType),
6498
  NEONMAP1(vcadd_rot270_f32, aarch64_neon_vcadd_rot270, Add1ArgType),
6499
  NEONMAP1(vcadd_rot90_f16, aarch64_neon_vcadd_rot90, Add1ArgType),
6500
  NEONMAP1(vcadd_rot90_f32, aarch64_neon_vcadd_rot90, Add1ArgType),
6501
  NEONMAP1(vcaddq_rot270_f16, aarch64_neon_vcadd_rot270, Add1ArgType),
6502
  NEONMAP1(vcaddq_rot270_f32, aarch64_neon_vcadd_rot270, Add1ArgType),
6503
  NEONMAP1(vcaddq_rot270_f64, aarch64_neon_vcadd_rot270, Add1ArgType),
6504
  NEONMAP1(vcaddq_rot90_f16, aarch64_neon_vcadd_rot90, Add1ArgType),
6505
  NEONMAP1(vcaddq_rot90_f32, aarch64_neon_vcadd_rot90, Add1ArgType),
6506
  NEONMAP1(vcaddq_rot90_f64, aarch64_neon_vcadd_rot90, Add1ArgType),
6507
  NEONMAP1(vcage_v, aarch64_neon_facge, 0),
6508
  NEONMAP1(vcageq_v, aarch64_neon_facge, 0),
6509
  NEONMAP1(vcagt_v, aarch64_neon_facgt, 0),
6510
  NEONMAP1(vcagtq_v, aarch64_neon_facgt, 0),
6511
  NEONMAP1(vcale_v, aarch64_neon_facge, 0),
6512
  NEONMAP1(vcaleq_v, aarch64_neon_facge, 0),
6513
  NEONMAP1(vcalt_v, aarch64_neon_facgt, 0),
6514
  NEONMAP1(vcaltq_v, aarch64_neon_facgt, 0),
6515
  NEONMAP0(vceqz_v),
6516
  NEONMAP0(vceqzq_v),
6517
  NEONMAP0(vcgez_v),
6518
  NEONMAP0(vcgezq_v),
6519
  NEONMAP0(vcgtz_v),
6520
  NEONMAP0(vcgtzq_v),
6521
  NEONMAP0(vclez_v),
6522
  NEONMAP0(vclezq_v),
6523
  NEONMAP1(vcls_v, aarch64_neon_cls, Add1ArgType),
6524
  NEONMAP1(vclsq_v, aarch64_neon_cls, Add1ArgType),
6525
  NEONMAP0(vcltz_v),
6526
  NEONMAP0(vcltzq_v),
6527
  NEONMAP1(vclz_v, ctlz, Add1ArgType),
6528
  NEONMAP1(vclzq_v, ctlz, Add1ArgType),
6529
  NEONMAP1(vcmla_f16, aarch64_neon_vcmla_rot0, Add1ArgType),
6530
  NEONMAP1(vcmla_f32, aarch64_neon_vcmla_rot0, Add1ArgType),
6531
  NEONMAP1(vcmla_rot180_f16, aarch64_neon_vcmla_rot180, Add1ArgType),
6532
  NEONMAP1(vcmla_rot180_f32, aarch64_neon_vcmla_rot180, Add1ArgType),
6533
  NEONMAP1(vcmla_rot270_f16, aarch64_neon_vcmla_rot270, Add1ArgType),
6534
  NEONMAP1(vcmla_rot270_f32, aarch64_neon_vcmla_rot270, Add1ArgType),
6535
  NEONMAP1(vcmla_rot90_f16, aarch64_neon_vcmla_rot90, Add1ArgType),
6536
  NEONMAP1(vcmla_rot90_f32, aarch64_neon_vcmla_rot90, Add1ArgType),
6537
  NEONMAP1(vcmlaq_f16, aarch64_neon_vcmla_rot0, Add1ArgType),
6538
  NEONMAP1(vcmlaq_f32, aarch64_neon_vcmla_rot0, Add1ArgType),
6539
  NEONMAP1(vcmlaq_f64, aarch64_neon_vcmla_rot0, Add1ArgType),
6540
  NEONMAP1(vcmlaq_rot180_f16, aarch64_neon_vcmla_rot180, Add1ArgType),
6541
  NEONMAP1(vcmlaq_rot180_f32, aarch64_neon_vcmla_rot180, Add1ArgType),
6542
  NEONMAP1(vcmlaq_rot180_f64, aarch64_neon_vcmla_rot180, Add1ArgType),
6543
  NEONMAP1(vcmlaq_rot270_f16, aarch64_neon_vcmla_rot270, Add1ArgType),
6544
  NEONMAP1(vcmlaq_rot270_f32, aarch64_neon_vcmla_rot270, Add1ArgType),
6545
  NEONMAP1(vcmlaq_rot270_f64, aarch64_neon_vcmla_rot270, Add1ArgType),
6546
  NEONMAP1(vcmlaq_rot90_f16, aarch64_neon_vcmla_rot90, Add1ArgType),
6547
  NEONMAP1(vcmlaq_rot90_f32, aarch64_neon_vcmla_rot90, Add1ArgType),
6548
  NEONMAP1(vcmlaq_rot90_f64, aarch64_neon_vcmla_rot90, Add1ArgType),
6549
  NEONMAP1(vcnt_v, ctpop, Add1ArgType),
6550
  NEONMAP1(vcntq_v, ctpop, Add1ArgType),
6551
  NEONMAP1(vcvt_f16_f32, aarch64_neon_vcvtfp2hf, 0),
6552
  NEONMAP0(vcvt_f16_s16),
6553
  NEONMAP0(vcvt_f16_u16),
6554
  NEONMAP1(vcvt_f32_f16, aarch64_neon_vcvthf2fp, 0),
6555
  NEONMAP0(vcvt_f32_v),
6556
  NEONMAP1(vcvt_n_f16_s16, aarch64_neon_vcvtfxs2fp, 0),
6557
  NEONMAP1(vcvt_n_f16_u16, aarch64_neon_vcvtfxu2fp, 0),
6558
  NEONMAP2(vcvt_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
6559
  NEONMAP2(vcvt_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
6560
  NEONMAP1(vcvt_n_s16_f16, aarch64_neon_vcvtfp2fxs, 0),
6561
  NEONMAP1(vcvt_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
6562
  NEONMAP1(vcvt_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
6563
  NEONMAP1(vcvt_n_u16_f16, aarch64_neon_vcvtfp2fxu, 0),
6564
  NEONMAP1(vcvt_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
6565
  NEONMAP1(vcvt_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
6566
  NEONMAP0(vcvtq_f16_s16),
6567
  NEONMAP0(vcvtq_f16_u16),
6568
  NEONMAP0(vcvtq_f32_v),
6569
  NEONMAP1(vcvtq_high_bf16_f32, aarch64_neon_bfcvtn2, 0),
6570
  NEONMAP1(vcvtq_n_f16_s16, aarch64_neon_vcvtfxs2fp, 0),
6571
  NEONMAP1(vcvtq_n_f16_u16, aarch64_neon_vcvtfxu2fp, 0),
6572
  NEONMAP2(vcvtq_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
6573
  NEONMAP2(vcvtq_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
6574
  NEONMAP1(vcvtq_n_s16_f16, aarch64_neon_vcvtfp2fxs, 0),
6575
  NEONMAP1(vcvtq_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
6576
  NEONMAP1(vcvtq_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
6577
  NEONMAP1(vcvtq_n_u16_f16, aarch64_neon_vcvtfp2fxu, 0),
6578
  NEONMAP1(vcvtq_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
6579
  NEONMAP1(vcvtq_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
6580
  NEONMAP1(vcvtx_f32_v, aarch64_neon_fcvtxn, AddRetType | Add1ArgType),
6581
  NEONMAP1(vdot_s32, aarch64_neon_sdot, 0),
6582
  NEONMAP1(vdot_u32, aarch64_neon_udot, 0),
6583
  NEONMAP1(vdotq_s32, aarch64_neon_sdot, 0),
6584
  NEONMAP1(vdotq_u32, aarch64_neon_udot, 0),
6585
  NEONMAP2(veor3q_s16, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
6586
  NEONMAP2(veor3q_s32, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
6587
  NEONMAP2(veor3q_s64, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
6588
  NEONMAP2(veor3q_s8, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
6589
  NEONMAP2(veor3q_u16, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
6590
  NEONMAP2(veor3q_u32, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
6591
  NEONMAP2(veor3q_u64, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
6592
  NEONMAP2(veor3q_u8, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
6593
  NEONMAP0(vext_v),
6594
  NEONMAP0(vextq_v),
6595
  NEONMAP0(vfma_v),
6596
  NEONMAP0(vfmaq_v),
6597
  NEONMAP1(vfmlal_high_f16, aarch64_neon_fmlal2, 0),
6598
  NEONMAP1(vfmlal_low_f16, aarch64_neon_fmlal, 0),
6599
  NEONMAP1(vfmlalq_high_f16, aarch64_neon_fmlal2, 0),
6600
  NEONMAP1(vfmlalq_low_f16, aarch64_neon_fmlal, 0),
6601
  NEONMAP1(vfmlsl_high_f16, aarch64_neon_fmlsl2, 0),
6602
  NEONMAP1(vfmlsl_low_f16, aarch64_neon_fmlsl, 0),
6603
  NEONMAP1(vfmlslq_high_f16, aarch64_neon_fmlsl2, 0),
6604
  NEONMAP1(vfmlslq_low_f16, aarch64_neon_fmlsl, 0),
6605
  NEONMAP2(vhadd_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
6606
  NEONMAP2(vhaddq_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
6607
  NEONMAP2(vhsub_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
6608
  NEONMAP2(vhsubq_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
6609
  NEONMAP1(vld1_x2_v, aarch64_neon_ld1x2, 0),
6610
  NEONMAP1(vld1_x3_v, aarch64_neon_ld1x3, 0),
6611
  NEONMAP1(vld1_x4_v, aarch64_neon_ld1x4, 0),
6612
  NEONMAP1(vld1q_x2_v, aarch64_neon_ld1x2, 0),
6613
  NEONMAP1(vld1q_x3_v, aarch64_neon_ld1x3, 0),
6614
  NEONMAP1(vld1q_x4_v, aarch64_neon_ld1x4, 0),
6615
  NEONMAP1(vmmlaq_s32, aarch64_neon_smmla, 0),
6616
  NEONMAP1(vmmlaq_u32, aarch64_neon_ummla, 0),
6617
  NEONMAP0(vmovl_v),
6618
  NEONMAP0(vmovn_v),
6619
  NEONMAP1(vmul_v, aarch64_neon_pmul, Add1ArgType),
6620
  NEONMAP1(vmulq_v, aarch64_neon_pmul, Add1ArgType),
6621
  NEONMAP1(vpadd_v, aarch64_neon_addp, Add1ArgType),
6622
  NEONMAP2(vpaddl_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
6623
  NEONMAP2(vpaddlq_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
6624
  NEONMAP1(vpaddq_v, aarch64_neon_addp, Add1ArgType),
6625
  NEONMAP1(vqabs_v, aarch64_neon_sqabs, Add1ArgType),
6626
  NEONMAP1(vqabsq_v, aarch64_neon_sqabs, Add1ArgType),
6627
  NEONMAP2(vqadd_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
6628
  NEONMAP2(vqaddq_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
6629
  NEONMAP2(vqdmlal_v, aarch64_neon_sqdmull, aarch64_neon_sqadd, 0),
6630
  NEONMAP2(vqdmlsl_v, aarch64_neon_sqdmull, aarch64_neon_sqsub, 0),
6631
  NEONMAP1(vqdmulh_lane_v, aarch64_neon_sqdmulh_lane, 0),
6632
  NEONMAP1(vqdmulh_laneq_v, aarch64_neon_sqdmulh_laneq, 0),
6633
  NEONMAP1(vqdmulh_v, aarch64_neon_sqdmulh, Add1ArgType),
6634
  NEONMAP1(vqdmulhq_lane_v, aarch64_neon_sqdmulh_lane, 0),
6635
  NEONMAP1(vqdmulhq_laneq_v, aarch64_neon_sqdmulh_laneq, 0),
6636
  NEONMAP1(vqdmulhq_v, aarch64_neon_sqdmulh, Add1ArgType),
6637
  NEONMAP1(vqdmull_v, aarch64_neon_sqdmull, Add1ArgType),
6638
  NEONMAP2(vqmovn_v, aarch64_neon_uqxtn, aarch64_neon_sqxtn, Add1ArgType | UnsignedAlts),
6639
  NEONMAP1(vqmovun_v, aarch64_neon_sqxtun, Add1ArgType),
6640
  NEONMAP1(vqneg_v, aarch64_neon_sqneg, Add1ArgType),
6641
  NEONMAP1(vqnegq_v, aarch64_neon_sqneg, Add1ArgType),
6642
  NEONMAP1(vqrdmlah_s16, aarch64_neon_sqrdmlah, Add1ArgType),
6643
  NEONMAP1(vqrdmlah_s32, aarch64_neon_sqrdmlah, Add1ArgType),
6644
  NEONMAP1(vqrdmlahq_s16, aarch64_neon_sqrdmlah, Add1ArgType),
6645
  NEONMAP1(vqrdmlahq_s32, aarch64_neon_sqrdmlah, Add1ArgType),
6646
  NEONMAP1(vqrdmlsh_s16, aarch64_neon_sqrdmlsh, Add1ArgType),
6647
  NEONMAP1(vqrdmlsh_s32, aarch64_neon_sqrdmlsh, Add1ArgType),
6648
  NEONMAP1(vqrdmlshq_s16, aarch64_neon_sqrdmlsh, Add1ArgType),
6649
  NEONMAP1(vqrdmlshq_s32, aarch64_neon_sqrdmlsh, Add1ArgType),
6650
  NEONMAP1(vqrdmulh_lane_v, aarch64_neon_sqrdmulh_lane, 0),
6651
  NEONMAP1(vqrdmulh_laneq_v, aarch64_neon_sqrdmulh_laneq, 0),
6652
  NEONMAP1(vqrdmulh_v, aarch64_neon_sqrdmulh, Add1ArgType),
6653
  NEONMAP1(vqrdmulhq_lane_v, aarch64_neon_sqrdmulh_lane, 0),
6654
  NEONMAP1(vqrdmulhq_laneq_v, aarch64_neon_sqrdmulh_laneq, 0),
6655
  NEONMAP1(vqrdmulhq_v, aarch64_neon_sqrdmulh, Add1ArgType),
6656
  NEONMAP2(vqrshl_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
6657
  NEONMAP2(vqrshlq_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
6658
  NEONMAP2(vqshl_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),
6659
  NEONMAP2(vqshl_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
6660
  NEONMAP2(vqshlq_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),
6661
  NEONMAP2(vqshlq_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
6662
  NEONMAP1(vqshlu_n_v, aarch64_neon_sqshlu, 0),
6663
  NEONMAP1(vqshluq_n_v, aarch64_neon_sqshlu, 0),
6664
  NEONMAP2(vqsub_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
6665
  NEONMAP2(vqsubq_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
6666
  NEONMAP1(vraddhn_v, aarch64_neon_raddhn, Add1ArgType),
6667
  NEONMAP1(vrax1q_u64, aarch64_crypto_rax1, 0),
6668
  NEONMAP2(vrecpe_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
6669
  NEONMAP2(vrecpeq_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
6670
  NEONMAP1(vrecps_v, aarch64_neon_frecps, Add1ArgType),
6671
  NEONMAP1(vrecpsq_v, aarch64_neon_frecps, Add1ArgType),
6672
  NEONMAP2(vrhadd_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
6673
  NEONMAP2(vrhaddq_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
6674
  NEONMAP1(vrnd32x_f32, aarch64_neon_frint32x, Add1ArgType),
6675
  NEONMAP1(vrnd32x_f64, aarch64_neon_frint32x, Add1ArgType),
6676
  NEONMAP1(vrnd32xq_f32, aarch64_neon_frint32x, Add1ArgType),
6677
  NEONMAP1(vrnd32xq_f64, aarch64_neon_frint32x, Add1ArgType),
6678
  NEONMAP1(vrnd32z_f32, aarch64_neon_frint32z, Add1ArgType),
6679
  NEONMAP1(vrnd32z_f64, aarch64_neon_frint32z, Add1ArgType),
6680
  NEONMAP1(vrnd32zq_f32, aarch64_neon_frint32z, Add1ArgType),
6681
  NEONMAP1(vrnd32zq_f64, aarch64_neon_frint32z, Add1ArgType),
6682
  NEONMAP1(vrnd64x_f32, aarch64_neon_frint64x, Add1ArgType),
6683
  NEONMAP1(vrnd64x_f64, aarch64_neon_frint64x, Add1ArgType),
6684
  NEONMAP1(vrnd64xq_f32, aarch64_neon_frint64x, Add1ArgType),
6685
  NEONMAP1(vrnd64xq_f64, aarch64_neon_frint64x, Add1ArgType),
6686
  NEONMAP1(vrnd64z_f32, aarch64_neon_frint64z, Add1ArgType),
6687
  NEONMAP1(vrnd64z_f64, aarch64_neon_frint64z, Add1ArgType),
6688
  NEONMAP1(vrnd64zq_f32, aarch64_neon_frint64z, Add1ArgType),
6689
  NEONMAP1(vrnd64zq_f64, aarch64_neon_frint64z, Add1ArgType),
6690
  NEONMAP0(vrndi_v),
6691
  NEONMAP0(vrndiq_v),
6692
  NEONMAP2(vrshl_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
6693
  NEONMAP2(vrshlq_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
6694
  NEONMAP2(vrshr_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
6695
  NEONMAP2(vrshrq_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
6696
  NEONMAP2(vrsqrte_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
6697
  NEONMAP2(vrsqrteq_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
6698
  NEONMAP1(vrsqrts_v, aarch64_neon_frsqrts, Add1ArgType),
6699
  NEONMAP1(vrsqrtsq_v, aarch64_neon_frsqrts, Add1ArgType),
6700
  NEONMAP1(vrsubhn_v, aarch64_neon_rsubhn, Add1ArgType),
6701
  NEONMAP1(vsha1su0q_u32, aarch64_crypto_sha1su0, 0),
6702
  NEONMAP1(vsha1su1q_u32, aarch64_crypto_sha1su1, 0),
6703
  NEONMAP1(vsha256h2q_u32, aarch64_crypto_sha256h2, 0),
6704
  NEONMAP1(vsha256hq_u32, aarch64_crypto_sha256h, 0),
6705
  NEONMAP1(vsha256su0q_u32, aarch64_crypto_sha256su0, 0),
6706
  NEONMAP1(vsha256su1q_u32, aarch64_crypto_sha256su1, 0),
6707
  NEONMAP1(vsha512h2q_u64, aarch64_crypto_sha512h2, 0),
6708
  NEONMAP1(vsha512hq_u64, aarch64_crypto_sha512h, 0),
6709
  NEONMAP1(vsha512su0q_u64, aarch64_crypto_sha512su0, 0),
6710
  NEONMAP1(vsha512su1q_u64, aarch64_crypto_sha512su1, 0),
6711
  NEONMAP0(vshl_n_v),
6712
  NEONMAP2(vshl_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
6713
  NEONMAP0(vshll_n_v),
6714
  NEONMAP0(vshlq_n_v),
6715
  NEONMAP2(vshlq_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
6716
  NEONMAP0(vshr_n_v),
6717
  NEONMAP0(vshrn_n_v),
6718
  NEONMAP0(vshrq_n_v),
6719
  NEONMAP1(vsm3partw1q_u32, aarch64_crypto_sm3partw1, 0),
6720
  NEONMAP1(vsm3partw2q_u32, aarch64_crypto_sm3partw2, 0),
6721
  NEONMAP1(vsm3ss1q_u32, aarch64_crypto_sm3ss1, 0),
6722
  NEONMAP1(vsm3tt1aq_u32, aarch64_crypto_sm3tt1a, 0),
6723
  NEONMAP1(vsm3tt1bq_u32, aarch64_crypto_sm3tt1b, 0),
6724
  NEONMAP1(vsm3tt2aq_u32, aarch64_crypto_sm3tt2a, 0),
6725
  NEONMAP1(vsm3tt2bq_u32, aarch64_crypto_sm3tt2b, 0),
6726
  NEONMAP1(vsm4ekeyq_u32, aarch64_crypto_sm4ekey, 0),
6727
  NEONMAP1(vsm4eq_u32, aarch64_crypto_sm4e, 0),
6728
  NEONMAP1(vst1_x2_v, aarch64_neon_st1x2, 0),
6729
  NEONMAP1(vst1_x3_v, aarch64_neon_st1x3, 0),
6730
  NEONMAP1(vst1_x4_v, aarch64_neon_st1x4, 0),
6731
  NEONMAP1(vst1q_x2_v, aarch64_neon_st1x2, 0),
6732
  NEONMAP1(vst1q_x3_v, aarch64_neon_st1x3, 0),
6733
  NEONMAP1(vst1q_x4_v, aarch64_neon_st1x4, 0),
6734
  NEONMAP0(vsubhn_v),
6735
  NEONMAP0(vtst_v),
6736
  NEONMAP0(vtstq_v),
6737
  NEONMAP1(vusdot_s32, aarch64_neon_usdot, 0),
6738
  NEONMAP1(vusdotq_s32, aarch64_neon_usdot, 0),
6739
  NEONMAP1(vusmmlaq_s32, aarch64_neon_usmmla, 0),
6740
  NEONMAP1(vxarq_u64, aarch64_crypto_xar, 0),
6741
};
6742
6743
static const ARMVectorIntrinsicInfo AArch64SISDIntrinsicMap[] = {
6744
  NEONMAP1(vabdd_f64, aarch64_sisd_fabd, Add1ArgType),
6745
  NEONMAP1(vabds_f32, aarch64_sisd_fabd, Add1ArgType),
6746
  NEONMAP1(vabsd_s64, aarch64_neon_abs, Add1ArgType),
6747
  NEONMAP1(vaddlv_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
6748
  NEONMAP1(vaddlv_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
6749
  NEONMAP1(vaddlvq_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
6750
  NEONMAP1(vaddlvq_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
6751
  NEONMAP1(vaddv_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
6752
  NEONMAP1(vaddv_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
6753
  NEONMAP1(vaddv_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
6754
  NEONMAP1(vaddvq_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
6755
  NEONMAP1(vaddvq_f64, aarch64_neon_faddv, AddRetType | Add1ArgType),
6756
  NEONMAP1(vaddvq_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
6757
  NEONMAP1(vaddvq_s64, aarch64_neon_saddv, AddRetType | Add1ArgType),
6758
  NEONMAP1(vaddvq_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
6759
  NEONMAP1(vaddvq_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
6760
  NEONMAP1(vcaged_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
6761
  NEONMAP1(vcages_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
6762
  NEONMAP1(vcagtd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
6763
  NEONMAP1(vcagts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
6764
  NEONMAP1(vcaled_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
6765
  NEONMAP1(vcales_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
6766
  NEONMAP1(vcaltd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
6767
  NEONMAP1(vcalts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
6768
  NEONMAP1(vcvtad_s64_f64, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
6769
  NEONMAP1(vcvtad_u64_f64, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
6770
  NEONMAP1(vcvtas_s32_f32, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
6771
  NEONMAP1(vcvtas_u32_f32, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
6772
  NEONMAP1(vcvtd_n_f64_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
6773
  NEONMAP1(vcvtd_n_f64_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
6774
  NEONMAP1(vcvtd_n_s64_f64, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
6775
  NEONMAP1(vcvtd_n_u64_f64, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
6776
  NEONMAP1(vcvtd_s64_f64, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
6777
  NEONMAP1(vcvtd_u64_f64, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
6778
  NEONMAP1(vcvth_bf16_f32, aarch64_neon_bfcvt, 0),
6779
  NEONMAP1(vcvtmd_s64_f64, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
6780
  NEONMAP1(vcvtmd_u64_f64, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
6781
  NEONMAP1(vcvtms_s32_f32, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
6782
  NEONMAP1(vcvtms_u32_f32, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
6783
  NEONMAP1(vcvtnd_s64_f64, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
6784
  NEONMAP1(vcvtnd_u64_f64, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
6785
  NEONMAP1(vcvtns_s32_f32, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
6786
  NEONMAP1(vcvtns_u32_f32, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
6787
  NEONMAP1(vcvtpd_s64_f64, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
6788
  NEONMAP1(vcvtpd_u64_f64, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
6789
  NEONMAP1(vcvtps_s32_f32, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
6790
  NEONMAP1(vcvtps_u32_f32, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
6791
  NEONMAP1(vcvts_n_f32_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
6792
  NEONMAP1(vcvts_n_f32_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
6793
  NEONMAP1(vcvts_n_s32_f32, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
6794
  NEONMAP1(vcvts_n_u32_f32, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
6795
  NEONMAP1(vcvts_s32_f32, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
6796
  NEONMAP1(vcvts_u32_f32, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
6797
  NEONMAP1(vcvtxd_f32_f64, aarch64_sisd_fcvtxn, 0),
6798
  NEONMAP1(vmaxnmv_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
6799
  NEONMAP1(vmaxnmvq_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
6800
  NEONMAP1(vmaxnmvq_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
6801
  NEONMAP1(vmaxv_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
6802
  NEONMAP1(vmaxv_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
6803
  NEONMAP1(vmaxv_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
6804
  NEONMAP1(vmaxvq_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
6805
  NEONMAP1(vmaxvq_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
6806
  NEONMAP1(vmaxvq_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
6807
  NEONMAP1(vmaxvq_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
6808
  NEONMAP1(vminnmv_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
6809
  NEONMAP1(vminnmvq_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
6810
  NEONMAP1(vminnmvq_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
6811
  NEONMAP1(vminv_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
6812
  NEONMAP1(vminv_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
6813
  NEONMAP1(vminv_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
6814
  NEONMAP1(vminvq_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
6815
  NEONMAP1(vminvq_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
6816
  NEONMAP1(vminvq_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
6817
  NEONMAP1(vminvq_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
6818
  NEONMAP1(vmull_p64, aarch64_neon_pmull64, 0),
6819
  NEONMAP1(vmulxd_f64, aarch64_neon_fmulx, Add1ArgType),
6820
  NEONMAP1(vmulxs_f32, aarch64_neon_fmulx, Add1ArgType),
6821
  NEONMAP1(vpaddd_s64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
6822
  NEONMAP1(vpaddd_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
6823
  NEONMAP1(vpmaxnmqd_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
6824
  NEONMAP1(vpmaxnms_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
6825
  NEONMAP1(vpmaxqd_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
6826
  NEONMAP1(vpmaxs_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
6827
  NEONMAP1(vpminnmqd_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
6828
  NEONMAP1(vpminnms_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
6829
  NEONMAP1(vpminqd_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
6830
  NEONMAP1(vpmins_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
6831
  NEONMAP1(vqabsb_s8, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
6832
  NEONMAP1(vqabsd_s64, aarch64_neon_sqabs, Add1ArgType),
6833
  NEONMAP1(vqabsh_s16, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
6834
  NEONMAP1(vqabss_s32, aarch64_neon_sqabs, Add1ArgType),
6835
  NEONMAP1(vqaddb_s8, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
6836
  NEONMAP1(vqaddb_u8, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
6837
  NEONMAP1(vqaddd_s64, aarch64_neon_sqadd, Add1ArgType),
6838
  NEONMAP1(vqaddd_u64, aarch64_neon_uqadd, Add1ArgType),
6839
  NEONMAP1(vqaddh_s16, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
6840
  NEONMAP1(vqaddh_u16, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
6841
  NEONMAP1(vqadds_s32, aarch64_neon_sqadd, Add1ArgType),
6842
  NEONMAP1(vqadds_u32, aarch64_neon_uqadd, Add1ArgType),
6843
  NEONMAP1(vqdmulhh_s16, aarch64_neon_sqdmulh, Vectorize1ArgType | Use64BitVectors),
6844
  NEONMAP1(vqdmulhs_s32, aarch64_neon_sqdmulh, Add1ArgType),
6845
  NEONMAP1(vqdmullh_s16, aarch64_neon_sqdmull, VectorRet | Use128BitVectors),
6846
  NEONMAP1(vqdmulls_s32, aarch64_neon_sqdmulls_scalar, 0),
6847
  NEONMAP1(vqmovnd_s64, aarch64_neon_scalar_sqxtn, AddRetType | Add1ArgType),
6848
  NEONMAP1(vqmovnd_u64, aarch64_neon_scalar_uqxtn, AddRetType | Add1ArgType),
6849
  NEONMAP1(vqmovnh_s16, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
6850
  NEONMAP1(vqmovnh_u16, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
6851
  NEONMAP1(vqmovns_s32, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
6852
  NEONMAP1(vqmovns_u32, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
6853
  NEONMAP1(vqmovund_s64, aarch64_neon_scalar_sqxtun, AddRetType | Add1ArgType),
6854
  NEONMAP1(vqmovunh_s16, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
6855
  NEONMAP1(vqmovuns_s32, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
6856
  NEONMAP1(vqnegb_s8, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
6857
  NEONMAP1(vqnegd_s64, aarch64_neon_sqneg, Add1ArgType),
6858
  NEONMAP1(vqnegh_s16, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
6859
  NEONMAP1(vqnegs_s32, aarch64_neon_sqneg, Add1ArgType),
6860
  NEONMAP1(vqrdmlahh_s16, aarch64_neon_sqrdmlah, Vectorize1ArgType | Use64BitVectors),
6861
  NEONMAP1(vqrdmlahs_s32, aarch64_neon_sqrdmlah, Add1ArgType),
6862
  NEONMAP1(vqrdmlshh_s16, aarch64_neon_sqrdmlsh, Vectorize1ArgType | Use64BitVectors),
6863
  NEONMAP1(vqrdmlshs_s32, aarch64_neon_sqrdmlsh, Add1ArgType),
6864
  NEONMAP1(vqrdmulhh_s16, aarch64_neon_sqrdmulh, Vectorize1ArgType | Use64BitVectors),
6865
  NEONMAP1(vqrdmulhs_s32, aarch64_neon_sqrdmulh, Add1ArgType),
6866
  NEONMAP1(vqrshlb_s8, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
6867
  NEONMAP1(vqrshlb_u8, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
6868
  NEONMAP1(vqrshld_s64, aarch64_neon_sqrshl, Add1ArgType),
6869
  NEONMAP1(vqrshld_u64, aarch64_neon_uqrshl, Add1ArgType),
6870
  NEONMAP1(vqrshlh_s16, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
6871
  NEONMAP1(vqrshlh_u16, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
6872
  NEONMAP1(vqrshls_s32, aarch64_neon_sqrshl, Add1ArgType),
6873
  NEONMAP1(vqrshls_u32, aarch64_neon_uqrshl, Add1ArgType),
6874
  NEONMAP1(vqrshrnd_n_s64, aarch64_neon_sqrshrn, AddRetType),
6875
  NEONMAP1(vqrshrnd_n_u64, aarch64_neon_uqrshrn, AddRetType),
6876
  NEONMAP1(vqrshrnh_n_s16, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
6877
  NEONMAP1(vqrshrnh_n_u16, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
6878
  NEONMAP1(vqrshrns_n_s32, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
6879
  NEONMAP1(vqrshrns_n_u32, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
6880
  NEONMAP1(vqrshrund_n_s64, aarch64_neon_sqrshrun, AddRetType),
6881
  NEONMAP1(vqrshrunh_n_s16, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
6882
  NEONMAP1(vqrshruns_n_s32, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
6883
  NEONMAP1(vqshlb_n_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
6884
  NEONMAP1(vqshlb_n_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
6885
  NEONMAP1(vqshlb_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
6886
  NEONMAP1(vqshlb_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
6887
  NEONMAP1(vqshld_s64, aarch64_neon_sqshl, Add1ArgType),
6888
  NEONMAP1(vqshld_u64, aarch64_neon_uqshl, Add1ArgType),
6889
  NEONMAP1(vqshlh_n_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
6890
  NEONMAP1(vqshlh_n_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
6891
  NEONMAP1(vqshlh_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
6892
  NEONMAP1(vqshlh_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
6893
  NEONMAP1(vqshls_n_s32, aarch64_neon_sqshl, Add1ArgType),
6894
  NEONMAP1(vqshls_n_u32, aarch64_neon_uqshl, Add1ArgType),
6895
  NEONMAP1(vqshls_s32, aarch64_neon_sqshl, Add1ArgType),
6896
  NEONMAP1(vqshls_u32, aarch64_neon_uqshl, Add1ArgType),
6897
  NEONMAP1(vqshlub_n_s8, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
6898
  NEONMAP1(vqshluh_n_s16, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
6899
  NEONMAP1(vqshlus_n_s32, aarch64_neon_sqshlu, Add1ArgType),
6900
  NEONMAP1(vqshrnd_n_s64, aarch64_neon_sqshrn, AddRetType),
6901
  NEONMAP1(vqshrnd_n_u64, aarch64_neon_uqshrn, AddRetType),
6902
  NEONMAP1(vqshrnh_n_s16, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
6903
  NEONMAP1(vqshrnh_n_u16, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
6904
  NEONMAP1(vqshrns_n_s32, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
6905
  NEONMAP1(vqshrns_n_u32, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
6906
  NEONMAP1(vqshrund_n_s64, aarch64_neon_sqshrun, AddRetType),
6907
  NEONMAP1(vqshrunh_n_s16, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
6908
  NEONMAP1(vqshruns_n_s32, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
6909
  NEONMAP1(vqsubb_s8, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
6910
  NEONMAP1(vqsubb_u8, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
6911
  NEONMAP1(vqsubd_s64, aarch64_neon_sqsub, Add1ArgType),
6912
  NEONMAP1(vqsubd_u64, aarch64_neon_uqsub, Add1ArgType),
6913
  NEONMAP1(vqsubh_s16, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
6914
  NEONMAP1(vqsubh_u16, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
6915
  NEONMAP1(vqsubs_s32, aarch64_neon_sqsub, Add1ArgType),
6916
  NEONMAP1(vqsubs_u32, aarch64_neon_uqsub, Add1ArgType),
6917
  NEONMAP1(vrecped_f64, aarch64_neon_frecpe, Add1ArgType),
6918
  NEONMAP1(vrecpes_f32, aarch64_neon_frecpe, Add1ArgType),
6919
  NEONMAP1(vrecpxd_f64, aarch64_neon_frecpx, Add1ArgType),
6920
  NEONMAP1(vrecpxs_f32, aarch64_neon_frecpx, Add1ArgType),
6921
  NEONMAP1(vrshld_s64, aarch64_neon_srshl, Add1ArgType),
6922
  NEONMAP1(vrshld_u64, aarch64_neon_urshl, Add1ArgType),
6923
  NEONMAP1(vrsqrted_f64, aarch64_neon_frsqrte, Add1ArgType),
6924
  NEONMAP1(vrsqrtes_f32, aarch64_neon_frsqrte, Add1ArgType),
6925
  NEONMAP1(vrsqrtsd_f64, aarch64_neon_frsqrts, Add1ArgType),
6926
  NEONMAP1(vrsqrtss_f32, aarch64_neon_frsqrts, Add1ArgType),
6927
  NEONMAP1(vsha1cq_u32, aarch64_crypto_sha1c, 0),
6928
  NEONMAP1(vsha1h_u32, aarch64_crypto_sha1h, 0),
6929
  NEONMAP1(vsha1mq_u32, aarch64_crypto_sha1m, 0),
6930
  NEONMAP1(vsha1pq_u32, aarch64_crypto_sha1p, 0),
6931
  NEONMAP1(vshld_s64, aarch64_neon_sshl, Add1ArgType),
6932
  NEONMAP1(vshld_u64, aarch64_neon_ushl, Add1ArgType),
6933
  NEONMAP1(vslid_n_s64, aarch64_neon_vsli, Vectorize1ArgType),
6934
  NEONMAP1(vslid_n_u64, aarch64_neon_vsli, Vectorize1ArgType),
6935
  NEONMAP1(vsqaddb_u8, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
6936
  NEONMAP1(vsqaddd_u64, aarch64_neon_usqadd, Add1ArgType),
6937
  NEONMAP1(vsqaddh_u16, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
6938
  NEONMAP1(vsqadds_u32, aarch64_neon_usqadd, Add1ArgType),
6939
  NEONMAP1(vsrid_n_s64, aarch64_neon_vsri, Vectorize1ArgType),
6940
  NEONMAP1(vsrid_n_u64, aarch64_neon_vsri, Vectorize1ArgType),
6941
  NEONMAP1(vuqaddb_s8, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
6942
  NEONMAP1(vuqaddd_s64, aarch64_neon_suqadd, Add1ArgType),
6943
  NEONMAP1(vuqaddh_s16, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
6944
  NEONMAP1(vuqadds_s32, aarch64_neon_suqadd, Add1ArgType),
6945
  // FP16 scalar intrinsics go here.
6946
  NEONMAP1(vabdh_f16, aarch64_sisd_fabd, Add1ArgType),
6947
  NEONMAP1(vcvtah_s32_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
6948
  NEONMAP1(vcvtah_s64_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
6949
  NEONMAP1(vcvtah_u32_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
6950
  NEONMAP1(vcvtah_u64_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
6951
  NEONMAP1(vcvth_n_f16_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
6952
  NEONMAP1(vcvth_n_f16_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
6953
  NEONMAP1(vcvth_n_f16_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
6954
  NEONMAP1(vcvth_n_f16_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
6955
  NEONMAP1(vcvth_n_s32_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
6956
  NEONMAP1(vcvth_n_s64_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
6957
  NEONMAP1(vcvth_n_u32_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
6958
  NEONMAP1(vcvth_n_u64_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
6959
  NEONMAP1(vcvth_s32_f16, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
6960
  NEONMAP1(vcvth_s64_f16, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
6961
  NEONMAP1(vcvth_u32_f16, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
6962
  NEONMAP1(vcvth_u64_f16, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
6963
  NEONMAP1(vcvtmh_s32_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
6964
  NEONMAP1(vcvtmh_s64_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
6965
  NEONMAP1(vcvtmh_u32_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
6966
  NEONMAP1(vcvtmh_u64_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
6967
  NEONMAP1(vcvtnh_s32_f16, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
6968
  NEONMAP1(vcvtnh_s64_f16, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
6969
  NEONMAP1(vcvtnh_u32_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
6970
  NEONMAP1(vcvtnh_u64_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
6971
  NEONMAP1(vcvtph_s32_f16, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
6972
  NEONMAP1(vcvtph_s64_f16, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
6973
  NEONMAP1(vcvtph_u32_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
6974
  NEONMAP1(vcvtph_u64_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
6975
  NEONMAP1(vmulxh_f16, aarch64_neon_fmulx, Add1ArgType),
6976
  NEONMAP1(vrecpeh_f16, aarch64_neon_frecpe, Add1ArgType),
6977
  NEONMAP1(vrecpxh_f16, aarch64_neon_frecpx, Add1ArgType),
6978
  NEONMAP1(vrsqrteh_f16, aarch64_neon_frsqrte, Add1ArgType),
6979
  NEONMAP1(vrsqrtsh_f16, aarch64_neon_frsqrts, Add1ArgType),
6980
};
6981
6982
// Some intrinsics are equivalent for codegen.
6983
static const std::pair<unsigned, unsigned> NEONEquivalentIntrinsicMap[] = {
6984
  { NEON::BI__builtin_neon_splat_lane_bf16, NEON::BI__builtin_neon_splat_lane_v, },
6985
  { NEON::BI__builtin_neon_splat_laneq_bf16, NEON::BI__builtin_neon_splat_laneq_v, },
6986
  { NEON::BI__builtin_neon_splatq_lane_bf16, NEON::BI__builtin_neon_splatq_lane_v, },
6987
  { NEON::BI__builtin_neon_splatq_laneq_bf16, NEON::BI__builtin_neon_splatq_laneq_v, },
6988
  { NEON::BI__builtin_neon_vabd_f16, NEON::BI__builtin_neon_vabd_v, },
6989
  { NEON::BI__builtin_neon_vabdq_f16, NEON::BI__builtin_neon_vabdq_v, },
6990
  { NEON::BI__builtin_neon_vabs_f16, NEON::BI__builtin_neon_vabs_v, },
6991
  { NEON::BI__builtin_neon_vabsq_f16, NEON::BI__builtin_neon_vabsq_v, },
6992
  { NEON::BI__builtin_neon_vbsl_f16, NEON::BI__builtin_neon_vbsl_v, },
6993
  { NEON::BI__builtin_neon_vbslq_f16, NEON::BI__builtin_neon_vbslq_v, },
6994
  { NEON::BI__builtin_neon_vcage_f16, NEON::BI__builtin_neon_vcage_v, },
6995
  { NEON::BI__builtin_neon_vcageq_f16, NEON::BI__builtin_neon_vcageq_v, },
6996
  { NEON::BI__builtin_neon_vcagt_f16, NEON::BI__builtin_neon_vcagt_v, },
6997
  { NEON::BI__builtin_neon_vcagtq_f16, NEON::BI__builtin_neon_vcagtq_v, },
6998
  { NEON::BI__builtin_neon_vcale_f16, NEON::BI__builtin_neon_vcale_v, },
6999
  { NEON::BI__builtin_neon_vcaleq_f16, NEON::BI__builtin_neon_vcaleq_v, },
7000
  { NEON::BI__builtin_neon_vcalt_f16, NEON::BI__builtin_neon_vcalt_v, },
7001
  { NEON::BI__builtin_neon_vcaltq_f16, NEON::BI__builtin_neon_vcaltq_v, },
7002
  { NEON::BI__builtin_neon_vceqz_f16, NEON::BI__builtin_neon_vceqz_v, },
7003
  { NEON::BI__builtin_neon_vceqzq_f16, NEON::BI__builtin_neon_vceqzq_v, },
7004
  { NEON::BI__builtin_neon_vcgez_f16, NEON::BI__builtin_neon_vcgez_v, },
7005
  { NEON::BI__builtin_neon_vcgezq_f16, NEON::BI__builtin_neon_vcgezq_v, },
7006
  { NEON::BI__builtin_neon_vcgtz_f16, NEON::BI__builtin_neon_vcgtz_v, },
7007
  { NEON::BI__builtin_neon_vcgtzq_f16, NEON::BI__builtin_neon_vcgtzq_v, },
7008
  { NEON::BI__builtin_neon_vclez_f16, NEON::BI__builtin_neon_vclez_v, },
7009
  { NEON::BI__builtin_neon_vclezq_f16, NEON::BI__builtin_neon_vclezq_v, },
7010
  { NEON::BI__builtin_neon_vcltz_f16, NEON::BI__builtin_neon_vcltz_v, },
7011
  { NEON::BI__builtin_neon_vcltzq_f16, NEON::BI__builtin_neon_vcltzq_v, },
7012
  { NEON::BI__builtin_neon_vext_f16, NEON::BI__builtin_neon_vext_v, },
7013
  { NEON::BI__builtin_neon_vextq_f16, NEON::BI__builtin_neon_vextq_v, },
7014
  { NEON::BI__builtin_neon_vfma_f16, NEON::BI__builtin_neon_vfma_v, },
7015
  { NEON::BI__builtin_neon_vfma_lane_f16, NEON::BI__builtin_neon_vfma_lane_v, },
7016
  { NEON::BI__builtin_neon_vfma_laneq_f16, NEON::BI__builtin_neon_vfma_laneq_v, },
7017
  { NEON::BI__builtin_neon_vfmaq_f16, NEON::BI__builtin_neon_vfmaq_v, },
7018
  { NEON::BI__builtin_neon_vfmaq_lane_f16, NEON::BI__builtin_neon_vfmaq_lane_v, },
7019
  { NEON::BI__builtin_neon_vfmaq_laneq_f16, NEON::BI__builtin_neon_vfmaq_laneq_v, },
7020
  { NEON::BI__builtin_neon_vld1_bf16_x2, NEON::BI__builtin_neon_vld1_x2_v },
7021
  { NEON::BI__builtin_neon_vld1_bf16_x3, NEON::BI__builtin_neon_vld1_x3_v },
7022
  { NEON::BI__builtin_neon_vld1_bf16_x4, NEON::BI__builtin_neon_vld1_x4_v },
7023
  { NEON::BI__builtin_neon_vld1_bf16, NEON::BI__builtin_neon_vld1_v },
7024
  { NEON::BI__builtin_neon_vld1_dup_bf16, NEON::BI__builtin_neon_vld1_dup_v },
7025
  { NEON::BI__builtin_neon_vld1_lane_bf16, NEON::BI__builtin_neon_vld1_lane_v },
7026
  { NEON::BI__builtin_neon_vld1q_bf16_x2, NEON::BI__builtin_neon_vld1q_x2_v },
7027
  { NEON::BI__builtin_neon_vld1q_bf16_x3, NEON::BI__builtin_neon_vld1q_x3_v },
7028
  { NEON::BI__builtin_neon_vld1q_bf16_x4, NEON::BI__builtin_neon_vld1q_x4_v },
7029
  { NEON::BI__builtin_neon_vld1q_bf16, NEON::BI__builtin_neon_vld1q_v },
7030
  { NEON::BI__builtin_neon_vld1q_dup_bf16, NEON::BI__builtin_neon_vld1q_dup_v },
7031
  { NEON::BI__builtin_neon_vld1q_lane_bf16, NEON::BI__builtin_neon_vld1q_lane_v },
7032
  { NEON::BI__builtin_neon_vld2_bf16, NEON::BI__builtin_neon_vld2_v },
7033
  { NEON::BI__builtin_neon_vld2_dup_bf16, NEON::BI__builtin_neon_vld2_dup_v },
7034
  { NEON::BI__builtin_neon_vld2_lane_bf16, NEON::BI__builtin_neon_vld2_lane_v },
7035
  { NEON::BI__builtin_neon_vld2q_bf16, NEON::BI__builtin_neon_vld2q_v },
7036
  { NEON::BI__builtin_neon_vld2q_dup_bf16, NEON::BI__builtin_neon_vld2q_dup_v },
7037
  { NEON::BI__builtin_neon_vld2q_lane_bf16, NEON::BI__builtin_neon_vld2q_lane_v },
7038
  { NEON::BI__builtin_neon_vld3_bf16, NEON::BI__builtin_neon_vld3_v },
7039
  { NEON::BI__builtin_neon_vld3_dup_bf16, NEON::BI__builtin_neon_vld3_dup_v },
7040
  { NEON::BI__builtin_neon_vld3_lane_bf16, NEON::BI__builtin_neon_vld3_lane_v },
7041
  { NEON::BI__builtin_neon_vld3q_bf16, NEON::BI__builtin_neon_vld3q_v },
7042
  { NEON::BI__builtin_neon_vld3q_dup_bf16, NEON::BI__builtin_neon_vld3q_dup_v },
7043
  { NEON::BI__builtin_neon_vld3q_lane_bf16, NEON::BI__builtin_neon_vld3q_lane_v },
7044
  { NEON::BI__builtin_neon_vld4_bf16, NEON::BI__builtin_neon_vld4_v },
7045
  { NEON::BI__builtin_neon_vld4_dup_bf16, NEON::BI__builtin_neon_vld4_dup_v },
7046
  { NEON::BI__builtin_neon_vld4_lane_bf16, NEON::BI__builtin_neon_vld4_lane_v },
7047
  { NEON::BI__builtin_neon_vld4q_bf16, NEON::BI__builtin_neon_vld4q_v },
7048
  { NEON::BI__builtin_neon_vld4q_dup_bf16, NEON::BI__builtin_neon_vld4q_dup_v },
7049
  { NEON::BI__builtin_neon_vld4q_lane_bf16, NEON::BI__builtin_neon_vld4q_lane_v },
7050
  { NEON::BI__builtin_neon_vmax_f16, NEON::BI__builtin_neon_vmax_v, },
7051
  { NEON::BI__builtin_neon_vmaxnm_f16, NEON::BI__builtin_neon_vmaxnm_v, },
7052
  { NEON::BI__builtin_neon_vmaxnmq_f16, NEON::BI__builtin_neon_vmaxnmq_v, },
7053
  { NEON::BI__builtin_neon_vmaxq_f16, NEON::BI__builtin_neon_vmaxq_v, },
7054
  { NEON::BI__builtin_neon_vmin_f16, NEON::BI__builtin_neon_vmin_v, },
7055
  { NEON::BI__builtin_neon_vminnm_f16, NEON::BI__builtin_neon_vminnm_v, },
7056
  { NEON::BI__builtin_neon_vminnmq_f16, NEON::BI__builtin_neon_vminnmq_v, },
7057
  { NEON::BI__builtin_neon_vminq_f16, NEON::BI__builtin_neon_vminq_v, },
7058
  { NEON::BI__builtin_neon_vmulx_f16, NEON::BI__builtin_neon_vmulx_v, },
7059
  { NEON::BI__builtin_neon_vmulxq_f16, NEON::BI__builtin_neon_vmulxq_v, },
7060
  { NEON::BI__builtin_neon_vpadd_f16, NEON::BI__builtin_neon_vpadd_v, },
7061
  { NEON::BI__builtin_neon_vpaddq_f16, NEON::BI__builtin_neon_vpaddq_v, },
7062
  { NEON::BI__builtin_neon_vpmax_f16, NEON::BI__builtin_neon_vpmax_v, },
7063
  { NEON::BI__builtin_neon_vpmaxnm_f16, NEON::BI__builtin_neon_vpmaxnm_v, },
7064
  { NEON::BI__builtin_neon_vpmaxnmq_f16, NEON::BI__builtin_neon_vpmaxnmq_v, },
7065
  { NEON::BI__builtin_neon_vpmaxq_f16, NEON::BI__builtin_neon_vpmaxq_v, },
7066
  { NEON::BI__builtin_neon_vpmin_f16, NEON::BI__builtin_neon_vpmin_v, },
7067
  { NEON::BI__builtin_neon_vpminnm_f16, NEON::BI__builtin_neon_vpminnm_v, },
7068
  { NEON::BI__builtin_neon_vpminnmq_f16, NEON::BI__builtin_neon_vpminnmq_v, },
7069
  { NEON::BI__builtin_neon_vpminq_f16, NEON::BI__builtin_neon_vpminq_v, },
7070
  { NEON::BI__builtin_neon_vrecpe_f16, NEON::BI__builtin_neon_vrecpe_v, },
7071
  { NEON::BI__builtin_neon_vrecpeq_f16, NEON::BI__builtin_neon_vrecpeq_v, },
7072
  { NEON::BI__builtin_neon_vrecps_f16, NEON::BI__builtin_neon_vrecps_v, },
7073
  { NEON::BI__builtin_neon_vrecpsq_f16, NEON::BI__builtin_neon_vrecpsq_v, },
7074
  { NEON::BI__builtin_neon_vrnd_f16, NEON::BI__builtin_neon_vrnd_v, },
7075
  { NEON::BI__builtin_neon_vrnda_f16, NEON::BI__builtin_neon_vrnda_v, },
7076
  { NEON::BI__builtin_neon_vrndaq_f16, NEON::BI__builtin_neon_vrndaq_v, },
7077
  { NEON::BI__builtin_neon_vrndi_f16, NEON::BI__builtin_neon_vrndi_v, },
7078
  { NEON::BI__builtin_neon_vrndiq_f16, NEON::BI__builtin_neon_vrndiq_v, },
7079
  { NEON::BI__builtin_neon_vrndm_f16, NEON::BI__builtin_neon_vrndm_v, },
7080
  { NEON::BI__builtin_neon_vrndmq_f16, NEON::BI__builtin_neon_vrndmq_v, },
7081
  { NEON::BI__builtin_neon_vrndn_f16, NEON::BI__builtin_neon_vrndn_v, },
7082
  { NEON::BI__builtin_neon_vrndnq_f16, NEON::BI__builtin_neon_vrndnq_v, },
7083
  { NEON::BI__builtin_neon_vrndp_f16, NEON::BI__builtin_neon_vrndp_v, },
7084
  { NEON::BI__builtin_neon_vrndpq_f16, NEON::BI__builtin_neon_vrndpq_v, },
7085
  { NEON::BI__builtin_neon_vrndq_f16, NEON::BI__builtin_neon_vrndq_v, },
7086
  { NEON::BI__builtin_neon_vrndx_f16, NEON::BI__builtin_neon_vrndx_v, },
7087
  { NEON::BI__builtin_neon_vrndxq_f16, NEON::BI__builtin_neon_vrndxq_v, },
7088
  { NEON::BI__builtin_neon_vrsqrte_f16, NEON::BI__builtin_neon_vrsqrte_v, },
7089
  { NEON::BI__builtin_neon_vrsqrteq_f16, NEON::BI__builtin_neon_vrsqrteq_v, },
7090
  { NEON::BI__builtin_neon_vrsqrts_f16, NEON::BI__builtin_neon_vrsqrts_v, },
7091
  { NEON::BI__builtin_neon_vrsqrtsq_f16, NEON::BI__builtin_neon_vrsqrtsq_v, },
7092
  { NEON::BI__builtin_neon_vsqrt_f16, NEON::BI__builtin_neon_vsqrt_v, },
7093
  { NEON::BI__builtin_neon_vsqrtq_f16, NEON::BI__builtin_neon_vsqrtq_v, },
7094
  { NEON::BI__builtin_neon_vst1_bf16_x2, NEON::BI__builtin_neon_vst1_x2_v },
7095
  { NEON::BI__builtin_neon_vst1_bf16_x3, NEON::BI__builtin_neon_vst1_x3_v },
7096
  { NEON::BI__builtin_neon_vst1_bf16_x4, NEON::BI__builtin_neon_vst1_x4_v },
7097
  { NEON::BI__builtin_neon_vst1_bf16, NEON::BI__builtin_neon_vst1_v },
7098
  { NEON::BI__builtin_neon_vst1_lane_bf16, NEON::BI__builtin_neon_vst1_lane_v },
7099
  { NEON::BI__builtin_neon_vst1q_bf16_x2, NEON::BI__builtin_neon_vst1q_x2_v },
7100
  { NEON::BI__builtin_neon_vst1q_bf16_x3, NEON::BI__builtin_neon_vst1q_x3_v },
7101
  { NEON::BI__builtin_neon_vst1q_bf16_x4, NEON::BI__builtin_neon_vst1q_x4_v },
7102
  { NEON::BI__builtin_neon_vst1q_bf16, NEON::BI__builtin_neon_vst1q_v },
7103
  { NEON::BI__builtin_neon_vst1q_lane_bf16, NEON::BI__builtin_neon_vst1q_lane_v },
7104
  { NEON::BI__builtin_neon_vst2_bf16, NEON::BI__builtin_neon_vst2_v },
7105
  { NEON::BI__builtin_neon_vst2_lane_bf16, NEON::BI__builtin_neon_vst2_lane_v },
7106
  { NEON::BI__builtin_neon_vst2q_bf16, NEON::BI__builtin_neon_vst2q_v },
7107
  { NEON::BI__builtin_neon_vst2q_lane_bf16, NEON::BI__builtin_neon_vst2q_lane_v },
7108
  { NEON::BI__builtin_neon_vst3_bf16, NEON::BI__builtin_neon_vst3_v },
7109
  { NEON::BI__builtin_neon_vst3_lane_bf16, NEON::BI__builtin_neon_vst3_lane_v },
7110
  { NEON::BI__builtin_neon_vst3q_bf16, NEON::BI__builtin_neon_vst3q_v },
7111
  { NEON::BI__builtin_neon_vst3q_lane_bf16, NEON::BI__builtin_neon_vst3q_lane_v },
7112
  { NEON::BI__builtin_neon_vst4_bf16, NEON::BI__builtin_neon_vst4_v },
7113
  { NEON::BI__builtin_neon_vst4_lane_bf16, NEON::BI__builtin_neon_vst4_lane_v },
7114
  { NEON::BI__builtin_neon_vst4q_bf16, NEON::BI__builtin_neon_vst4q_v },
7115
  { NEON::BI__builtin_neon_vst4q_lane_bf16, NEON::BI__builtin_neon_vst4q_lane_v },
7116
  { NEON::BI__builtin_neon_vtrn_f16, NEON::BI__builtin_neon_vtrn_v, },
7117
  { NEON::BI__builtin_neon_vtrnq_f16, NEON::BI__builtin_neon_vtrnq_v, },
7118
  { NEON::BI__builtin_neon_vuzp_f16, NEON::BI__builtin_neon_vuzp_v, },
7119
  { NEON::BI__builtin_neon_vuzpq_f16, NEON::BI__builtin_neon_vuzpq_v, },
7120
  { NEON::BI__builtin_neon_vzip_f16, NEON::BI__builtin_neon_vzip_v, },
7121
  { NEON::BI__builtin_neon_vzipq_f16, NEON::BI__builtin_neon_vzipq_v, },
7122
  // The mangling rules cause us to have one ID for each type for vldap1(q)_lane
7123
  // and vstl1(q)_lane, but codegen is equivalent for all of them. Choose an
7124
  // arbitrary one to be handled as the canonical variation.
7125
  { NEON::BI__builtin_neon_vldap1_lane_u64, NEON::BI__builtin_neon_vldap1_lane_s64 },
7126
  { NEON::BI__builtin_neon_vldap1_lane_f64, NEON::BI__builtin_neon_vldap1_lane_s64 },
7127
  { NEON::BI__builtin_neon_vldap1_lane_p64, NEON::BI__builtin_neon_vldap1_lane_s64 },
7128
  { NEON::BI__builtin_neon_vldap1q_lane_u64, NEON::BI__builtin_neon_vldap1q_lane_s64 },
7129
  { NEON::BI__builtin_neon_vldap1q_lane_f64, NEON::BI__builtin_neon_vldap1q_lane_s64 },
7130
  { NEON::BI__builtin_neon_vldap1q_lane_p64, NEON::BI__builtin_neon_vldap1q_lane_s64 },
7131
  { NEON::BI__builtin_neon_vstl1_lane_u64, NEON::BI__builtin_neon_vstl1_lane_s64 },
7132
  { NEON::BI__builtin_neon_vstl1_lane_f64, NEON::BI__builtin_neon_vstl1_lane_s64 },
7133
  { NEON::BI__builtin_neon_vstl1_lane_p64, NEON::BI__builtin_neon_vstl1_lane_s64 },
7134
  { NEON::BI__builtin_neon_vstl1q_lane_u64, NEON::BI__builtin_neon_vstl1q_lane_s64 },
7135
  { NEON::BI__builtin_neon_vstl1q_lane_f64, NEON::BI__builtin_neon_vstl1q_lane_s64 },
7136
  { NEON::BI__builtin_neon_vstl1q_lane_p64, NEON::BI__builtin_neon_vstl1q_lane_s64 },
7137
};
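// A hedged sketch of how a sorted pair table like NEONEquivalentIntrinsicMap
// can be consulted to canonicalize a builtin ID before the intrinsic-info
// lookup; the helper name is hypothetical and the real call site may differ.
static unsigned canonicalizeNeonBuiltinIDSketch(unsigned BuiltinID) {
  auto It = llvm::lower_bound(NEONEquivalentIntrinsicMap, BuiltinID,
                              [](const std::pair<unsigned, unsigned> &Entry,
                                 unsigned ID) { return Entry.first < ID; });
  if (It != std::end(NEONEquivalentIntrinsicMap) && It->first == BuiltinID)
    return It->second; // Emit code as if the canonical builtin had been used.
  return BuiltinID;    // Not an alias; keep the ID as written.
}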
7138
7139
#undef NEONMAP0
7140
#undef NEONMAP1
7141
#undef NEONMAP2
7142
7143
#define SVEMAP1(NameBase, LLVMIntrinsic, TypeModifier)                         \
7144
  {                                                                            \
7145
    #NameBase, SVE::BI__builtin_sve_##NameBase, Intrinsic::LLVMIntrinsic, 0,   \
7146
        TypeModifier                                                           \
7147
  }
7148
7149
#define SVEMAP2(NameBase, TypeModifier)                                        \
7150
  { #NameBase, SVE::BI__builtin_sve_##NameBase, 0, 0, TypeModifier }
7151
static const ARMVectorIntrinsicInfo AArch64SVEIntrinsicMap[] = {
7152
#define GET_SVE_LLVM_INTRINSIC_MAP
7153
#include "clang/Basic/arm_sve_builtin_cg.inc"
7154
#include "clang/Basic/BuiltinsAArch64NeonSVEBridge_cg.def"
7155
#undef GET_SVE_LLVM_INTRINSIC_MAP
7156
};
7157
7158
#undef SVEMAP1
7159
#undef SVEMAP2
7160
7161
#define SMEMAP1(NameBase, LLVMIntrinsic, TypeModifier)                         \
7162
  {                                                                            \
7163
    #NameBase, SME::BI__builtin_sme_##NameBase, Intrinsic::LLVMIntrinsic, 0,   \
7164
        TypeModifier                                                           \
7165
  }
7166
7167
#define SMEMAP2(NameBase, TypeModifier)                                        \
7168
  { #NameBase, SME::BI__builtin_sme_##NameBase, 0, 0, TypeModifier }
7169
static const ARMVectorIntrinsicInfo AArch64SMEIntrinsicMap[] = {
7170
#define GET_SME_LLVM_INTRINSIC_MAP
7171
#include "clang/Basic/arm_sme_builtin_cg.inc"
7172
#undef GET_SME_LLVM_INTRINSIC_MAP
7173
};
7174
7175
#undef SMEMAP1
7176
#undef SMEMAP2
7177
7178
static bool NEONSIMDIntrinsicsProvenSorted = false;
7179
7180
static bool AArch64SIMDIntrinsicsProvenSorted = false;
7181
static bool AArch64SISDIntrinsicsProvenSorted = false;
7182
static bool AArch64SVEIntrinsicsProvenSorted = false;
7183
static bool AArch64SMEIntrinsicsProvenSorted = false;
7184
7185
static const ARMVectorIntrinsicInfo *
7186
findARMVectorIntrinsicInMap(ArrayRef<ARMVectorIntrinsicInfo> IntrinsicMap,
7187
49.2k
                            unsigned BuiltinID, bool &MapProvenSorted) {
7188
7189
49.2k
#ifndef NDEBUG
7190
49.2k
  if (!MapProvenSorted) {
7191
2.18k
    assert(llvm::is_sorted(IntrinsicMap));
7192
2.18k
    MapProvenSorted = true;
7193
2.18k
  }
7194
49.2k
#endif
7195
7196
49.2k
  const ARMVectorIntrinsicInfo *Builtin =
7197
49.2k
      llvm::lower_bound(IntrinsicMap, BuiltinID);
7198
7199
49.2k
  if (Builtin != IntrinsicMap.end() && Builtin->BuiltinID == BuiltinID)
7200
44.1k
    return Builtin;
7201
7202
5.14k
  return nullptr;
7203
49.2k
}
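// Hedged usage sketch (call shape illustrative only): a caller passes one of
// the static tables above together with its matching "ProvenSorted" flag.
//
//   const ARMVectorIntrinsicInfo *Info = findARMVectorIntrinsicInMap(
//       AArch64SVEIntrinsicMap, BuiltinID, AArch64SVEIntrinsicsProvenSorted);
//   if (!Info)
//     return nullptr; // Not table-driven; handled by explicit switch cases.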
7204
7205
Function *CodeGenFunction::LookupNeonLLVMIntrinsic(unsigned IntrinsicID,
7206
                                                   unsigned Modifier,
7207
                                                   llvm::Type *ArgType,
7208
1.00k
                                                   const CallExpr *E) {
7209
1.00k
  int VectorSize = 0;
7210
1.00k
  if (Modifier & Use64BitVectors)
7211
58
    VectorSize = 64;
7212
948
  else if (Modifier & Use128BitVectors)
7213
2
    VectorSize = 128;
7214
7215
  // Return type.
7216
1.00k
  SmallVector<llvm::Type *, 3> Tys;
7217
1.00k
  if (Modifier & AddRetType) {
7218
168
    llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
7219
168
    if (Modifier & VectorizeRetType)
7220
20
      Ty = llvm::FixedVectorType::get(
7221
20
          Ty, VectorSize ? VectorSize / Ty->getPrimitiveSizeInBits() : 1);
7222
7223
168
    Tys.push_back(Ty);
7224
168
  }
7225
7226
  // Arguments.
7227
1.00k
  if (Modifier & VectorizeArgTypes) {
7228
44
    int Elts = VectorSize ? VectorSize / ArgType->getPrimitiveSizeInBits() : 1;
7229
44
    ArgType = llvm::FixedVectorType::get(ArgType, Elts);
7230
44
  }
7231
7232
1.00k
  if (Modifier & (Add1ArgType | Add2ArgTypes))
7233
897
    Tys.push_back(ArgType);
7234
7235
1.00k
  if (Modifier & Add2ArgTypes)
7236
0
    Tys.push_back(ArgType);
7237
7238
1.00k
  if (Modifier & InventFloatType)
7239
0
    Tys.push_back(FloatTy);
7240
7241
1.00k
  return CGM.getIntrinsic(IntrinsicID, Tys);
7242
1.00k
}
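// Illustration of the modifier handling above: an entry flagged
// AddRetType | Add1ArgType yields Tys == { ReturnTy, ArgTy }, so
// CGM.getIntrinsic resolves the overload on both types; with VectorizeArgTypes
// plus Use64BitVectors the scalar ArgType is first widened to a 64-bit vector
// of that element, and the (possibly widened) ArgType is what gets pushed.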
7243
7244
static Value *EmitCommonNeonSISDBuiltinExpr(
7245
    CodeGenFunction &CGF, const ARMVectorIntrinsicInfo &SISDInfo,
7246
271
    SmallVectorImpl<Value *> &Ops, const CallExpr *E) {
7247
271
  unsigned BuiltinID = SISDInfo.BuiltinID;
7248
271
  unsigned int Int = SISDInfo.LLVMIntrinsic;
7249
271
  unsigned Modifier = SISDInfo.TypeModifier;
7250
271
  const char *s = SISDInfo.NameHint;
7251
7252
271
  switch (BuiltinID) {
7253
0
  case NEON::BI__builtin_neon_vcled_s64:
7254
0
  case NEON::BI__builtin_neon_vcled_u64:
7255
0
  case NEON::BI__builtin_neon_vcles_f32:
7256
0
  case NEON::BI__builtin_neon_vcled_f64:
7257
0
  case NEON::BI__builtin_neon_vcltd_s64:
7258
0
  case NEON::BI__builtin_neon_vcltd_u64:
7259
0
  case NEON::BI__builtin_neon_vclts_f32:
7260
0
  case NEON::BI__builtin_neon_vcltd_f64:
7261
1
  case NEON::BI__builtin_neon_vcales_f32:
7262
2
  case NEON::BI__builtin_neon_vcaled_f64:
7263
3
  case NEON::BI__builtin_neon_vcalts_f32:
7264
4
  case NEON::BI__builtin_neon_vcaltd_f64:
7265
    // Only one direction of comparisons actually exists; cmle is actually a cmge
7266
    // with swapped operands. The table gives us the right intrinsic but we
7267
    // still need to do the swap.
7268
4
    std::swap(Ops[0], Ops[1]);
7269
4
    break;
7270
271
  }
7271
7272
271
  assert(Int && "Generic code assumes a valid intrinsic");
7273
7274
  // Determine the type(s) of this overloaded AArch64 intrinsic.
7275
271
  const Expr *Arg = E->getArg(0);
7276
271
  llvm::Type *ArgTy = CGF.ConvertType(Arg->getType());
7277
271
  Function *F = CGF.LookupNeonLLVMIntrinsic(Int, Modifier, ArgTy, E);
7278
7279
271
  int j = 0;
7280
271
  ConstantInt *C0 = ConstantInt::get(CGF.SizeTy, 0);
7281
271
  for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
7282
705
       ai != ae; ++ai, ++j) {
7283
434
    llvm::Type *ArgTy = ai->getType();
7284
434
    if (Ops[j]->getType()->getPrimitiveSizeInBits() ==
7285
434
             ArgTy->getPrimitiveSizeInBits())
7286
334
      continue;
7287
7288
100
    assert(ArgTy->isVectorTy() && !Ops[j]->getType()->isVectorTy());
7289
    // The constant argument to an _n_ intrinsic always has Int32Ty, so truncate
7290
    // it before inserting.
7291
100
    Ops[j] = CGF.Builder.CreateTruncOrBitCast(
7292
100
        Ops[j], cast<llvm::VectorType>(ArgTy)->getElementType());
7293
100
    Ops[j] =
7294
100
        CGF.Builder.CreateInsertElement(PoisonValue::get(ArgTy), Ops[j], C0);
7295
100
  }
7296
7297
271
  Value *Result = CGF.EmitNeonCall(F, Ops, s);
7298
271
  llvm::Type *ResultType = CGF.ConvertType(E->getType());
7299
271
  if (ResultType->getPrimitiveSizeInBits().getFixedValue() <
7300
271
      Result->getType()->getPrimitiveSizeInBits().getFixedValue())
7301
60
    return CGF.Builder.CreateExtractElement(Result, C0);
7302
7303
211
  return CGF.Builder.CreateBitCast(Result, ResultType, s);
7304
271
}
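// Worked illustration of the operand swap above: for a "less-or-equal" scalar
// builtin such as vcled_s64(a, b), the table already names the corresponding
// greater-or-equal intrinsic, and swapping Ops[0] and Ops[1] makes the emitted
// call compute cmge(b, a), which is the same predicate as cmle(a, b).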
7305
7306
Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
7307
    unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic,
7308
    const char *NameHint, unsigned Modifier, const CallExpr *E,
7309
    SmallVectorImpl<llvm::Value *> &Ops, Address PtrOp0, Address PtrOp1,
7310
2.84k
    llvm::Triple::ArchType Arch) {
7311
  // Get the last argument, which specifies the vector type.
7312
2.84k
  const Expr *Arg = E->getArg(E->getNumArgs() - 1);
7313
2.84k
  std::optional<llvm::APSInt> NeonTypeConst =
7314
2.84k
      Arg->getIntegerConstantExpr(getContext());
7315
2.84k
  if (!NeonTypeConst)
7316
0
    return nullptr;
7317
7318
  // Determine the type of this overloaded NEON intrinsic.
7319
2.84k
  NeonTypeFlags Type(NeonTypeConst->getZExtValue());
7320
2.84k
  bool Usgn = Type.isUnsigned();
7321
2.84k
  bool Quad = Type.isQuad();
7322
2.84k
  const bool HasLegalHalfType = getTarget().hasLegalHalfType();
7323
2.84k
  const bool AllowBFloatArgsAndRet =
7324
2.84k
      getTargetHooks().getABIInfo().allowBFloatArgsAndRet();
7325
7326
2.84k
  llvm::FixedVectorType *VTy =
7327
2.84k
      GetNeonType(this, Type, HasLegalHalfType, false, AllowBFloatArgsAndRet);
7328
2.84k
  llvm::Type *Ty = VTy;
7329
2.84k
  if (!Ty)
7330
0
    return nullptr;
7331
7332
2.84k
  auto getAlignmentValue32 = [&](Address addr) -> Value* {
7333
390
    return Builder.getInt32(addr.getAlignment().getQuantity());
7334
390
  };
7335
7336
2.84k
  unsigned Int = LLVMIntrinsic;
7337
2.84k
  if ((Modifier & UnsignedAlts) && !Usgn)
7338
236
    Int = AltLLVMIntrinsic;
7339
7340
2.84k
  switch (BuiltinID) {
7341
684
  default: break;
7342
684
  case NEON::BI__builtin_neon_splat_lane_v:
7343
313
  case NEON::BI__builtin_neon_splat_laneq_v:
7344
407
  case NEON::BI__builtin_neon_splatq_lane_v:
7345
461
  case NEON::BI__builtin_neon_splatq_laneq_v: {
7346
461
    auto NumElements = VTy->getElementCount();
7347
461
    if (BuiltinID == NEON::BI__builtin_neon_splatq_lane_v)
7348
94
      NumElements = NumElements * 2;
7349
461
    if (BuiltinID == NEON::BI__builtin_neon_splat_laneq_v)
7350
127
      NumElements = NumElements.divideCoefficientBy(2);
7351
7352
461
    Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
7353
461
    return EmitNeonSplat(Ops[0], cast<ConstantInt>(Ops[1]), NumElements);
7354
407
  }
7355
16
  case NEON::BI__builtin_neon_vpadd_v:
7356
27
  case NEON::BI__builtin_neon_vpaddq_v:
7357
    // We don't allow fp/int overloading of intrinsics.
7358
27
    if (VTy->getElementType()->isFloatingPointTy() &&
7359
27
        Int == Intrinsic::aarch64_neon_addp)
7360
5
      Int = Intrinsic::aarch64_neon_faddp;
7361
27
    break;
7362
12
  case NEON::BI__builtin_neon_vabs_v:
7363
24
  case NEON::BI__builtin_neon_vabsq_v:
7364
24
    if (VTy->getElementType()->isFloatingPointTy())
7365
10
      return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, Ty), Ops, "vabs");
7366
14
    return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops, "vabs");
7367
6
  case NEON::BI__builtin_neon_vadd_v:
7368
12
  case NEON::BI__builtin_neon_vaddq_v: {
7369
12
    llvm::Type *VTy = llvm::FixedVectorType::get(Int8Ty, Quad ? 16 : 8);
7370
12
    Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
7371
12
    Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
7372
12
    Ops[0] =  Builder.CreateXor(Ops[0], Ops[1]);
7373
12
    return Builder.CreateBitCast(Ops[0], Ty);
7374
6
  }
7375
12
  case NEON::BI__builtin_neon_vaddhn_v: {
7376
12
    llvm::FixedVectorType *SrcTy =
7377
12
        llvm::FixedVectorType::getExtendedElementVectorType(VTy);
7378
7379
    // %sum = add <4 x i32> %lhs, %rhs
7380
12
    Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
7381
12
    Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
7382
12
    Ops[0] = Builder.CreateAdd(Ops[0], Ops[1], "vaddhn");
7383
7384
    // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
7385
12
    Constant *ShiftAmt =
7386
12
        ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
7387
12
    Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vaddhn");
7388
7389
    // %res = trunc <4 x i32> %high to <4 x i16>
7390
12
    return Builder.CreateTrunc(Ops[0], VTy, "vaddhn");
7391
6
  }
7392
5
  case NEON::BI__builtin_neon_vcale_v:
7393
10
  case NEON::BI__builtin_neon_vcaleq_v:
7394
15
  case NEON::BI__builtin_neon_vcalt_v:
7395
20
  case NEON::BI__builtin_neon_vcaltq_v:
7396
20
    std::swap(Ops[0], Ops[1]);
7397
20
    [[fallthrough]];
7398
25
  case NEON::BI__builtin_neon_vcage_v:
7399
30
  case NEON::BI__builtin_neon_vcageq_v:
7400
35
  case NEON::BI__builtin_neon_vcagt_v:
7401
40
  case NEON::BI__builtin_neon_vcagtq_v: {
7402
40
    llvm::Type *Ty;
7403
40
    switch (VTy->getScalarSizeInBits()) {
7404
0
    default: llvm_unreachable("unexpected type");
7405
16
    case 32:
7406
16
      Ty = FloatTy;
7407
16
      break;
7408
8
    case 64:
7409
8
      Ty = DoubleTy;
7410
8
      break;
7411
16
    case 16:
7412
16
      Ty = HalfTy;
7413
16
      break;
7414
40
    }
7415
40
    auto *VecFlt = llvm::FixedVectorType::get(Ty, VTy->getNumElements());
7416
40
    llvm::Type *Tys[] = { VTy, VecFlt };
7417
40
    Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
7418
40
    return EmitNeonCall(F, Ops, NameHint);
7419
40
  }
7420
14
  case NEON::BI__builtin_neon_vceqz_v:
7421
28
  case NEON::BI__builtin_neon_vceqzq_v:
7422
28
    return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OEQ,
7423
28
                                         ICmpInst::ICMP_EQ, "vceqz");
7424
8
  case NEON::BI__builtin_neon_vcgez_v:
7425
16
  case NEON::BI__builtin_neon_vcgezq_v:
7426
16
    return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGE,
7427
16
                                         ICmpInst::ICMP_SGE, "vcgez");
7428
8
  case NEON::BI__builtin_neon_vclez_v:
7429
16
  case NEON::BI__builtin_neon_vclezq_v:
7430
16
    return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLE,
7431
16
                                         ICmpInst::ICMP_SLE, "vclez");
7432
8
  case NEON::BI__builtin_neon_vcgtz_v:
7433
16
  case NEON::BI__builtin_neon_vcgtzq_v:
7434
16
    return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGT,
7435
16
                                         ICmpInst::ICMP_SGT, "vcgtz");
7436
8
  case NEON::BI__builtin_neon_vcltz_v:
7437
16
  case NEON::BI__builtin_neon_vcltzq_v:
7438
16
    return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLT,
7439
16
                                         ICmpInst::ICMP_SLT, "vcltz");
7440
12
  case NEON::BI__builtin_neon_vclz_v:
7441
24
  case NEON::BI__builtin_neon_vclzq_v:
7442
    // We generate a target-independent intrinsic, which needs a second argument
7443
    // for whether or not clz of zero is undefined; on ARM it isn't.
7444
24
    Ops.push_back(Builder.getInt1(getTarget().isCLZForZeroUndef()));
7445
24
    break;
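    // Illustration: on ARM/AArch64 this emits, e.g.,
    //   call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %x, i1 false)
    // because isCLZForZeroUndef() is false there, so clz(0) is defined and
    // yields the bit width.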
7446
4
  case NEON::BI__builtin_neon_vcvt_f32_v:
7447
8
  case NEON::BI__builtin_neon_vcvtq_f32_v:
7448
8
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
7449
8
    Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, Quad),
7450
8
                     HasLegalHalfType);
7451
8
    return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
7452
8
                : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
7453
2
  case NEON::BI__builtin_neon_vcvt_f16_s16:
7454
4
  case NEON::BI__builtin_neon_vcvt_f16_u16:
7455
6
  case NEON::BI__builtin_neon_vcvtq_f16_s16:
7456
8
  case NEON::BI__builtin_neon_vcvtq_f16_u16:
7457
8
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
7458
8
    Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float16, false, Quad),
7459
8
                     HasLegalHalfType);
7460
8
    return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
7461
8
                : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
7462
2
  case NEON::BI__builtin_neon_vcvt_n_f16_s16:
7463
4
  case NEON::BI__builtin_neon_vcvt_n_f16_u16:
7464
6
  case NEON::BI__builtin_neon_vcvtq_n_f16_s16:
7465
8
  case NEON::BI__builtin_neon_vcvtq_n_f16_u16: {
7466
8
    llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };
7467
8
    Function *F = CGM.getIntrinsic(Int, Tys);
7468
8
    return EmitNeonCall(F, Ops, "vcvt_n");
7469
6
  }
7470
4
  case NEON::BI__builtin_neon_vcvt_n_f32_v:
7471
6
  case NEON::BI__builtin_neon_vcvt_n_f64_v:
7472
10
  case NEON::BI__builtin_neon_vcvtq_n_f32_v:
7473
12
  case NEON::BI__builtin_neon_vcvtq_n_f64_v: {
7474
12
    llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };
7475
12
    Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic;
7476
12
    Function *F = CGM.getIntrinsic(Int, Tys);
7477
12
    return EmitNeonCall(F, Ops, "vcvt_n");
7478
10
  }
7479
2
  case NEON::BI__builtin_neon_vcvt_n_s16_f16:
7480
4
  case NEON::BI__builtin_neon_vcvt_n_s32_v:
7481
6
  case NEON::BI__builtin_neon_vcvt_n_u16_f16:
7482
8
  case NEON::BI__builtin_neon_vcvt_n_u32_v:
7483
9
  case NEON::BI__builtin_neon_vcvt_n_s64_v:
7484
10
  case NEON::BI__builtin_neon_vcvt_n_u64_v:
7485
12
  case NEON::BI__builtin_neon_vcvtq_n_s16_f16:
7486
14
  case NEON::BI__builtin_neon_vcvtq_n_s32_v:
7487
16
  case NEON::BI__builtin_neon_vcvtq_n_u16_f16:
7488
18
  case NEON::BI__builtin_neon_vcvtq_n_u32_v:
7489
19
  case NEON::BI__builtin_neon_vcvtq_n_s64_v:
7490
20
  case NEON::BI__builtin_neon_vcvtq_n_u64_v: {
7491
20
    llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
7492
20
    Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
7493
20
    return EmitNeonCall(F, Ops, "vcvt_n");
7494
19
  }
7495
1
  case NEON::BI__builtin_neon_vcvt_s32_v:
7496
2
  case NEON::BI__builtin_neon_vcvt_u32_v:
7497
2
  case NEON::BI__builtin_neon_vcvt_s64_v:
7498
2
  case NEON::BI__builtin_neon_vcvt_u64_v:
7499
3
  case NEON::BI__builtin_neon_vcvt_s16_f16:
7500
4
  case NEON::BI__builtin_neon_vcvt_u16_f16:
7501
5
  case NEON::BI__builtin_neon_vcvtq_s32_v:
7502
6
  case NEON::BI__builtin_neon_vcvtq_u32_v:
7503
6
  case NEON::BI__builtin_neon_vcvtq_s64_v:
7504
6
  case NEON::BI__builtin_neon_vcvtq_u64_v:
7505
7
  case NEON::BI__builtin_neon_vcvtq_s16_f16:
7506
8
  case NEON::BI__builtin_neon_vcvtq_u16_f16: {
7507
8
    Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
7508
8
    return Usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt")
7509
8
                : Builder.CreateFPToSI(Ops[0], Ty, "vcvt");
7510
7
  }
7511
1
  case NEON::BI__builtin_neon_vcvta_s16_f16:
7512
2
  case NEON::BI__builtin_neon_vcvta_s32_v:
7513
2
  case NEON::BI__builtin_neon_vcvta_s64_v:
7514
3
  case NEON::BI__builtin_neon_vcvta_u16_f16:
7515
4
  case NEON::BI__builtin_neon_vcvta_u32_v:
7516
4
  case NEON::BI__builtin_neon_vcvta_u64_v:
7517
5
  case NEON::BI__builtin_neon_vcvtaq_s16_f16:
7518
6
  case NEON::BI__builtin_neon_vcvtaq_s32_v:
7519
6
  case NEON::BI__builtin_neon_vcvtaq_s64_v:
7520
6
  case NEON::BI__builtin_neon_vcvtaq_u16_f16:
7521
7
  case NEON::BI__builtin_neon_vcvtaq_u32_v:
7522
7
  case NEON::BI__builtin_neon_vcvtaq_u64_v:
7523
8
  case NEON::BI__builtin_neon_vcvtn_s16_f16:
7524
9
  case NEON::BI__builtin_neon_vcvtn_s32_v:
7525
9
  case NEON::BI__builtin_neon_vcvtn_s64_v:
7526
10
  case NEON::BI__builtin_neon_vcvtn_u16_f16:
7527
11
  case NEON::BI__builtin_neon_vcvtn_u32_v:
7528
11
  case NEON::BI__builtin_neon_vcvtn_u64_v:
7529
12
  case NEON::BI__builtin_neon_vcvtnq_s16_f16:
7530
13
  case NEON::BI__builtin_neon_vcvtnq_s32_v:
7531
13
  case NEON::BI__builtin_neon_vcvtnq_s64_v:
7532
14
  case NEON::BI__builtin_neon_vcvtnq_u16_f16:
7533
15
  case NEON::BI__builtin_neon_vcvtnq_u32_v:
7534
15
  case NEON::BI__builtin_neon_vcvtnq_u64_v:
7535
16
  case NEON::BI__builtin_neon_vcvtp_s16_f16:
7536
17
  case NEON::BI__builtin_neon_vcvtp_s32_v:
7537
17
  case NEON::BI__builtin_neon_vcvtp_s64_v:
7538
18
  case NEON::BI__builtin_neon_vcvtp_u16_f16:
7539
19
  case NEON::BI__builtin_neon_vcvtp_u32_v:
7540
19
  case NEON::BI__builtin_neon_vcvtp_u64_v:
7541
20
  case NEON::BI__builtin_neon_vcvtpq_s16_f16:
7542
21
  case NEON::BI__builtin_neon_vcvtpq_s32_v:
7543
21
  case NEON::BI__builtin_neon_vcvtpq_s64_v:
7544
22
  case NEON::BI__builtin_neon_vcvtpq_u16_f16:
7545
23
  case NEON::BI__builtin_neon_vcvtpq_u32_v:
7546
23
  case NEON::BI__builtin_neon_vcvtpq_u64_v:
7547
24
  case NEON::BI__builtin_neon_vcvtm_s16_f16:
7548
25
  case NEON::BI__builtin_neon_vcvtm_s32_v:
7549
25
  case NEON::BI__builtin_neon_vcvtm_s64_v:
7550
26
  case NEON::BI__builtin_neon_vcvtm_u16_f16:
7551
27
  case NEON::BI__builtin_neon_vcvtm_u32_v:
7552
27
  case NEON::BI__builtin_neon_vcvtm_u64_v:
7553
28
  case NEON::BI__builtin_neon_vcvtmq_s16_f16:
7554
29
  case NEON::BI__builtin_neon_vcvtmq_s32_v:
7555
29
  case NEON::BI__builtin_neon_vcvtmq_s64_v:
7556
30
  case NEON::BI__builtin_neon_vcvtmq_u16_f16:
7557
31
  case NEON::BI__builtin_neon_vcvtmq_u32_v:
7558
31
  case NEON::BI__builtin_neon_vcvtmq_u64_v: {
7559
31
    llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
7560
31
    return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
7561
31
  }
7562
1
  case NEON::BI__builtin_neon_vcvtx_f32_v: {
7563
1
    llvm::Type *Tys[2] = { VTy->getTruncatedElementVectorType(VTy), Ty};
7564
1
    return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
7565
7566
31
  }
7567
26
  case NEON::BI__builtin_neon_vext_v:
7568
52
  case NEON::BI__builtin_neon_vextq_v: {
7569
52
    int CV = cast<ConstantInt>(Ops[2])->getSExtValue();
7570
52
    SmallVector<int, 16> Indices;
7571
346
    for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
7572
294
      Indices.push_back(i+CV);
7573
7574
52
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
7575
52
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7576
52
    return Builder.CreateShuffleVector(Ops[0], Ops[1], Indices, "vext");
7577
26
  }
7578
16
  case NEON::BI__builtin_neon_vfma_v:
7579
32
  case NEON::BI__builtin_neon_vfmaq_v: {
7580
32
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
7581
32
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7582
32
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
7583
7584
    // NEON intrinsic puts accumulator first, unlike the LLVM fma.
7585
32
    return emitCallMaybeConstrainedFPBuiltin(
7586
32
        *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
7587
32
        {Ops[1], Ops[2], Ops[0]});
7588
16
  }
7589
15
  case NEON::BI__builtin_neon_vld1_v:
7590
33
  case NEON::BI__builtin_neon_vld1q_v: {
7591
33
    llvm::Type *Tys[] = {Ty, Int8PtrTy};
7592
33
    Ops.push_back(getAlignmentValue32(PtrOp0));
7593
33
    return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vld1");
7594
15
  }
7595
28
  case NEON::BI__builtin_neon_vld1_x2_v:
7596
56
  case NEON::BI__builtin_neon_vld1q_x2_v:
7597
84
  case NEON::BI__builtin_neon_vld1_x3_v:
7598
112
  case NEON::BI__builtin_neon_vld1q_x3_v:
7599
140
  case NEON::BI__builtin_neon_vld1_x4_v:
7600
168
  case NEON::BI__builtin_neon_vld1q_x4_v: {
7601
168
    llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
7602
168
    Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
7603
168
    Ops[1] = Builder.CreateCall(F, Ops[1], "vld1xN");
7604
168
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7605
140
  }
7606
13
  case NEON::BI__builtin_neon_vld2_v:
7607
24
  case NEON::BI__builtin_neon_vld2q_v:
7608
37
  case NEON::BI__builtin_neon_vld3_v:
7609
48
  case NEON::BI__builtin_neon_vld3q_v:
7610
61
  case NEON::BI__builtin_neon_vld4_v:
7611
72
  case NEON::BI__builtin_neon_vld4q_v:
7612
85
  case NEON::BI__builtin_neon_vld2_dup_v:
7613
96
  case NEON::BI__builtin_neon_vld2q_dup_v:
7614
109
  case NEON::BI__builtin_neon_vld3_dup_v:
7615
120
  case NEON::BI__builtin_neon_vld3q_dup_v:
7616
133
  case NEON::BI__builtin_neon_vld4_dup_v:
7617
144
  case NEON::BI__builtin_neon_vld4q_dup_v: {
7618
144
    llvm::Type *Tys[] = {Ty, Int8PtrTy};
7619
144
    Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
7620
144
    Value *Align = getAlignmentValue32(PtrOp1);
7621
144
    Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, NameHint);
7622
144
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7623
133
  }
7624
14
  case NEON::BI__builtin_neon_vld1_dup_v:
7625
27
  case NEON::BI__builtin_neon_vld1q_dup_v: {
7626
27
    Value *V = PoisonValue::get(Ty);
7627
27
    PtrOp0 = PtrOp0.withElementType(VTy->getElementType());
7628
27
    LoadInst *Ld = Builder.CreateLoad(PtrOp0);
7629
27
    llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
7630
27
    Ops[0] = Builder.CreateInsertElement(V, Ld, CI);
7631
27
    return EmitNeonSplat(Ops[0], CI);
7632
14
  }
7633
11
  case NEON::BI__builtin_neon_vld2_lane_v:
7634
19
  case NEON::BI__builtin_neon_vld2q_lane_v:
7635
30
  case NEON::BI__builtin_neon_vld3_lane_v:
7636
38
  case NEON::BI__builtin_neon_vld3q_lane_v:
7637
49
  case NEON::BI__builtin_neon_vld4_lane_v:
7638
57
  case NEON::BI__builtin_neon_vld4q_lane_v: {
7639
57
    llvm::Type *Tys[] = {Ty, Int8PtrTy};
7640
57
    Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
7641
228
    for (unsigned I = 2; I < Ops.size() - 1; ++I)
7642
171
      Ops[I] = Builder.CreateBitCast(Ops[I], Ty);
7643
57
    Ops.push_back(getAlignmentValue32(PtrOp1));
7644
57
    Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), NameHint);
7645
57
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7646
49
  }
7647
12
  case NEON::BI__builtin_neon_vmovl_v: {
7648
12
    llvm::FixedVectorType *DTy =
7649
12
        llvm::FixedVectorType::getTruncatedElementVectorType(VTy);
7650
12
    Ops[0] = Builder.CreateBitCast(Ops[0], DTy);
7651
12
    if (Usgn)
7652
6
      return Builder.CreateZExt(Ops[0], Ty, "vmovl");
7653
6
    return Builder.CreateSExt(Ops[0], Ty, "vmovl");
7654
12
  }
7655
12
  case NEON::BI__builtin_neon_vmovn_v: {
7656
12
    llvm::FixedVectorType *QTy =
7657
12
        llvm::FixedVectorType::getExtendedElementVectorType(VTy);
7658
12
    Ops[0] = Builder.CreateBitCast(Ops[0], QTy);
7659
12
    return Builder.CreateTrunc(Ops[0], Ty, "vmovn");
7660
12
  }
7661
7
  case NEON::BI__builtin_neon_vmull_v:
7662
    // FIXME: the integer vmull operations could be emitted in terms of pure
7663
    // LLVM IR (2 exts followed by a mul). Unfortunately LLVM has a habit of
7664
    // hoisting the exts outside loops. Until global ISel comes along that can
7665
    // see through such movement this leads to bad CodeGen. So we need an
7666
    // intrinsic for now.
7667
7
    Int = Usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls;
7668
7
    Int = Type.isPoly() ? (unsigned)Intrinsic::arm_neon_vmullp : Int;
7669
7
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
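    // Sketch of the pure-IR form mentioned in the FIXME above (assuming a
    // signed 4 x i16 -> 4 x i32 widening multiply), avoided today because the
    // extends tend to get hoisted out of loops:
    //   %a.ext = sext <4 x i16> %a to <4 x i32>
    //   %b.ext = sext <4 x i16> %b to <4 x i32>
    //   %res   = mul <4 x i32> %a.ext, %b.ext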
7670
6
  case NEON::BI__builtin_neon_vpadal_v:
7671
12
  case NEON::BI__builtin_neon_vpadalq_v: {
7672
    // The source operand type has twice as many elements of half the size.
7673
12
    unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
7674
12
    llvm::Type *EltTy =
7675
12
      llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
7676
12
    auto *NarrowTy =
7677
12
        llvm::FixedVectorType::get(EltTy, VTy->getNumElements() * 2);
7678
12
    llvm::Type *Tys[2] = { Ty, NarrowTy };
7679
12
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
7680
6
  }
7681
12
  case NEON::BI__builtin_neon_vpaddl_v:
7682
24
  case NEON::BI__builtin_neon_vpaddlq_v: {
7683
    // The source operand type has twice as many elements of half the size.
7684
24
    unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
7685
24
    llvm::Type *EltTy = llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
7686
24
    auto *NarrowTy =
7687
24
        llvm::FixedVectorType::get(EltTy, VTy->getNumElements() * 2);
7688
24
    llvm::Type *Tys[2] = { Ty, NarrowTy };
7689
24
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpaddl");
7690
12
  }
7691
6
  case NEON::BI__builtin_neon_vqdmlal_v:
7692
12
  case NEON::BI__builtin_neon_vqdmlsl_v: {
7693
12
    SmallVector<Value *, 2> MulOps(Ops.begin() + 1, Ops.end());
7694
12
    Ops[1] =
7695
12
        EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), MulOps, "vqdmlal");
7696
12
    Ops.resize(2);
7697
12
    return EmitNeonCall(CGM.getIntrinsic(AltLLVMIntrinsic, Ty), Ops, NameHint);
7698
6
  }
7699
4
  case NEON::BI__builtin_neon_vqdmulhq_lane_v:
7700
8
  case NEON::BI__builtin_neon_vqdmulh_lane_v:
7701
12
  case NEON::BI__builtin_neon_vqrdmulhq_lane_v:
7702
16
  case NEON::BI__builtin_neon_vqrdmulh_lane_v: {
7703
16
    auto *RTy = cast<llvm::FixedVectorType>(Ty);
7704
16
    if (BuiltinID == NEON::BI__builtin_neon_vqdmulhq_lane_v ||
7705
16
        BuiltinID == NEON::BI__builtin_neon_vqrdmulhq_lane_v)
7706
8
      RTy = llvm::FixedVectorType::get(RTy->getElementType(),
7707
8
                                       RTy->getNumElements() * 2);
7708
16
    llvm::Type *Tys[2] = {
7709
16
        RTy, GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
7710
16
                                             /*isQuad*/ false))};
7711
16
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
7712
12
  }
7713
4
  case NEON::BI__builtin_neon_vqdmulhq_laneq_v:
7714
8
  case NEON::BI__builtin_neon_vqdmulh_laneq_v:
7715
12
  case NEON::BI__builtin_neon_vqrdmulhq_laneq_v:
7716
16
  case NEON::BI__builtin_neon_vqrdmulh_laneq_v: {
7717
16
    llvm::Type *Tys[2] = {
7718
16
        Ty, GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
7719
16
                                            /*isQuad*/ true))};
7720
16
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
7721
12
  }
7722
16
  case NEON::BI__builtin_neon_vqshl_n_v:
7723
32
  case NEON::BI__builtin_neon_vqshlq_n_v:
7724
32
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl_n",
7725
32
                        1, false);
7726
8
  case NEON::BI__builtin_neon_vqshlu_n_v:
7727
16
  case NEON::BI__builtin_neon_vqshluq_n_v:
7728
16
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshlu_n",
7729
16
                        1, false);
7730
7
  case NEON::BI__builtin_neon_vrecpe_v:
7731
14
  case NEON::BI__builtin_neon_vrecpeq_v:
7732
21
  case NEON::BI__builtin_neon_vrsqrte_v:
7733
28
  case NEON::BI__builtin_neon_vrsqrteq_v:
7734
28
    Int = Ty->isFPOrFPVectorTy() ? LLVMIntrinsic : AltLLVMIntrinsic;
7735
28
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);
7736
14
  case NEON::BI__builtin_neon_vrndi_v:
7737
26
  case NEON::BI__builtin_neon_vrndiq_v:
7738
26
    Int = Builder.getIsFPConstrained()
7739
26
              ? Intrinsic::experimental_constrained_nearbyint
7740
26
              : Intrinsic::nearbyint;
7741
26
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);
7742
16
  case NEON::BI__builtin_neon_vrshr_n_v:
7743
32
  case NEON::BI__builtin_neon_vrshrq_n_v:
7744
32
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n",
7745
32
                        1, true);
7746
1
  case NEON::BI__builtin_neon_vsha512hq_u64:
7747
2
  case NEON::BI__builtin_neon_vsha512h2q_u64:
7748
3
  case NEON::BI__builtin_neon_vsha512su0q_u64:
7749
4
  case NEON::BI__builtin_neon_vsha512su1q_u64: {
7750
4
    Function *F = CGM.getIntrinsic(Int);
7751
4
    return EmitNeonCall(F, Ops, "");
7752
3
  }
7753
16
  case NEON::BI__builtin_neon_vshl_n_v:
7754
32
  case NEON::BI__builtin_neon_vshlq_n_v:
7755
32
    Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);
7756
32
    return Builder.CreateShl(Builder.CreateBitCast(Ops[0],Ty), Ops[1],
7757
32
                             "vshl_n");
7758
39
  case NEON::BI__builtin_neon_vshll_n_v: {
7759
39
    llvm::FixedVectorType *SrcTy =
7760
39
        llvm::FixedVectorType::getTruncatedElementVectorType(VTy);
7761
39
    Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
7762
39
    if (Usgn)
7763
18
      Ops[0] = Builder.CreateZExt(Ops[0], VTy);
7764
21
    else
7765
21
      Ops[0] = Builder.CreateSExt(Ops[0], VTy);
7766
39
    Ops[1] = EmitNeonShiftVector(Ops[1], VTy, false);
7767
39
    return Builder.CreateShl(Ops[0], Ops[1], "vshll_n");
7768
16
  }
7769
18
  case NEON::BI__builtin_neon_vshrn_n_v: {
7770
18
    llvm::FixedVectorType *SrcTy =
7771
18
        llvm::FixedVectorType::getExtendedElementVectorType(VTy);
7772
18
    Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
7773
18
    Ops[1] = EmitNeonShiftVector(Ops[1], SrcTy, false);
7774
18
    if (Usgn)
7775
9
      Ops[0] = Builder.CreateLShr(Ops[0], Ops[1]);
7776
9
    else
7777
9
      Ops[0] = Builder.CreateAShr(Ops[0], Ops[1]);
7778
18
    return Builder.CreateTrunc(Ops[0], Ty, "vshrn_n");
7779
16
  }
7780
22
  case NEON::BI__builtin_neon_vshr_n_v:
7781
38
  case NEON::BI__builtin_neon_vshrq_n_v:
7782
38
    return EmitNeonRShiftImm(Ops[0], Ops[1], Ty, Usgn, "vshr_n");
7783
13
  case NEON::BI__builtin_neon_vst1_v:
7784
27
  case NEON::BI__builtin_neon_vst1q_v:
7785
40
  case NEON::BI__builtin_neon_vst2_v:
7786
51
  case NEON::BI__builtin_neon_vst2q_v:
7787
64
  case NEON::BI__builtin_neon_vst3_v:
7788
75
  case NEON::BI__builtin_neon_vst3q_v:
7789
88
  case NEON::BI__builtin_neon_vst4_v:
7790
99
  case NEON::BI__builtin_neon_vst4q_v:
7791
110
  case NEON::BI__builtin_neon_vst2_lane_v:
7792
118
  case NEON::BI__builtin_neon_vst2q_lane_v:
7793
129
  case NEON::BI__builtin_neon_vst3_lane_v:
7794
137
  case NEON::BI__builtin_neon_vst3q_lane_v:
7795
148
  case NEON::BI__builtin_neon_vst4_lane_v:
7796
156
  case NEON::BI__builtin_neon_vst4q_lane_v: {
7797
156
    llvm::Type *Tys[] = {Int8PtrTy, Ty};
7798
156
    Ops.push_back(getAlignmentValue32(PtrOp0));
7799
156
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "");
7800
148
  }
7801
1
  case NEON::BI__builtin_neon_vsm3partw1q_u32:
7802
2
  case NEON::BI__builtin_neon_vsm3partw2q_u32:
7803
3
  case NEON::BI__builtin_neon_vsm3ss1q_u32:
7804
4
  case NEON::BI__builtin_neon_vsm4ekeyq_u32:
7805
5
  case NEON::BI__builtin_neon_vsm4eq_u32: {
7806
5
    Function *F = CGM.getIntrinsic(Int);
7807
5
    return EmitNeonCall(F, Ops, "");
7808
4
  }
7809
1
  case NEON::BI__builtin_neon_vsm3tt1aq_u32:
7810
2
  case NEON::BI__builtin_neon_vsm3tt1bq_u32:
7811
3
  case NEON::BI__builtin_neon_vsm3tt2aq_u32:
7812
4
  case NEON::BI__builtin_neon_vsm3tt2bq_u32: {
7813
4
    Function *F = CGM.getIntrinsic(Int);
7814
4
    Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
7815
4
    return EmitNeonCall(F, Ops, "");
7816
3
  }
7817
28
  case NEON::BI__builtin_neon_vst1_x2_v:
7818
56
  case NEON::BI__builtin_neon_vst1q_x2_v:
7819
84
  case NEON::BI__builtin_neon_vst1_x3_v:
7820
112
  case NEON::BI__builtin_neon_vst1q_x3_v:
7821
140
  case NEON::BI__builtin_neon_vst1_x4_v:
7822
168
  case NEON::BI__builtin_neon_vst1q_x4_v: {
7823
    // TODO: Currently in AArch32 mode the pointer operand comes first, whereas
7824
    // in AArch64 it comes last. We may want to stick to one or the other.
7825
168
    if (Arch == llvm::Triple::aarch64 || Arch == llvm::Triple::aarch64_be ||
7826
168
        Arch == llvm::Triple::aarch64_32) {
7827
90
      llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
7828
90
      std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
7829
90
      return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "");
7830
90
    }
7831
78
    llvm::Type *Tys[2] = {UnqualPtrTy, VTy};
7832
78
    return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "");
7833
168
  }
7834
12
  case NEON::BI__builtin_neon_vsubhn_v: {
7835
12
    llvm::FixedVectorType *SrcTy =
7836
12
        llvm::FixedVectorType::getExtendedElementVectorType(VTy);
7837
7838
    // %sum = add <4 x i32> %lhs, %rhs
7839
12
    Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
7840
12
    Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
7841
12
    Ops[0] = Builder.CreateSub(Ops[0], Ops[1], "vsubhn");
7842
7843
    // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
7844
12
    Constant *ShiftAmt =
7845
12
        ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
7846
12
    Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vsubhn");
7847
7848
    // %res = trunc <4 x i32> %high to <4 x i16>
7849
12
    return Builder.CreateTrunc(Ops[0], VTy, "vsubhn");
7850
168
  }
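  // A minimal scalar sketch of the lowering above (illustration only,
  // assuming 32-bit lanes narrowing to 16-bit lanes):
  //
  //   uint16_t subhn_scalar(uint32_t a, uint32_t b) {
  //     uint32_t diff = a - b;          // subtract in the wide type
  //     return uint16_t(diff >> 16);    // keep the high half, then truncate
  //   }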
7851
10
  case NEON::BI__builtin_neon_vtrn_v:
7852
20
  case NEON::BI__builtin_neon_vtrnq_v: {
7853
20
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7854
20
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
7855
20
    Value *SV = nullptr;
7856
7857
60
    for (unsigned vi = 0; vi != 2; ++vi) {
7858
40
      SmallVector<int, 16> Indices;
7859
178
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
7860
138
        Indices.push_back(i+vi);
7861
138
        Indices.push_back(i+e+vi);
7862
138
      }
7863
40
      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
7864
40
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
7865
40
      SV = Builder.CreateDefaultAlignedStore(SV, Addr);
7866
40
    }
7867
20
    return SV;
7868
10
  }
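  // For illustration, with a 4-element vector (e == 4) the two shuffles
  // above use the transpose index patterns
  //   vi == 0:  <0, 4, 2, 6>   (even lanes of Ops[1] and Ops[2])
  //   vi == 1:  <1, 5, 3, 7>   (odd lanes of Ops[1] and Ops[2])
  // and each result is stored to element vi of the destination.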
7869
19
  case NEON::BI__builtin_neon_vtst_v:
7870
38
  case NEON::BI__builtin_neon_vtstq_v: {
7871
38
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
7872
38
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7873
38
    Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
7874
38
    Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
7875
38
                                ConstantAggregateZero::get(Ty));
7876
38
    return Builder.CreateSExt(Ops[0], Ty, "vtst");
7877
19
  }
7878
10
  case NEON::BI__builtin_neon_vuzp_v:
7879
20
  case NEON::BI__builtin_neon_vuzpq_v: {
7880
20
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7881
20
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
7882
20
    Value *SV = nullptr;
7883
7884
60
    for (unsigned vi = 0; vi != 2; ++vi) {
7885
40
      SmallVector<int, 16> Indices;
7886
316
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
7887
276
        Indices.push_back(2*i+vi);
7888
7889
40
      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
7890
40
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
7891
40
      SV = Builder.CreateDefaultAlignedStore(SV, Addr);
7892
40
    }
7893
20
    return SV;
7894
10
  }
7895
1
  case NEON::BI__builtin_neon_vxarq_u64: {
7896
1
    Function *F = CGM.getIntrinsic(Int);
7897
1
    Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
7898
1
    return EmitNeonCall(F, Ops, "");
7899
10
  }
7900
10
  case NEON::BI__builtin_neon_vzip_v:
7901
21
  case NEON::BI__builtin_neon_vzipq_v: {
7902
21
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7903
21
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
7904
21
    Value *SV = nullptr;
7905
7906
63
    for (unsigned vi = 0; vi != 2; ++vi) {
7907
42
      SmallVector<int, 16> Indices;
7908
196
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
7909
154
        Indices.push_back((i + vi*e) >> 1);
7910
154
        Indices.push_back(((i + vi*e) >> 1)+e);
7911
154
      }
7912
42
      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
7913
42
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
7914
42
      SV = Builder.CreateDefaultAlignedStore(SV, Addr);
7915
42
    }
7916
21
    return SV;
7917
10
  }
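  // For illustration, with a 4-element vector (e == 4) the two shuffles
  // above interleave first the low and then the high halves of the inputs:
  //   vi == 0:  <0, 4, 1, 5>
  //   vi == 1:  <2, 6, 3, 7>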
7918
2
  case NEON::BI__builtin_neon_vdot_s32:
7919
4
  case NEON::BI__builtin_neon_vdot_u32:
7920
6
  case NEON::BI__builtin_neon_vdotq_s32:
7921
8
  case NEON::BI__builtin_neon_vdotq_u32: {
7922
8
    auto *InputTy =
7923
8
        llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
7924
8
    llvm::Type *Tys[2] = { Ty, InputTy };
7925
8
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vdot");
7926
6
  }
7927
1
  case NEON::BI__builtin_neon_vfmlal_low_f16:
7928
2
  case NEON::BI__builtin_neon_vfmlalq_low_f16: {
7929
2
    auto *InputTy =
7930
2
        llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
7931
2
    llvm::Type *Tys[2] = { Ty, InputTy };
7932
2
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlal_low");
7933
1
  }
7934
1
  case NEON::BI__builtin_neon_vfmlsl_low_f16:
7935
2
  case NEON::BI__builtin_neon_vfmlslq_low_f16: {
7936
2
    auto *InputTy =
7937
2
        llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
7938
2
    llvm::Type *Tys[2] = { Ty, InputTy };
7939
2
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlsl_low");
7940
1
  }
7941
1
  case NEON::BI__builtin_neon_vfmlal_high_f16:
7942
2
  case NEON::BI__builtin_neon_vfmlalq_high_f16: {
7943
2
    auto *InputTy =
7944
2
        llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
7945
2
    llvm::Type *Tys[2] = { Ty, InputTy };
7946
2
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlal_high");
7947
1
  }
7948
1
  case NEON::BI__builtin_neon_vfmlsl_high_f16:
7949
2
  case NEON::BI__builtin_neon_vfmlslq_high_f16: {
7950
2
    auto *InputTy =
7951
2
        llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
7952
2
    llvm::Type *Tys[2] = { Ty, InputTy };
7953
2
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlsl_high");
7954
1
  }
7955
2
  case NEON::BI__builtin_neon_vmmlaq_s32:
7956
4
  case NEON::BI__builtin_neon_vmmlaq_u32: {
7957
4
    auto *InputTy =
7958
4
        llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
7959
4
    llvm::Type *Tys[2] = { Ty, InputTy };
7960
4
    return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vmmla");
7961
2
  }
7962
2
  case NEON::BI__builtin_neon_vusmmlaq_s32: {
7963
2
    auto *InputTy =
7964
2
        llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
7965
2
    llvm::Type *Tys[2] = { Ty, InputTy };
7966
2
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vusmmla");
7967
2
  }
7968
2
  case NEON::BI__builtin_neon_vusdot_s32:
7969
4
  case NEON::BI__builtin_neon_vusdotq_s32: {
7970
4
    auto *InputTy =
7971
4
        llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
7972
4
    llvm::Type *Tys[2] = { Ty, InputTy };
7973
4
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vusdot");
7974
2
  }
7975
3
  case NEON::BI__builtin_neon_vbfdot_f32:
7976
6
  case NEON::BI__builtin_neon_vbfdotq_f32: {
7977
6
    llvm::Type *InputTy =
7978
6
        llvm::FixedVectorType::get(BFloatTy, Ty->getPrimitiveSizeInBits() / 16);
7979
6
    llvm::Type *Tys[2] = { Ty, InputTy };
7980
6
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vbfdot");
7981
3
  }
7982
2
  case NEON::BI__builtin_neon___a32_vcvt_bf16_f32: {
7983
2
    llvm::Type *Tys[1] = { Ty };
7984
2
    Function *F = CGM.getIntrinsic(Int, Tys);
7985
2
    return EmitNeonCall(F, Ops, "vcvtfp2bf");
7986
3
  }
7987
7988
2.84k
  }
7989
7990
735
  assert(Int && "Expected valid intrinsic number");
7991
7992
  // Determine the type(s) of this overloaded AArch64 intrinsic.
7993
735
  Function *F = LookupNeonLLVMIntrinsic(Int, Modifier, Ty, E);
7994
7995
735
  Value *Result = EmitNeonCall(F, Ops, NameHint);
7996
735
  llvm::Type *ResultType = ConvertType(E->getType());
7997
  // Cast the AArch64 intrinsic's one-element vector result to the
7998
  // scalar type expected by the builtin.
7999
735
  return Builder.CreateBitCast(Result, ResultType, NameHint);
8000
735
}
8001
8002
Value *CodeGenFunction::EmitAArch64CompareBuiltinExpr(
8003
    Value *Op, llvm::Type *Ty, const CmpInst::Predicate Fp,
8004
142
    const CmpInst::Predicate Ip, const Twine &Name) {
8005
142
  llvm::Type *OTy = Op->getType();
8006
8007
  // FIXME: this is utterly horrific. We should not be looking at previous
8008
  // codegen context to find out what needs doing. Unfortunately TableGen
8009
  // currently gives us exactly the same calls for vceqz_f32 and vceqz_s32
8010
  // (etc).
8011
142
  if (BitCastInst *BI = dyn_cast<BitCastInst>(Op))
8012
78
    OTy = BI->getOperand(0)->getType();
8013
8014
142
  Op = Builder.CreateBitCast(Op, OTy);
8015
142
  if (OTy->getScalarType()->isFloatingPointTy()) {
8016
85
    if (Fp == CmpInst::FCMP_OEQ)
8017
17
      Op = Builder.CreateFCmp(Fp, Op, Constant::getNullValue(OTy));
8018
68
    else
8019
68
      Op = Builder.CreateFCmpS(Fp, Op, Constant::getNullValue(OTy));
8020
85
  } else {
8021
57
    Op = Builder.CreateICmp(Ip, Op, Constant::getNullValue(OTy));
8022
57
  }
8023
142
  return Builder.CreateSExt(Op, Ty, Name);
8024
142
}
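// As an illustrative sketch (value names are arbitrary), a vceqz_f32 call on
// a <2 x float> operand comes out of this helper roughly as:
//   %cmp = fcmp oeq <2 x float> %op, zeroinitializer
//   %res = sext <2 x i1> %cmp to <2 x i32>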
8025
8026
static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
8027
                                 Value *ExtOp, Value *IndexOp,
8028
                                 llvm::Type *ResTy, unsigned IntID,
8029
24
                                 const char *Name) {
8030
24
  SmallVector<Value *, 2> TblOps;
8031
24
  if (ExtOp)
8032
6
    TblOps.push_back(ExtOp);
8033
8034
  // Build a vector containing sequential number like (0, 1, 2, ..., 15)
8035
24
  SmallVector<int, 16> Indices;
8036
24
  auto *TblTy = cast<llvm::FixedVectorType>(Ops[0]->getType());
8037
216
  for (unsigned i = 0, e = TblTy->getNumElements(); i != e; ++i) {
8038
192
    Indices.push_back(2*i);
8039
192
    Indices.push_back(2*i+1);
8040
192
  }
8041
8042
24
  int PairPos = 0, End = Ops.size() - 1;
8043
48
  while (PairPos < End) {
8044
24
    TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
8045
24
                                                     Ops[PairPos+1], Indices,
8046
24
                                                     Name));
8047
24
    PairPos += 2;
8048
24
  }
8049
8050
  // If there's an odd number of 64-bit lookup-table vectors, fill the high
8051
  // 64 bits of the last 128-bit lookup table with zeros.
8052
24
  if (PairPos == End) {
8053
12
    Value *ZeroTbl = ConstantAggregateZero::get(TblTy);
8054
12
    TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
8055
12
                                                     ZeroTbl, Indices, Name));
8056
12
  }
8057
8058
24
  Function *TblF;
8059
24
  TblOps.push_back(IndexOp);
8060
24
  TblF = CGF.CGM.getIntrinsic(IntID, ResTy);
8061
8062
24
  return CGF.EmitNeonCall(TblF, TblOps, Name);
8063
24
}
8064
8065
6.67k
Value *CodeGenFunction::GetValueForARMHint(unsigned BuiltinID) {
8066
6.67k
  unsigned Value;
8067
6.67k
  switch (BuiltinID) {
8068
6.66k
  default:
8069
6.66k
    return nullptr;
8070
2
  case clang::ARM::BI__builtin_arm_nop:
8071
2
    Value = 0;
8072
2
    break;
8073
2
  case clang::ARM::BI__builtin_arm_yield:
8074
3
  case clang::ARM::BI__yield:
8075
3
    Value = 1;
8076
3
    break;
8077
2
  case clang::ARM::BI__builtin_arm_wfe:
8078
3
  case clang::ARM::BI__wfe:
8079
3
    Value = 2;
8080
3
    break;
8081
2
  case clang::ARM::BI__builtin_arm_wfi:
8082
3
  case clang::ARM::BI__wfi:
8083
3
    Value = 3;
8084
3
    break;
8085
2
  case clang::ARM::BI__builtin_arm_sev:
8086
3
  case clang::ARM::BI__sev:
8087
3
    Value = 4;
8088
3
    break;
8089
2
  case clang::ARM::BI__builtin_arm_sevl:
8090
3
  case clang::ARM::BI__sevl:
8091
3
    Value = 5;
8092
3
    break;
8093
6.67k
  }
8094
8095
17
  return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_hint),
8096
17
                            llvm::ConstantInt::get(Int32Ty, Value));
8097
6.67k
}
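// As an illustrative sketch, __builtin_arm_wfi maps to Value == 3 above and
// is therefore emitted as:
//   call void @llvm.arm.hint(i32 3)
// Builtins not listed in the switch return nullptr and are handled elsewhere.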
8098
8099
enum SpecialRegisterAccessKind {
8100
  NormalRead,
8101
  VolatileRead,
8102
  Write,
8103
};
8104
8105
// Generates the IR for __builtin_read_exec_*.
8106
// Lowers the builtin to amdgcn_ballot intrinsic.
8107
static Value *EmitAMDGCNBallotForExec(CodeGenFunction &CGF, const CallExpr *E,
8108
                                      llvm::Type *RegisterType,
8109
30
                                      llvm::Type *ValueType, bool isExecHi) {
8110
30
  CodeGen::CGBuilderTy &Builder = CGF.Builder;
8111
30
  CodeGen::CodeGenModule &CGM = CGF.CGM;
8112
8113
30
  Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, {RegisterType});
8114
30
  llvm::Value *Call = Builder.CreateCall(F, {Builder.getInt1(true)});
8115
8116
30
  if (isExecHi) {
8117
10
    Value *Rt2 = Builder.CreateLShr(Call, 32);
8118
10
    Rt2 = Builder.CreateTrunc(Rt2, CGF.Int32Ty);
8119
10
    return Rt2;
8120
10
  }
8121
8122
20
  return Call;
8123
30
}
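// As an illustrative sketch on a wave64 target, the "exec_hi" variant is
// emitted roughly as:
//   %ballot = call i64 @llvm.amdgcn.ballot.i64(i1 true)
//   %hi     = lshr i64 %ballot, 32
//   %res    = trunc i64 %hi to i32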
8124
8125
// Generates the IR for the read/write special register builtin.
8126
// ValueType is the type of the value that is to be written or read;
8127
// RegisterType is the type of the register being written to or read from.
8128
static Value *EmitSpecialRegisterBuiltin(CodeGenFunction &CGF,
8129
                                         const CallExpr *E,
8130
                                         llvm::Type *RegisterType,
8131
                                         llvm::Type *ValueType,
8132
                                         SpecialRegisterAccessKind AccessKind,
8133
96
                                         StringRef SysReg = "") {
8134
  // The read_register and write_register intrinsics only support 32-, 64- and 128-bit operations.
8135
96
  assert((RegisterType->isIntegerTy(32) || RegisterType->isIntegerTy(64) ||
8136
96
          RegisterType->isIntegerTy(128)) &&
8137
96
         "Unsupported size for register.");
8138
8139
96
  CodeGen::CGBuilderTy &Builder = CGF.Builder;
8140
96
  CodeGen::CodeGenModule &CGM = CGF.CGM;
8141
96
  LLVMContext &Context = CGM.getLLVMContext();
8142
8143
96
  if (SysReg.empty()) {
8144
96
    const Expr *SysRegStrExpr = E->getArg(0)->IgnoreParenCasts();
8145
96
    SysReg = cast<clang::StringLiteral>(SysRegStrExpr)->getString();
8146
96
  }
8147
8148
96
  llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysReg) };
8149
96
  llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
8150
96
  llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
8151
8152
96
  llvm::Type *Types[] = { RegisterType };
8153
8154
96
  bool MixedTypes = RegisterType->isIntegerTy(64) && ValueType->isIntegerTy(32);
8155
96
  assert(!(RegisterType->isIntegerTy(32) && ValueType->isIntegerTy(64))
8156
96
            && "Can't fit 64-bit value in 32-bit register");
8157
8158
96
  if (AccessKind != Write) {
8159
48
    assert(AccessKind == NormalRead || AccessKind == VolatileRead);
8160
48
    llvm::Function *F = CGM.getIntrinsic(
8161
48
        AccessKind == VolatileRead ? llvm::Intrinsic::read_volatile_register
8162
48
                                   : llvm::Intrinsic::read_register,
8163
48
        Types);
8164
48
    llvm::Value *Call = Builder.CreateCall(F, Metadata);
8165
8166
48
    if (MixedTypes)
8167
      // Read into 64 bit register and then truncate result to 32 bit.
8168
13
      return Builder.CreateTrunc(Call, ValueType);
8169
8170
35
    if (ValueType->isPointerTy())
8171
      // Have i32/i64 result (Call) but want to return a VoidPtrTy (i8*).
8172
11
      return Builder.CreateIntToPtr(Call, ValueType);
8173
8174
24
    return Call;
8175
35
  }
8176
8177
48
  llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types);
8178
48
  llvm::Value *ArgValue = CGF.EmitScalarExpr(E->getArg(1));
8179
48
  if (MixedTypes) {
8180
    // Extend 32 bit write value to 64 bit to pass to write.
8181
13
    ArgValue = Builder.CreateZExt(ArgValue, RegisterType);
8182
13
    return Builder.CreateCall(F, { Metadata, ArgValue });
8183
13
  }
8184
8185
35
  if (ValueType->isPointerTy()) {
8186
    // Have VoidPtrTy ArgValue but want to return an i32/i64.
8187
11
    ArgValue = Builder.CreatePtrToInt(ArgValue, RegisterType);
8188
11
    return Builder.CreateCall(F, { Metadata, ArgValue });
8189
11
  }
8190
8191
24
  return Builder.CreateCall(F, { Metadata, ArgValue });
8192
35
}
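// As an illustrative sketch (the register name is a placeholder), reading a
// 32-bit value through a 64-bit register (the MixedTypes case above) is
// emitted roughly as:
//   %reg = call i64 @llvm.read_register.i64(metadata !{!"some_sysreg"})
//   %val = trunc i64 %reg to i32
// and the matching write zero-extends the 32-bit value before calling
// @llvm.write_register.i64.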
8193
8194
/// Return true if BuiltinID is an overloaded Neon intrinsic with an extra
8195
/// argument that specifies the vector type.
8196
1.77k
static bool HasExtraNeonArgument(unsigned BuiltinID) {
8197
1.77k
  switch (BuiltinID) {
8198
1.64k
  default: break;
8199
1.64k
  case NEON::BI__builtin_neon_vget_lane_i8:
8200
7
  case NEON::BI__builtin_neon_vget_lane_i16:
8201
41
  case NEON::BI__builtin_neon_vget_lane_bf16:
8202
43
  case NEON::BI__builtin_neon_vget_lane_i32:
8203
45
  case NEON::BI__builtin_neon_vget_lane_i64:
8204
46
  case NEON::BI__builtin_neon_vget_lane_f32:
8205
49
  case NEON::BI__builtin_neon_vgetq_lane_i8:
8206
53
  case NEON::BI__builtin_neon_vgetq_lane_i16:
8207
87
  case NEON::BI__builtin_neon_vgetq_lane_bf16:
8208
89
  case NEON::BI__builtin_neon_vgetq_lane_i32:
8209
91
  case NEON::BI__builtin_neon_vgetq_lane_i64:
8210
92
  case NEON::BI__builtin_neon_vgetq_lane_f32:
8211
94
  case NEON::BI__builtin_neon_vduph_lane_bf16:
8212
96
  case NEON::BI__builtin_neon_vduph_laneq_bf16:
8213
99
  case NEON::BI__builtin_neon_vset_lane_i8:
8214
103
  case NEON::BI__builtin_neon_vset_lane_i16:
8215
105
  case NEON::BI__builtin_neon_vset_lane_bf16:
8216
107
  case NEON::BI__builtin_neon_vset_lane_i32:
8217
109
  case NEON::BI__builtin_neon_vset_lane_i64:
8218
110
  case NEON::BI__builtin_neon_vset_lane_f32:
8219
113
  case NEON::BI__builtin_neon_vsetq_lane_i8:
8220
117
  case NEON::BI__builtin_neon_vsetq_lane_i16:
8221
119
  case NEON::BI__builtin_neon_vsetq_lane_bf16:
8222
121
  case NEON::BI__builtin_neon_vsetq_lane_i32:
8223
123
  case NEON::BI__builtin_neon_vsetq_lane_i64:
8224
124
  case NEON::BI__builtin_neon_vsetq_lane_f32:
8225
125
  case NEON::BI__builtin_neon_vsha1h_u32:
8226
126
  case NEON::BI__builtin_neon_vsha1cq_u32:
8227
127
  case NEON::BI__builtin_neon_vsha1pq_u32:
8228
128
  case NEON::BI__builtin_neon_vsha1mq_u32:
8229
130
  case NEON::BI__builtin_neon_vcvth_bf16_f32:
8230
131
  case clang::ARM::BI_MoveToCoprocessor:
8231
132
  case clang::ARM::BI_MoveToCoprocessor2:
8232
132
    return false;
8233
1.77k
  }
8234
1.64k
  return true;
8235
1.77k
}
8236
8237
Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
8238
                                           const CallExpr *E,
8239
                                           ReturnValueSlot ReturnValue,
8240
6.67k
                                           llvm::Triple::ArchType Arch) {
8241
6.67k
  if (auto Hint = GetValueForARMHint(BuiltinID))
8242
17
    return Hint;
8243
8244
6.66k
  if (BuiltinID == clang::ARM::BI__emit) {
8245
2
    bool IsThumb = getTarget().getTriple().getArch() == llvm::Triple::thumb;
8246
2
    llvm::FunctionType *FTy =
8247
2
        llvm::FunctionType::get(VoidTy, /*Variadic=*/false);
8248
8249
2
    Expr::EvalResult Result;
8250
2
    if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
8251
0
      llvm_unreachable("Sema will ensure that the parameter is constant");
8252
8253
2
    llvm::APSInt Value = Result.Val.getInt();
8254
2
    uint64_t ZExtValue = Value.zextOrTrunc(IsThumb ? 16 : 32).getZExtValue();
8255
8256
2
    llvm::InlineAsm *Emit =
8257
2
        IsThumb ? InlineAsm::get(FTy, ".inst.n 0x" + utohexstr(ZExtValue), "",
8258
2
                                 /*hasSideEffects=*/true)
8259
2
                : InlineAsm::get(FTy, ".inst 0x" + utohexstr(ZExtValue), "",
8260
0
                                 /*hasSideEffects=*/true);
8261
8262
2
    return Builder.CreateCall(Emit);
8263
2
  }
8264
8265
6.66k
  if (BuiltinID == clang::ARM::BI__builtin_arm_dbg) {
8266
2
    Value *Option = EmitScalarExpr(E->getArg(0));
8267
2
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_dbg), Option);
8268
2
  }
8269
8270
6.65k
  if (BuiltinID == clang::ARM::BI__builtin_arm_prefetch) {
8271
7
    Value *Address = EmitScalarExpr(E->getArg(0));
8272
7
    Value *RW      = EmitScalarExpr(E->getArg(1));
8273
7
    Value *IsData  = EmitScalarExpr(E->getArg(2));
8274
8275
    // Locality is not supported on ARM target
8276
7
    Value *Locality = llvm::ConstantInt::get(Int32Ty, 3);
8277
8278
7
    Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
8279
7
    return Builder.CreateCall(F, {Address, RW, Locality, IsData});
8280
7
  }
8281
8282
6.65k
  if (BuiltinID == clang::ARM::BI__builtin_arm_rbit) {
8283
4
    llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
8284
4
    return Builder.CreateCall(
8285
4
        CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
8286
4
  }
8287
8288
6.64k
  if (BuiltinID == clang::ARM::BI__builtin_arm_clz ||
8289
6.64k
      BuiltinID == clang::ARM::BI__builtin_arm_clz64) {
8290
3
    llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
8291
3
    Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Arg->getType());
8292
3
    Value *Res = Builder.CreateCall(F, {Arg, Builder.getInt1(false)});
8293
3
    if (BuiltinID == clang::ARM::BI__builtin_arm_clz64)
8294
1
      Res = Builder.CreateTrunc(Res, Builder.getInt32Ty());
8295
3
    return Res;
8296
3
  }
8297
8298
8299
6.64k
  if (BuiltinID == clang::ARM::BI__builtin_arm_cls) {
8300
4
    llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
8301
4
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_cls), Arg, "cls");
8302
4
  }
8303
6.64k
  if (BuiltinID == clang::ARM::BI__builtin_arm_cls64) {
8304
2
    llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
8305
2
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_cls64), Arg,
8306
2
                              "cls");
8307
2
  }
8308
8309
6.63k
  if (BuiltinID == clang::ARM::BI__clear_cache) {
8310
2
    assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
8311
2
    const FunctionDecl *FD = E->getDirectCallee();
8312
2
    Value *Ops[2];
8313
6
    for (unsigned i = 0; i < 2; i++)
8314
4
      Ops[i] = EmitScalarExpr(E->getArg(i));
8315
2
    llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
8316
2
    llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
8317
2
    StringRef Name = FD->getName();
8318
2
    return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
8319
2
  }
8320
8321
6.63k
  if (BuiltinID == clang::ARM::BI__builtin_arm_mcrr ||
8322
6.63k
      BuiltinID == clang::ARM::BI__builtin_arm_mcrr2) {
8323
2
    Function *F;
8324
8325
2
    switch (BuiltinID) {
8326
0
    default: llvm_unreachable("unexpected builtin");
8327
1
    case clang::ARM::BI__builtin_arm_mcrr:
8328
1
      F = CGM.getIntrinsic(Intrinsic::arm_mcrr);
8329
1
      break;
8330
1
    case clang::ARM::BI__builtin_arm_mcrr2:
8331
1
      F = CGM.getIntrinsic(Intrinsic::arm_mcrr2);
8332
1
      break;
8333
2
    }
8334
8335
    // The MCRR{2} instruction has 5 operands, but
8336
    // the intrinsic has only 4, because Rt and Rt2
8337
    // are represented as a single unsigned 64-bit
8338
    // integer in the intrinsic definition, even
8339
    // though the instruction treats them as two
8340
    // 32-bit integers.
8341
8342
2
    Value *Coproc = EmitScalarExpr(E->getArg(0));
8343
2
    Value *Opc1 = EmitScalarExpr(E->getArg(1));
8344
2
    Value *RtAndRt2 = EmitScalarExpr(E->getArg(2));
8345
2
    Value *CRm = EmitScalarExpr(E->getArg(3));
8346
8347
2
    Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
8348
2
    Value *Rt = Builder.CreateTruncOrBitCast(RtAndRt2, Int32Ty);
8349
2
    Value *Rt2 = Builder.CreateLShr(RtAndRt2, C1);
8350
2
    Rt2 = Builder.CreateTruncOrBitCast(Rt2, Int32Ty);
8351
8352
2
    return Builder.CreateCall(F, {Coproc, Opc1, Rt, Rt2, CRm});
8353
2
  }
8354
8355
6.63k
  if (BuiltinID == clang::ARM::BI__builtin_arm_mrrc ||
8356
6.63k
      BuiltinID == clang::ARM::BI__builtin_arm_mrrc2) {
8357
2
    Function *F;
8358
8359
2
    switch (BuiltinID) {
8360
0
    default: llvm_unreachable("unexpected builtin");
8361
1
    case clang::ARM::BI__builtin_arm_mrrc:
8362
1
      F = CGM.getIntrinsic(Intrinsic::arm_mrrc);
8363
1
      break;
8364
1
    case clang::ARM::BI__builtin_arm_mrrc2:
8365
1
      F = CGM.getIntrinsic(Intrinsic::arm_mrrc2);
8366
1
      break;
8367
2
    }
8368
8369
2
    Value *Coproc = EmitScalarExpr(E->getArg(0));
8370
2
    Value *Opc1 = EmitScalarExpr(E->getArg(1));
8371
2
    Value *CRm  = EmitScalarExpr(E->getArg(2));
8372
2
    Value *RtAndRt2 = Builder.CreateCall(F, {Coproc, Opc1, CRm});
8373
8374
    // Returns an unsigned 64 bit integer, represented
8375
    // as two 32 bit integers.
8376
8377
2
    Value *Rt = Builder.CreateExtractValue(RtAndRt2, 1);
8378
2
    Value *Rt1 = Builder.CreateExtractValue(RtAndRt2, 0);
8379
2
    Rt = Builder.CreateZExt(Rt, Int64Ty);
8380
2
    Rt1 = Builder.CreateZExt(Rt1, Int64Ty);
8381
8382
2
    Value *ShiftCast = llvm::ConstantInt::get(Int64Ty, 32);
8383
2
    RtAndRt2 = Builder.CreateShl(Rt, ShiftCast, "shl", true);
8384
2
    RtAndRt2 = Builder.CreateOr(RtAndRt2, Rt1);
8385
8386
2
    return Builder.CreateBitCast(RtAndRt2, ConvertType(E->getType()));
8387
2
  }
8388
8389
6.63k
  if (BuiltinID == clang::ARM::BI__builtin_arm_ldrexd ||
8390
6.63k
      ((BuiltinID == clang::ARM::BI__builtin_arm_ldrex ||
8391
6.63k
        BuiltinID == clang::ARM::BI__builtin_arm_ldaex) &&
8392
6.63k
       getContext().getTypeSize(E->getType()) == 64) ||
8393
6.63k
      BuiltinID == clang::ARM::BI__ldrexd) {
8394
7
    Function *F;
8395
8396
7
    switch (BuiltinID) {
8397
0
    default: llvm_unreachable("unexpected builtin");
8398
3
    case clang::ARM::BI__builtin_arm_ldaex:
8399
3
      F = CGM.getIntrinsic(Intrinsic::arm_ldaexd);
8400
3
      break;
8401
0
    case clang::ARM::BI__builtin_arm_ldrexd:
8402
3
    case clang::ARM::BI__builtin_arm_ldrex:
8403
4
    case clang::ARM::BI__ldrexd:
8404
4
      F = CGM.getIntrinsic(Intrinsic::arm_ldrexd);
8405
4
      break;
8406
7
    }
8407
8408
7
    Value *LdPtr = EmitScalarExpr(E->getArg(0));
8409
7
    Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy),
8410
7
                                    "ldrexd");
8411
8412
7
    Value *Val0 = Builder.CreateExtractValue(Val, 1);
8413
7
    Value *Val1 = Builder.CreateExtractValue(Val, 0);
8414
7
    Val0 = Builder.CreateZExt(Val0, Int64Ty);
8415
7
    Val1 = Builder.CreateZExt(Val1, Int64Ty);
8416
8417
7
    Value *ShiftCst = llvm::ConstantInt::get(Int64Ty, 32);
8418
7
    Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
8419
7
    Val = Builder.CreateOr(Val, Val1);
8420
7
    return Builder.CreateBitCast(Val, ConvertType(E->getType()));
8421
7
  }
8422
8423
6.62k
  if (BuiltinID == clang::ARM::BI__builtin_arm_ldrex ||
8424
6.62k
      BuiltinID == clang::ARM::BI__builtin_arm_ldaex) {
8425
14
    Value *LoadAddr = EmitScalarExpr(E->getArg(0));
8426
8427
14
    QualType Ty = E->getType();
8428
14
    llvm::Type *RealResTy = ConvertType(Ty);
8429
14
    llvm::Type *IntTy =
8430
14
        llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
8431
8432
14
    Function *F = CGM.getIntrinsic(
8433
14
        BuiltinID == clang::ARM::BI__builtin_arm_ldaex ? Intrinsic::arm_ldaex
8434
14
                                                       : Intrinsic::arm_ldrex,
8435
14
        UnqualPtrTy);
8436
14
    CallInst *Val = Builder.CreateCall(F, LoadAddr, "ldrex");
8437
14
    Val->addParamAttr(
8438
14
        0, Attribute::get(getLLVMContext(), Attribute::ElementType, IntTy));
8439
8440
14
    if (RealResTy->isPointerTy())
8441
4
      return Builder.CreateIntToPtr(Val, RealResTy);
8442
10
    else {
8443
10
      llvm::Type *IntResTy = llvm::IntegerType::get(
8444
10
          getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));
8445
10
      return Builder.CreateBitCast(Builder.CreateTruncOrBitCast(Val, IntResTy),
8446
10
                                   RealResTy);
8447
10
    }
8448
14
  }
8449
8450
6.61k
  if (BuiltinID == clang::ARM::BI__builtin_arm_strexd ||
8451
6.61k
      ((BuiltinID == clang::ARM::BI__builtin_arm_stlex ||
8452
6.61k
        BuiltinID == clang::ARM::BI__builtin_arm_strex) &&
8453
6.61k
       getContext().getTypeSize(E->getArg(0)->getType()) == 64)) {
8454
4
    Function *F = CGM.getIntrinsic(
8455
4
        BuiltinID == clang::ARM::BI__builtin_arm_stlex ? Intrinsic::arm_stlexd
8456
4
                                                       : Intrinsic::arm_strexd);
8457
4
    llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty);
8458
8459
4
    Address Tmp = CreateMemTemp(E->getArg(0)->getType());
8460
4
    Value *Val = EmitScalarExpr(E->getArg(0));
8461
4
    Builder.CreateStore(Val, Tmp);
8462
8463
4
    Address LdPtr = Tmp.withElementType(STy);
8464
4
    Val = Builder.CreateLoad(LdPtr);
8465
8466
4
    Value *Arg0 = Builder.CreateExtractValue(Val, 0);
8467
4
    Value *Arg1 = Builder.CreateExtractValue(Val, 1);
8468
4
    Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), Int8PtrTy);
8469
4
    return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "strexd");
8470
4
  }
8471
8472
6.60k
  if (BuiltinID == clang::ARM::BI__builtin_arm_strex ||
8473
6.60k
      BuiltinID == clang::ARM::BI__builtin_arm_stlex) {
8474
12
    Value *StoreVal = EmitScalarExpr(E->getArg(0));
8475
12
    Value *StoreAddr = EmitScalarExpr(E->getArg(1));
8476
8477
12
    QualType Ty = E->getArg(0)->getType();
8478
12
    llvm::Type *StoreTy =
8479
12
        llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
8480
8481
12
    if (StoreVal->getType()->isPointerTy())
8482
2
      StoreVal = Builder.CreatePtrToInt(StoreVal, Int32Ty);
8483
10
    else {
8484
10
      llvm::Type *IntTy = llvm::IntegerType::get(
8485
10
          getLLVMContext(),
8486
10
          CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));
8487
10
      StoreVal = Builder.CreateBitCast(StoreVal, IntTy);
8488
10
      StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int32Ty);
8489
10
    }
8490
8491
12
    Function *F = CGM.getIntrinsic(
8492
12
        BuiltinID == clang::ARM::BI__builtin_arm_stlex ? Intrinsic::arm_stlex
8493
12
                                                       : Intrinsic::arm_strex,
8494
12
        StoreAddr->getType());
8495
8496
12
    CallInst *CI = Builder.CreateCall(F, {StoreVal, StoreAddr}, "strex");
8497
12
    CI->addParamAttr(
8498
12
        1, Attribute::get(getLLVMContext(), Attribute::ElementType, StoreTy));
8499
12
    return CI;
8500
12
  }
8501
8502
6.59k
  if (BuiltinID == clang::ARM::BI__builtin_arm_clrex) {
8503
1
    Function *F = CGM.getIntrinsic(Intrinsic::arm_clrex);
8504
1
    return Builder.CreateCall(F);
8505
1
  }
8506
8507
  // CRC32
8508
6.59k
  Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
8509
6.59k
  switch (BuiltinID) {
8510
3
  case clang::ARM::BI__builtin_arm_crc32b:
8511
3
    CRCIntrinsicID = Intrinsic::arm_crc32b; break;
8512
2
  case clang::ARM::BI__builtin_arm_crc32cb:
8513
2
    CRCIntrinsicID = Intrinsic::arm_crc32cb; break;
8514
2
  case clang::ARM::BI__builtin_arm_crc32h:
8515
2
    CRCIntrinsicID = Intrinsic::arm_crc32h; break;
8516
2
  case clang::ARM::BI__builtin_arm_crc32ch:
8517
2
    CRCIntrinsicID = Intrinsic::arm_crc32ch; break;
8518
2
  case clang::ARM::BI__builtin_arm_crc32w:
8519
4
  case clang::ARM::BI__builtin_arm_crc32d:
8520
4
    CRCIntrinsicID = Intrinsic::arm_crc32w; break;
8521
2
  case clang::ARM::BI__builtin_arm_crc32cw:
8522
4
  case clang::ARM::BI__builtin_arm_crc32cd:
8523
4
    CRCIntrinsicID = Intrinsic::arm_crc32cw; break;
8524
6.59k
  }
8525
8526
6.59k
  if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
8527
17
    Value *Arg0 = EmitScalarExpr(E->getArg(0));
8528
17
    Value *Arg1 = EmitScalarExpr(E->getArg(1));
8529
8530
    // crc32{c,}d intrinsics are implemented as two calls to crc32{c,}w
8531
    // intrinsics, hence we need different codegen for these cases.
8532
17
    if (BuiltinID == clang::ARM::BI__builtin_arm_crc32d ||
8533
17
        BuiltinID == clang::ARM::BI__builtin_arm_crc32cd) {
8534
4
      Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
8535
4
      Value *Arg1a = Builder.CreateTruncOrBitCast(Arg1, Int32Ty);
8536
4
      Value *Arg1b = Builder.CreateLShr(Arg1, C1);
8537
4
      Arg1b = Builder.CreateTruncOrBitCast(Arg1b, Int32Ty);
8538
8539
4
      Function *F = CGM.getIntrinsic(CRCIntrinsicID);
8540
4
      Value *Res = Builder.CreateCall(F, {Arg0, Arg1a});
8541
4
      return Builder.CreateCall(F, {Res, Arg1b});
8542
13
    } else {
8543
13
      Arg1 = Builder.CreateZExtOrBitCast(Arg1, Int32Ty);
8544
8545
13
      Function *F = CGM.getIntrinsic(CRCIntrinsicID);
8546
13
      return Builder.CreateCall(F, {Arg0, Arg1});
8547
13
    }
8548
17
  }
8549
8550
6.57k
  if (BuiltinID == clang::ARM::BI__builtin_arm_rsr ||
8551
6.57k
      BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
8552
6.57k
      BuiltinID == clang::ARM::BI__builtin_arm_rsrp ||
8553
6.57k
      BuiltinID == clang::ARM::BI__builtin_arm_wsr ||
8554
6.57k
      BuiltinID == clang::ARM::BI__builtin_arm_wsr64 ||
8555
6.57k
      BuiltinID == clang::ARM::BI__builtin_arm_wsrp) {
8556
8557
16
    SpecialRegisterAccessKind AccessKind = Write;
8558
16
    if (BuiltinID == clang::ARM::BI__builtin_arm_rsr ||
8559
16
        BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
8560
16
        BuiltinID == clang::ARM::BI__builtin_arm_rsrp)
8561
8
      AccessKind = VolatileRead;
8562
8563
16
    bool IsPointerBuiltin = BuiltinID == clang::ARM::BI__builtin_arm_rsrp ||
8564
16
                            BuiltinID == clang::ARM::BI__builtin_arm_wsrp;
8565
8566
16
    bool Is64Bit = BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
8567
16
                   BuiltinID == clang::ARM::BI__builtin_arm_wsr64;
8568
8569
16
    llvm::Type *ValueType;
8570
16
    llvm::Type *RegisterType;
8571
16
    if (IsPointerBuiltin) {
8572
4
      ValueType = VoidPtrTy;
8573
4
      RegisterType = Int32Ty;
8574
12
    } else if (Is64Bit) {
8575
6
      ValueType = RegisterType = Int64Ty;
8576
6
    } else {
8577
6
      ValueType = RegisterType = Int32Ty;
8578
6
    }
8579
8580
16
    return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType,
8581
16
                                      AccessKind);
8582
16
  }
8583
8584
6.56k
  if (BuiltinID == ARM::BI__builtin_sponentry) {
8585
1
    llvm::Function *F = CGM.getIntrinsic(Intrinsic::sponentry, AllocaInt8PtrTy);
8586
1
    return Builder.CreateCall(F);
8587
1
  }
8588
8589
  // Handle MSVC intrinsics before argument evaluation to prevent double
8590
  // evaluation.
8591
6.56k
  if (std::optional<MSVCIntrin> MsvcIntId = translateArmToMsvcIntrin(BuiltinID))
8592
154
    return EmitMSVCBuiltinExpr(*MsvcIntId, E);
8593
8594
  // Deal with MVE builtins
8595
6.40k
  if (Value *Result = EmitARMMVEBuiltinExpr(BuiltinID, E, ReturnValue, Arch))
8596
4.58k
    return Result;
8597
  // Handle CDE builtins
8598
1.81k
  if (Value *Result = EmitARMCDEBuiltinExpr(BuiltinID, E, ReturnValue, Arch))
8599
39
    return Result;
8600
8601
  // Some intrinsics are equivalent - if they are, use the base intrinsic ID.
8602
255k
  auto It = llvm::find_if(NEONEquivalentIntrinsicMap, [BuiltinID](auto &P) {
8603
255k
    return P.first == BuiltinID;
8604
255k
  });
8605
1.77k
  if (It != end(NEONEquivalentIntrinsicMap))
8606
129
    BuiltinID = It->second;
8607
8608
  // Find out if any arguments are required to be integer constant
8609
  // expressions.
8610
1.77k
  unsigned ICEArguments = 0;
8611
1.77k
  ASTContext::GetBuiltinTypeError Error;
8612
1.77k
  getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
8613
1.77k
  assert(Error == ASTContext::GE_None && "Should not codegen an error");
8614
8615
1.77k
  auto getAlignmentValue32 = [&](Address addr) -> Value* {
8616
6
    return Builder.getInt32(addr.getAlignment().getQuantity());
8617
6
  };
8618
8619
1.77k
  Address PtrOp0 = Address::invalid();
8620
1.77k
  Address PtrOp1 = Address::invalid();
8621
1.77k
  SmallVector<Value*, 4> Ops;
8622
1.77k
  bool HasExtraArg = HasExtraNeonArgument(BuiltinID);
8623
1.77k
  unsigned NumArgs = E->getNumArgs() - (HasExtraArg ? 1 : 0);
8624
6.14k
  for (unsigned i = 0, e = NumArgs; i != e; i++) {
8625
4.36k
    if (i == 0) {
8626
1.77k
      switch (BuiltinID) {
8627
15
      case NEON::BI__builtin_neon_vld1_v:
8628
33
      case NEON::BI__builtin_neon_vld1q_v:
8629
47
      case NEON::BI__builtin_neon_vld1q_lane_v:
8630
60
      case NEON::BI__builtin_neon_vld1_lane_v:
8631
74
      case NEON::BI__builtin_neon_vld1_dup_v:
8632
87
      case NEON::BI__builtin_neon_vld1q_dup_v:
8633
100
      case NEON::BI__builtin_neon_vst1_v:
8634
114
      case NEON::BI__builtin_neon_vst1q_v:
8635
128
      case NEON::BI__builtin_neon_vst1q_lane_v:
8636
142
      case NEON::BI__builtin_neon_vst1_lane_v:
8637
155
      case NEON::BI__builtin_neon_vst2_v:
8638
166
      case NEON::BI__builtin_neon_vst2q_v:
8639
177
      case NEON::BI__builtin_neon_vst2_lane_v:
8640
185
      case NEON::BI__builtin_neon_vst2q_lane_v:
8641
198
      case NEON::BI__builtin_neon_vst3_v:
8642
209
      case NEON::BI__builtin_neon_vst3q_v:
8643
220
      case NEON::BI__builtin_neon_vst3_lane_v:
8644
228
      case NEON::BI__builtin_neon_vst3q_lane_v:
8645
241
      case NEON::BI__builtin_neon_vst4_v:
8646
252
      case NEON::BI__builtin_neon_vst4q_v:
8647
263
      case NEON::BI__builtin_neon_vst4_lane_v:
8648
271
      case NEON::BI__builtin_neon_vst4q_lane_v:
8649
        // Get the alignment for the argument in addition to the value;
8650
        // we'll use it later.
8651
271
        PtrOp0 = EmitPointerWithAlignment(E->getArg(0));
8652
271
        Ops.push_back(PtrOp0.getPointer());
8653
271
        continue;
8654
1.77k
      }
8655
1.77k
    }
8656
4.09k
    if (i == 1) {
8657
1.51k
      switch (BuiltinID) {
8658
13
      case NEON::BI__builtin_neon_vld2_v:
8659
24
      case NEON::BI__builtin_neon_vld2q_v:
8660
37
      case NEON::BI__builtin_neon_vld3_v:
8661
48
      case NEON::BI__builtin_neon_vld3q_v:
8662
61
      case NEON::BI__builtin_neon_vld4_v:
8663
72
      case NEON::BI__builtin_neon_vld4q_v:
8664
83
      case NEON::BI__builtin_neon_vld2_lane_v:
8665
91
      case NEON::BI__builtin_neon_vld2q_lane_v:
8666
102
      case NEON::BI__builtin_neon_vld3_lane_v:
8667
110
      case NEON::BI__builtin_neon_vld3q_lane_v:
8668
121
      case NEON::BI__builtin_neon_vld4_lane_v:
8669
129
      case NEON::BI__builtin_neon_vld4q_lane_v:
8670
142
      case NEON::BI__builtin_neon_vld2_dup_v:
8671
153
      case NEON::BI__builtin_neon_vld2q_dup_v:
8672
166
      case NEON::BI__builtin_neon_vld3_dup_v:
8673
177
      case NEON::BI__builtin_neon_vld3q_dup_v:
8674
190
      case NEON::BI__builtin_neon_vld4_dup_v:
8675
201
      case NEON::BI__builtin_neon_vld4q_dup_v:
8676
        // Get the alignment for the argument in addition to the value;
8677
        // we'll use it later.
8678
201
        PtrOp1 = EmitPointerWithAlignment(E->getArg(1));
8679
201
        Ops.push_back(PtrOp1.getPointer());
8680
201
        continue;
8681
1.51k
      }
8682
1.51k
    }
8683
8684
3.88k
    Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
8685
3.88k
  }
8686
8687
1.77k
  switch (BuiltinID) {
8688
1.64k
  default: break;
8689
8690
1.64k
  case NEON::BI__builtin_neon_vget_lane_i8:
8691
7
  case NEON::BI__builtin_neon_vget_lane_i16:
8692
9
  case NEON::BI__builtin_neon_vget_lane_i32:
8693
11
  case NEON::BI__builtin_neon_vget_lane_i64:
8694
45
  case NEON::BI__builtin_neon_vget_lane_bf16:
8695
46
  case NEON::BI__builtin_neon_vget_lane_f32:
8696
49
  case NEON::BI__builtin_neon_vgetq_lane_i8:
8697
53
  case NEON::BI__builtin_neon_vgetq_lane_i16:
8698
55
  case NEON::BI__builtin_neon_vgetq_lane_i32:
8699
57
  case NEON::BI__builtin_neon_vgetq_lane_i64:
8700
91
  case NEON::BI__builtin_neon_vgetq_lane_bf16:
8701
92
  case NEON::BI__builtin_neon_vgetq_lane_f32:
8702
94
  case NEON::BI__builtin_neon_vduph_lane_bf16:
8703
96
  case NEON::BI__builtin_neon_vduph_laneq_bf16:
8704
96
    return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
8705
8706
1
  case NEON::BI__builtin_neon_vrndns_f32: {
8707
1
    Value *Arg = EmitScalarExpr(E->getArg(0));
8708
1
    llvm::Type *Tys[] = {Arg->getType()};
8709
1
    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vrintn, Tys);
8710
1
    return Builder.CreateCall(F, {Arg}, "vrndn"); }
8711
8712
3
  case NEON::BI__builtin_neon_vset_lane_i8:
8713
7
  case NEON::BI__builtin_neon_vset_lane_i16:
8714
9
  case NEON::BI__builtin_neon_vset_lane_i32:
8715
11
  case NEON::BI__builtin_neon_vset_lane_i64:
8716
13
  case NEON::BI__builtin_neon_vset_lane_bf16:
8717
14
  case NEON::BI__builtin_neon_vset_lane_f32:
8718
17
  case NEON::BI__builtin_neon_vsetq_lane_i8:
8719
21
  case NEON::BI__builtin_neon_vsetq_lane_i16:
8720
23
  case NEON::BI__builtin_neon_vsetq_lane_i32:
8721
25
  case NEON::BI__builtin_neon_vsetq_lane_i64:
8722
27
  case NEON::BI__builtin_neon_vsetq_lane_bf16:
8723
28
  case NEON::BI__builtin_neon_vsetq_lane_f32:
8724
28
    return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
8725
8726
1
  case NEON::BI__builtin_neon_vsha1h_u32:
8727
1
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1h), Ops,
8728
1
                        "vsha1h");
8729
1
  case NEON::BI__builtin_neon_vsha1cq_u32:
8730
1
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1c), Ops,
8731
1
                        "vsha1h");
8732
1
  case NEON::BI__builtin_neon_vsha1pq_u32:
8733
1
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1p), Ops,
8734
1
                        "vsha1h");
8735
1
  case NEON::BI__builtin_neon_vsha1mq_u32:
8736
1
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1m), Ops,
8737
1
                        "vsha1h");
8738
8739
2
  case NEON::BI__builtin_neon_vcvth_bf16_f32: {
8740
2
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vcvtbfp2bf), Ops,
8741
2
                        "vcvtbfp2bf");
8742
27
  }
8743
8744
  // The ARM _MoveToCoprocessor builtins put the input register value as
8745
  // the first argument, but the LLVM intrinsic expects it as the third one.
8746
1
  case clang::ARM::BI_MoveToCoprocessor:
8747
2
  case clang::ARM::BI_MoveToCoprocessor2: {
8748
2
    Function *F = CGM.getIntrinsic(BuiltinID == clang::ARM::BI_MoveToCoprocessor
8749
2
                                       ? Intrinsic::arm_mcr
8750
2
                                       : Intrinsic::arm_mcr2);
8751
2
    return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0],
8752
2
                                  Ops[3], Ops[4], Ops[5]});
8753
1
  }
8754
1.77k
  }
8755
8756
  // Get the last argument, which specifies the vector type.
8757
1.64k
  assert(HasExtraArg);
8758
1.64k
  const Expr *Arg = E->getArg(E->getNumArgs()-1);
8759
1.64k
  std::optional<llvm::APSInt> Result =
8760
1.64k
      Arg->getIntegerConstantExpr(getContext());
8761
1.64k
  if (!Result)
8762
0
    return nullptr;
8763
8764
1.64k
  if (BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_f ||
8765
1.64k
      BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_d) {
8766
    // Determine the overloaded type of this builtin.
8767
4
    llvm::Type *Ty;
8768
4
    if (BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_f)
8769
2
      Ty = FloatTy;
8770
2
    else
8771
2
      Ty = DoubleTy;
8772
8773
    // Determine whether this is an unsigned conversion or not.
8774
4
    bool usgn = Result->getZExtValue() == 1;
8775
4
    unsigned Int = usgn ? Intrinsic::arm_vcvtru : Intrinsic::arm_vcvtr;
8776
8777
    // Call the appropriate intrinsic.
8778
4
    Function *F = CGM.getIntrinsic(Int, Ty);
8779
4
    return Builder.CreateCall(F, Ops, "vcvtr");
8780
4
  }
8781
8782
  // Determine the type of this overloaded NEON intrinsic.
8783
1.64k
  NeonTypeFlags Type = Result->getZExtValue();
8784
1.64k
  bool usgn = Type.isUnsigned();
8785
1.64k
  bool rightShift = false;
8786
8787
1.64k
  llvm::FixedVectorType *VTy =
8788
1.64k
      GetNeonType(this, Type, getTarget().hasLegalHalfType(), false,
8789
1.64k
                  getTarget().hasBFloat16Type());
8790
1.64k
  llvm::Type *Ty = VTy;
8791
1.64k
  if (!Ty)
8792
0
    return nullptr;
8793
8794
  // Many NEON builtins have identical semantics and uses in ARM and
8795
  // AArch64. Emit these in a single function.
8796
1.64k
  auto IntrinsicMap = ArrayRef(ARMSIMDIntrinsicMap);
8797
1.64k
  const ARMVectorIntrinsicInfo *Builtin = findARMVectorIntrinsicInMap(
8798
1.64k
      IntrinsicMap, BuiltinID, NEONSIMDIntrinsicsProvenSorted);
8799
1.64k
  if (Builtin)
8800
1.46k
    return EmitCommonNeonBuiltinExpr(
8801
1.46k
        Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
8802
1.46k
        Builtin->NameHint, Builtin->TypeModifier, E, Ops, PtrOp0, PtrOp1, Arch);
8803
8804
179
  unsigned Int;
8805
179
  switch (BuiltinID) {
8806
0
  default: return nullptr;
8807
14
  case NEON::BI__builtin_neon_vld1q_lane_v:
8808
    // Handle 64-bit integer elements as a special case.  Use shuffles of
8809
    // one-element vectors to avoid poor code for i64 in the backend.
8810
14
    if (VTy->getElementType()->isIntegerTy(64)) {
8811
      // Extract the other lane.
8812
3
      Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8813
3
      int Lane = cast<ConstantInt>(Ops[2])->getZExtValue();
8814
3
      Value *SV = llvm::ConstantVector::get(ConstantInt::get(Int32Ty, 1-Lane));
8815
3
      Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
8816
      // Load the value as a one-element vector.
8817
3
      Ty = llvm::FixedVectorType::get(VTy->getElementType(), 1);
8818
3
      llvm::Type *Tys[] = {Ty, Int8PtrTy};
8819
3
      Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Tys);
8820
3
      Value *Align = getAlignmentValue32(PtrOp0);
8821
3
      Value *Ld = Builder.CreateCall(F, {Ops[0], Align});
8822
      // Combine them.
8823
3
      int Indices[] = {1 - Lane, Lane};
8824
3
      return Builder.CreateShuffleVector(Ops[1], Ld, Indices, "vld1q_lane");
8825
3
    }
8826
14
    [[fallthrough]];
8827
24
  case NEON::BI__builtin_neon_vld1_lane_v: {
8828
24
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8829
24
    PtrOp0 = PtrOp0.withElementType(VTy->getElementType());
8830
24
    Value *Ld = Builder.CreateLoad(PtrOp0);
8831
24
    return Builder.CreateInsertElement(Ops[1], Ld, Ops[2], "vld1_lane");
8832
11
  }
8833
7
  case NEON::BI__builtin_neon_vqrshrn_n_v:
8834
7
    Int =
8835
7
      usgn ? Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns;
8836
7
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n",
8837
7
                        1, true);
8838
3
  case NEON::BI__builtin_neon_vqrshrun_n_v:
8839
3
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrshiftnsu, Ty),
8840
3
                        Ops, "vqrshrun_n", 1, true);
8841
6
  case NEON::BI__builtin_neon_vqshrn_n_v:
8842
6
    Int = usgn ? Intrinsic::arm_neon_vqshiftnu : Intrinsic::arm_neon_vqshiftns;
8843
6
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n",
8844
6
                        1, true);
8845
3
  case NEON::BI__builtin_neon_vqshrun_n_v:
8846
3
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftnsu, Ty),
8847
3
                        Ops, "vqshrun_n", 1, true);
8848
0
  case NEON::BI__builtin_neon_vrecpe_v:
8849
0
  case NEON::BI__builtin_neon_vrecpeq_v:
8850
0
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecpe, Ty),
8851
0
                        Ops, "vrecpe");
8852
6
  case NEON::BI__builtin_neon_vrshrn_n_v:
8853
6
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrshiftn, Ty),
8854
6
                        Ops, "vrshrn_n", 1, true);
8855
8
  case NEON::BI__builtin_neon_vrsra_n_v:
8856
16
  case NEON::BI__builtin_neon_vrsraq_n_v:
8857
16
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
8858
16
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8859
16
    Ops[2] = EmitNeonShiftVector(Ops[2], Ty, true);
8860
16
    Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
8861
16
    Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Ty), {Ops[1], Ops[2]});
8862
16
    return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n");
8863
10
  case NEON::BI__builtin_neon_vsri_n_v:
8864
20
  case NEON::BI__builtin_neon_vsriq_n_v:
8865
20
    rightShift = true;
8866
20
    [[fallthrough]];
8867
30
  case NEON::BI__builtin_neon_vsli_n_v:
8868
40
  case NEON::BI__builtin_neon_vsliq_n_v:
8869
40
    Ops[2] = EmitNeonShiftVector(Ops[2], Ty, rightShift);
8870
40
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftins, Ty),
8871
40
                        Ops, "vsli_n");
8872
11
  case NEON::BI__builtin_neon_vsra_n_v:
8873
19
  case NEON::BI__builtin_neon_vsraq_n_v:
8874
19
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
8875
19
    Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
8876
19
    return Builder.CreateAdd(Ops[0], Ops[1]);
8877
14
  case NEON::BI__builtin_neon_vst1q_lane_v:
8878
    // Handle 64-bit integer elements as a special case.  Use a shuffle to get
8879
    // a one-element vector and avoid poor code for i64 in the backend.
8880
14
    if (VTy->getElementType()->isIntegerTy(64)) {
8881
3
      Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8882
3
      Value *SV = llvm::ConstantVector::get(cast<llvm::Constant>(Ops[2]));
8883
3
      Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
8884
3
      Ops[2] = getAlignmentValue32(PtrOp0);
8885
3
      llvm::Type *Tys[] = {Int8PtrTy, Ops[1]->getType()};
8886
3
      return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1,
8887
3
                                                 Tys), Ops);
8888
3
    }
8889
14
    [[fallthrough]];
8890
25
  case NEON::BI__builtin_neon_vst1_lane_v: {
8891
25
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8892
25
    Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
8893
25
    return Builder.CreateStore(Ops[1],
8894
25
                               PtrOp0.withElementType(Ops[1]->getType()));
8895
11
  }
8896
3
  case NEON::BI__builtin_neon_vtbl1_v:
8897
3
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl1),
8898
3
                        Ops, "vtbl1");
8899
3
  case NEON::BI__builtin_neon_vtbl2_v:
8900
3
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl2),
8901
3
                        Ops, "vtbl2");
8902
3
  case NEON::BI__builtin_neon_vtbl3_v:
8903
3
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl3),
8904
3
                        Ops, "vtbl3");
8905
3
  case NEON::BI__builtin_neon_vtbl4_v:
8906
3
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl4),
8907
3
                        Ops, "vtbl4");
8908
3
  case NEON::BI__builtin_neon_vtbx1_v:
8909
3
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx1),
8910
3
                        Ops, "vtbx1");
8911
3
  case NEON::BI__builtin_neon_vtbx2_v:
8912
3
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx2),
8913
3
                        Ops, "vtbx2");
8914
3
  case NEON::BI__builtin_neon_vtbx3_v:
8915
3
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx3),
8916
3
                        Ops, "vtbx3");
8917
3
  case NEON::BI__builtin_neon_vtbx4_v:
8918
3
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx4),
8919
3
                        Ops, "vtbx4");
8920
179
  }
8921
179
}
8922
8923
template<typename Integer>
8924
24
static Integer GetIntegerConstantValue(const Expr *E, ASTContext &Context) {
8925
24
  return E->getIntegerConstantExpr(Context)->getExtValue();
8926
24
}
8927
8928
static llvm::Value *SignOrZeroExtend(CGBuilderTy &Builder, llvm::Value *V,
8929
40
                                     llvm::Type *T, bool Unsigned) {
8930
  // Helper function called by Tablegen-constructed ARM MVE builtin codegen,
8931
  // which finds it convenient to specify signed/unsigned as a boolean flag.
8932
40
  return Unsigned ? Builder.CreateZExt(V, T) : Builder.CreateSExt(V, T);
8933
40
}
8934
8935
static llvm::Value *MVEImmediateShr(CGBuilderTy &Builder, llvm::Value *V,
8936
24
                                    uint32_t Shift, bool Unsigned) {
8937
  // MVE helper function for integer shift right. This must handle signed vs
8938
  // unsigned, and also deal specially with the case where the shift count is
8939
  // equal to the lane size. In LLVM IR, an LShr with that parameter would be
8940
  // undefined behavior, but in MVE it's legal, so we must convert it to code
8941
  // that is not undefined in IR.
8942
24
  unsigned LaneBits = cast<llvm::VectorType>(V->getType())
8943
24
                          ->getElementType()
8944
24
                          ->getPrimitiveSizeInBits();
8945
24
  if (Shift == LaneBits) {
8946
    // An unsigned shift of the full lane size always generates zero, so we can
8947
    // simply emit a zero vector. A signed shift of the full lane size does the
8948
    // same thing as shifting by one bit fewer.
8949
12
    if (Unsigned)
8950
6
      return llvm::Constant::getNullValue(V->getType());
8951
6
    else
8952
6
      --Shift;
8953
12
  }
8954
18
  return Unsigned ? Builder.CreateLShr(V, Shift) : Builder.CreateAShr(V, Shift);
8955
24
}
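// For example, an unsigned shift right by the full lane size (Shift ==
// LaneBits) yields an all-zero vector, a signed shift by the full lane size
// is emitted as an arithmetic shift by LaneBits - 1, and any smaller count
// maps directly onto lshr/ashr.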
8956
8957
530
static llvm::Value *ARMMVEVectorSplat(CGBuilderTy &Builder, llvm::Value *V) {
8958
  // MVE-specific helper function for a vector splat, which infers the element
8959
  // count of the output vector by knowing that MVE vectors are all 128 bits
8960
  // wide.
8961
530
  unsigned Elements = 128 / V->getType()->getPrimitiveSizeInBits();
8962
530
  return Builder.CreateVectorSplat(Elements, V);
8963
530
}
8964
8965
static llvm::Value *ARMMVEVectorReinterpret(CGBuilderTy &Builder,
8966
                                            CodeGenFunction *CGF,
8967
                                            llvm::Value *V,
8968
459
                                            llvm::Type *DestType) {
8969
  // Convert one MVE vector type into another by reinterpreting its in-register
8970
  // format.
8971
  //
8972
  // Little-endian, this is identical to a bitcast (which reinterprets the
8973
  // memory format). But big-endian, they're not necessarily the same, because
8974
  // the register and memory formats map to each other differently depending on
8975
  // the lane size.
8976
  //
8977
  // We generate a bitcast whenever we can (if we're little-endian, or if the
8978
  // lane sizes are the same anyway). Otherwise we fall back to an IR intrinsic
8979
  // that performs the different kind of reinterpretation.
8980
459
  if (CGF->getTarget().isBigEndian() &&
8981
459
      V->getType()->getScalarSizeInBits() != DestType->getScalarSizeInBits()) {
8982
187
    return Builder.CreateCall(
8983
187
        CGF->CGM.getIntrinsic(Intrinsic::arm_mve_vreinterpretq,
8984
187
                              {DestType, V->getType()}),
8985
187
        V);
8986
272
  } else {
8987
272
    return Builder.CreateBitCast(V, DestType);
8988
272
  }
8989
459
}
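// For example, reinterpreting an <8 x i16> value as <4 x i32> is a plain
// bitcast on little-endian targets, but on big-endian targets (where the
// lane sizes differ) it becomes a call to the llvm.arm.mve.vreinterpretq
// intrinsic instead.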
8990
8991
16
static llvm::Value *VectorUnzip(CGBuilderTy &Builder, llvm::Value *V, bool Odd) {
8992
  // Make a shufflevector that extracts every other element of a vector (evens
8993
  // or odds, as desired).
8994
16
  SmallVector<int, 16> Indices;
8995
16
  unsigned InputElements =
8996
16
      cast<llvm::FixedVectorType>(V->getType())->getNumElements();
8997
112
  for (unsigned i = 0; i < InputElements; i += 2)
8998
96
    Indices.push_back(i + Odd);
8999
16
  return Builder.CreateShuffleVector(V, Indices);
9000
16
}
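// For example, unzipping an 8-element vector uses the mask <0, 2, 4, 6> for
// the even lanes (Odd == false) and <1, 3, 5, 7> for the odd lanes
// (Odd == true), producing a vector with half as many elements.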
9001
9002
static llvm::Value *VectorZip(CGBuilderTy &Builder, llvm::Value *V0,
9003
32
                              llvm::Value *V1) {
9004
  // Make a shufflevector that interleaves two vectors element by element.
9005
32
  assert(V0->getType() == V1->getType() && "Can't zip different vector types");
9006
32
  SmallVector<int, 16> Indices;
9007
32
  unsigned InputElements =
9008
32
      cast<llvm::FixedVectorType>(V0->getType())->getNumElements();
9009
224
  for (unsigned i = 0; i < InputElements; i++) {
9010
192
    Indices.push_back(i);
9011
192
    Indices.push_back(i + InputElements);
9012
192
  }
9013
32
  return Builder.CreateShuffleVector(V0, V1, Indices);
9014
32
}
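// For example, zipping two 8-element vectors produces the shuffle mask
// <0, 8, 1, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15>, i.e. the lanes of
// V0 and V1 interleaved element by element.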
9015
9016
template<unsigned HighBit, unsigned OtherBits>
9017
36
static llvm::Value *ARMMVEConstantSplat(CGBuilderTy &Builder, llvm::Type *VT) {
9018
  // MVE-specific helper function to make a vector splat of a constant such as
9019
  // UINT_MAX or INT_MIN, in which all bits below the highest one are equal.
9020
36
  llvm::Type *T = cast<llvm::VectorType>(VT)->getElementType();
9021
36
  unsigned LaneBits = T->getPrimitiveSizeInBits();
9022
36
  uint32_t Value = HighBit << (LaneBits - 1);
9023
36
  if (OtherBits)
9024
24
    Value |= (1UL << (LaneBits - 1)) - 1;
9025
36
  llvm::Value *Lane = llvm::ConstantInt::get(T, Value);
9026
36
  return ARMMVEVectorSplat(Builder, Lane);
9027
36
}
CGBuiltin.cpp:llvm::Value* ARMMVEConstantSplat<1u, 0u>(clang::CodeGen::CGBuilderTy&, llvm::Type*)
Line
Count
Source
9017
12
static llvm::Value *ARMMVEConstantSplat(CGBuilderTy &Builder, llvm::Type *VT) {
9018
  // MVE-specific helper function to make a vector splat of a constant such as
9019
  // UINT_MAX or INT_MIN, in which all bits below the highest one are equal.
9020
12
  llvm::Type *T = cast<llvm::VectorType>(VT)->getElementType();
9021
12
  unsigned LaneBits = T->getPrimitiveSizeInBits();
9022
12
  uint32_t Value = HighBit << (LaneBits - 1);
9023
12
  if (OtherBits)
9024
0
    Value |= (1UL << (LaneBits - 1)) - 1;
9025
12
  llvm::Value *Lane = llvm::ConstantInt::get(T, Value);
9026
12
  return ARMMVEVectorSplat(Builder, Lane);
9027
12
}
CGBuiltin.cpp:llvm::Value* ARMMVEConstantSplat<0u, 1u>(clang::CodeGen::CGBuilderTy&, llvm::Type*)
Line
Count
Source
9017
12
static llvm::Value *ARMMVEConstantSplat(CGBuilderTy &Builder, llvm::Type *VT) {
9018
  // MVE-specific helper function to make a vector splat of a constant such as
9019
  // UINT_MAX or INT_MIN, in which all bits below the highest one are equal.
9020
12
  llvm::Type *T = cast<llvm::VectorType>(VT)->getElementType();
9021
12
  unsigned LaneBits = T->getPrimitiveSizeInBits();
9022
12
  uint32_t Value = HighBit << (LaneBits - 1);
9023
12
  if (OtherBits)
9024
12
    Value |= (1UL << (LaneBits - 1)) - 1;
9025
12
  llvm::Value *Lane = llvm::ConstantInt::get(T, Value);
9026
12
  return ARMMVEVectorSplat(Builder, Lane);
9027
12
}
CGBuiltin.cpp:llvm::Value* ARMMVEConstantSplat<1u, 1u>(clang::CodeGen::CGBuilderTy&, llvm::Type*)
Line
Count
Source
9017
12
static llvm::Value *ARMMVEConstantSplat(CGBuilderTy &Builder, llvm::Type *VT) {
9018
  // MVE-specific helper function to make a vector splat of a constant such as
9019
  // UINT_MAX or INT_MIN, in which all bits below the highest one are equal.
9020
12
  llvm::Type *T = cast<llvm::VectorType>(VT)->getElementType();
9021
12
  unsigned LaneBits = T->getPrimitiveSizeInBits();
9022
12
  uint32_t Value = HighBit << (LaneBits - 1);
9023
12
  if (OtherBits)
9024
12
    Value |= (1UL << (LaneBits - 1)) - 1;
9025
12
  llvm::Value *Lane = llvm::ConstantInt::get(T, Value);
9026
12
  return ARMMVEVectorSplat(Builder, Lane);
9027
12
}
9028
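The three instantiations above differ only in the lane value they splat. A minimal standalone sketch of the same bit arithmetic, assuming 16-bit lanes for concreteness (the values in the comments follow directly from the formula):

// Reproduces the HighBit/OtherBits lane-value computation for LaneBits == 16.
#include <cstdint>
#include <cstdio>

static uint32_t laneValue(unsigned HighBit, unsigned OtherBits, unsigned LaneBits) {
  uint32_t Value = HighBit << (LaneBits - 1);   // set the top bit of the lane, or not
  if (OtherBits)
    Value |= (1UL << (LaneBits - 1)) - 1;       // set every bit below the top one
  return Value;
}

int main() {
  printf("%#x\n", laneValue(1, 0, 16)); // 0x8000 : INT16_MIN bit pattern
  printf("%#x\n", laneValue(0, 1, 16)); // 0x7fff : INT16_MAX
  printf("%#x\n", laneValue(1, 1, 16)); // 0xffff : UINT16_MAX
}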
9029
static llvm::Value *ARMMVEVectorElementReverse(CGBuilderTy &Builder,
9030
                                               llvm::Value *V,
9031
46
                                               unsigned ReverseWidth) {
9032
  // MVE-specific helper function which reverses the elements of a
9033
  // vector within every (ReverseWidth)-bit collection of lanes.
9034
46
  SmallVector<int, 16> Indices;
9035
46
  unsigned LaneSize = V->getType()->getScalarSizeInBits();
9036
46
  unsigned Elements = 128 / LaneSize;
9037
46
  unsigned Mask = ReverseWidth / LaneSize - 1;
9038
550
  for (unsigned i = 0; i < Elements; i++)
9039
504
    Indices.push_back(i ^ Mask);
9040
46
  return Builder.CreateShuffleVector(V, Indices);
9041
46
}
9042
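The i ^ Mask trick above reverses lanes within each ReverseWidth-bit chunk without nested loops. A standalone illustration (hypothetical, for checking the index pattern only), using 8-bit lanes and a 32-bit reverse width so that Mask is 3:

// Shuffle indices for ARMMVEVectorElementReverse with LaneSize = 8,
// ReverseWidth = 32: Mask = 32/8 - 1 = 3, so lanes swap within groups of 4.
#include <cstdio>

int main() {
  const unsigned LaneSize = 8, ReverseWidth = 32;
  const unsigned Elements = 128 / LaneSize;       // 16 lanes in an MVE vector
  const unsigned Mask = ReverseWidth / LaneSize - 1;
  for (unsigned i = 0; i < Elements; i++)
    printf("%u ", i ^ Mask);  // 3 2 1 0 7 6 5 4 11 10 9 8 15 14 13 12
  printf("\n");
}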
9043
Value *CodeGenFunction::EmitARMMVEBuiltinExpr(unsigned BuiltinID,
9044
                                              const CallExpr *E,
9045
                                              ReturnValueSlot ReturnValue,
9046
6.40k
                                              llvm::Triple::ArchType Arch) {
9047
6.40k
  enum class CustomCodeGen { VLD24, VST24 } CustomCodeGenType;
9048
6.40k
  Intrinsic::ID IRIntr;
9049
6.40k
  unsigned NumVectors;
9050
9051
  // Code autogenerated by Tablegen will handle all the simple builtins.
9052
6.40k
  switch (BuiltinID) {
9053
4
    #include "clang/Basic/arm_mve_builtin_cg.inc"
9054
9055
    // If we didn't match an MVE builtin id at all, go back to the
9056
    // main EmitARMBuiltinExpr.
9057
1.81k
  default:
9058
1.81k
    return nullptr;
9059
6.40k
  }
9060
9061
  // Anything that breaks from that switch is an MVE builtin that
9062
  // needs handwritten code to generate.
9063
9064
14
  switch (CustomCodeGenType) {
9065
9066
8
  case CustomCodeGen::VLD24: {
9067
8
    llvm::SmallVector<Value *, 4> Ops;
9068
8
    llvm::SmallVector<llvm::Type *, 4> Tys;
9069
9070
8
    auto MvecCType = E->getType();
9071
8
    auto MvecLType = ConvertType(MvecCType);
9072
8
    assert(MvecLType->isStructTy() &&
9073
8
           "Return type for vld[24]q should be a struct");
9074
8
    assert(MvecLType->getStructNumElements() == 1 &&
9075
8
           "Return-type struct for vld[24]q should have one element");
9076
8
    auto MvecLTypeInner = MvecLType->getStructElementType(0);
9077
8
    assert(MvecLTypeInner->isArrayTy() &&
9078
8
           "Return-type struct for vld[24]q should contain an array");
9079
8
    assert(MvecLTypeInner->getArrayNumElements() == NumVectors &&
9080
8
           "Array member of return-type struct vld[24]q has wrong length");
9081
8
    auto VecLType = MvecLTypeInner->getArrayElementType();
9082
9083
8
    Tys.push_back(VecLType);
9084
9085
8
    auto Addr = E->getArg(0);
9086
8
    Ops.push_back(EmitScalarExpr(Addr));
9087
8
    Tys.push_back(ConvertType(Addr->getType()));
9088
9089
8
    Function *F = CGM.getIntrinsic(IRIntr, ArrayRef(Tys));
9090
8
    Value *LoadResult = Builder.CreateCall(F, Ops);
9091
8
    Value *MvecOut = PoisonValue::get(MvecLType);
9092
28
    for (unsigned i = 0; i < NumVectors; ++i) {
9093
20
      Value *Vec = Builder.CreateExtractValue(LoadResult, i);
9094
20
      MvecOut = Builder.CreateInsertValue(MvecOut, Vec, {0, i});
9095
20
    }
9096
9097
8
    if (ReturnValue.isNull())
9098
0
      return MvecOut;
9099
8
    else
9100
8
      return Builder.CreateStore(MvecOut, ReturnValue.getValue());
9101
8
  }
9102
9103
6
  case CustomCodeGen::VST24: {
9104
6
    llvm::SmallVector<Value *, 4> Ops;
9105
6
    llvm::SmallVector<llvm::Type *, 4> Tys;
9106
9107
6
    auto Addr = E->getArg(0);
9108
6
    Ops.push_back(EmitScalarExpr(Addr));
9109
6
    Tys.push_back(ConvertType(Addr->getType()));
9110
9111
6
    auto MvecCType = E->getArg(1)->getType();
9112
6
    auto MvecLType = ConvertType(MvecCType);
9113
6
    assert(MvecLType->isStructTy() && "Data type for vst2q should be a struct");
9114
6
    assert(MvecLType->getStructNumElements() == 1 &&
9115
6
           "Data-type struct for vst2q should have one element");
9116
6
    auto MvecLTypeInner = MvecLType->getStructElementType(0);
9117
6
    assert(MvecLTypeInner->isArrayTy() &&
9118
6
           "Data-type struct for vst2q should contain an array");
9119
6
    assert(MvecLTypeInner->getArrayNumElements() == NumVectors &&
9120
6
           "Array member of return-type struct vld[24]q has wrong length");
9121
6
    auto VecLType = MvecLTypeInner->getArrayElementType();
9122
9123
6
    Tys.push_back(VecLType);
9124
9125
6
    AggValueSlot MvecSlot = CreateAggTemp(MvecCType);
9126
6
    EmitAggExpr(E->getArg(1), MvecSlot);
9127
6
    auto Mvec = Builder.CreateLoad(MvecSlot.getAddress());
9128
22
    for (unsigned i = 0; i < NumVectors; i++)
9129
16
      Ops.push_back(Builder.CreateExtractValue(Mvec, {0, i}));
9130
9131
6
    Function *F = CGM.getIntrinsic(IRIntr, ArrayRef(Tys));
9132
6
    Value *ToReturn = nullptr;
9133
22
    for (unsigned i = 0; i < NumVectors; i++) {
9134
16
      Ops.push_back(llvm::ConstantInt::get(Int32Ty, i));
9135
16
      ToReturn = Builder.CreateCall(F, Ops);
9136
16
      Ops.pop_back();
9137
16
    }
9138
6
    return ToReturn;
9139
6
  }
9140
14
  }
9141
0
  llvm_unreachable("unknown custom codegen type.");
9142
0
}
9143
9144
Value *CodeGenFunction::EmitARMCDEBuiltinExpr(unsigned BuiltinID,
9145
                                              const CallExpr *E,
9146
                                              ReturnValueSlot ReturnValue,
9147
1.81k
                                              llvm::Triple::ArchType Arch) {
9148
1.81k
  switch (BuiltinID) {
9149
1.77k
  default:
9150
1.77k
    return nullptr;
9151
1.81k
#include "clang/Basic/arm_cde_builtin_cg.inc"
9152
1.81k
  }
9153
1.81k
}
9154
9155
static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID,
9156
                                      const CallExpr *E,
9157
                                      SmallVectorImpl<Value *> &Ops,
9158
1.47k
                                      llvm::Triple::ArchType Arch) {
9159
1.47k
  unsigned int Int = 0;
9160
1.47k
  const char *s = nullptr;
9161
9162
1.47k
  switch (BuiltinID) {
9163
1.40k
  default:
9164
1.40k
    return nullptr;
9165
3
  case NEON::BI__builtin_neon_vtbl1_v:
9166
6
  case NEON::BI__builtin_neon_vqtbl1_v:
9167
9
  case NEON::BI__builtin_neon_vqtbl1q_v:
9168
12
  case NEON::BI__builtin_neon_vtbl2_v:
9169
15
  case NEON::BI__builtin_neon_vqtbl2_v:
9170
18
  case NEON::BI__builtin_neon_vqtbl2q_v:
9171
21
  case NEON::BI__builtin_neon_vtbl3_v:
9172
24
  case NEON::BI__builtin_neon_vqtbl3_v:
9173
27
  case NEON::BI__builtin_neon_vqtbl3q_v:
9174
30
  case NEON::BI__builtin_neon_vtbl4_v:
9175
33
  case NEON::BI__builtin_neon_vqtbl4_v:
9176
36
  case NEON::BI__builtin_neon_vqtbl4q_v:
9177
36
    break;
9178
3
  case NEON::BI__builtin_neon_vtbx1_v:
9179
6
  case NEON::BI__builtin_neon_vqtbx1_v:
9180
9
  case NEON::BI__builtin_neon_vqtbx1q_v:
9181
12
  case NEON::BI__builtin_neon_vtbx2_v:
9182
15
  case NEON::BI__builtin_neon_vqtbx2_v:
9183
18
  case NEON::BI__builtin_neon_vqtbx2q_v:
9184
21
  case NEON::BI__builtin_neon_vtbx3_v:
9185
24
  case NEON::BI__builtin_neon_vqtbx3_v:
9186
27
  case NEON::BI__builtin_neon_vqtbx3q_v:
9187
30
  case NEON::BI__builtin_neon_vtbx4_v:
9188
33
  case NEON::BI__builtin_neon_vqtbx4_v:
9189
36
  case NEON::BI__builtin_neon_vqtbx4q_v:
9190
36
    break;
9191
1.47k
  }
9192
9193
72
  assert(E->getNumArgs() >= 3);
9194
9195
  // Get the last argument, which specifies the vector type.
9196
72
  const Expr *Arg = E->getArg(E->getNumArgs() - 1);
9197
72
  std::optional<llvm::APSInt> Result =
9198
72
      Arg->getIntegerConstantExpr(CGF.getContext());
9199
72
  if (!Result)
9200
0
    return nullptr;
9201
9202
  // Determine the type of this overloaded NEON intrinsic.
9203
72
  NeonTypeFlags Type = Result->getZExtValue();
9204
72
  llvm::FixedVectorType *Ty = GetNeonType(&CGF, Type);
9205
72
  if (!Ty)
9206
0
    return nullptr;
9207
9208
72
  CodeGen::CGBuilderTy &Builder = CGF.Builder;
9209
9210
  // AArch64 scalar builtins are not overloaded, they do not have an extra
9211
  // argument that specifies the vector type, so we need to handle each case.
9212
72
  switch (BuiltinID) {
9213
3
  case NEON::BI__builtin_neon_vtbl1_v: {
9214
3
    return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 1), nullptr, Ops[1],
9215
3
                              Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1");
9216
0
  }
9217
3
  case NEON::BI__builtin_neon_vtbl2_v: {
9218
3
    return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 2), nullptr, Ops[2],
9219
3
                              Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1");
9220
0
  }
9221
3
  case NEON::BI__builtin_neon_vtbl3_v: {
9222
3
    return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 3), nullptr, Ops[3],
9223
3
                              Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2");
9224
0
  }
9225
3
  case NEON::BI__builtin_neon_vtbl4_v: {
9226
3
    return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 4), nullptr, Ops[4],
9227
3
                              Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2");
9228
0
  }
9229
3
  case NEON::BI__builtin_neon_vtbx1_v: {
9230
3
    Value *TblRes =
9231
3
        packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 1), nullptr, Ops[2], Ty,
9232
3
                           Intrinsic::aarch64_neon_tbl1, "vtbl1");
9233
9234
3
    llvm::Constant *EightV = ConstantInt::get(Ty, 8);
9235
3
    Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[2], EightV);
9236
3
    CmpRes = Builder.CreateSExt(CmpRes, Ty);
9237
9238
3
    Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
9239
3
    Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
9240
3
    return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
9241
0
  }
9242
3
  case NEON::BI__builtin_neon_vtbx2_v: {
9243
3
    return packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 2), Ops[0], Ops[3],
9244
3
                              Ty, Intrinsic::aarch64_neon_tbx1, "vtbx1");
9245
0
  }
9246
3
  case NEON::BI__builtin_neon_vtbx3_v: {
9247
3
    Value *TblRes =
9248
3
        packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 3), nullptr, Ops[4], Ty,
9249
3
                           Intrinsic::aarch64_neon_tbl2, "vtbl2");
9250
9251
3
    llvm::Constant *TwentyFourV = ConstantInt::get(Ty, 24);
9252
3
    Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[4],
9253
3
                                           TwentyFourV);
9254
3
    CmpRes = Builder.CreateSExt(CmpRes, Ty);
9255
9256
3
    Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
9257
3
    Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
9258
3
    return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
9259
0
  }
9260
3
  case NEON::BI__builtin_neon_vtbx4_v: {
9261
3
    return packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 4), Ops[0], Ops[5],
9262
3
                              Ty, Intrinsic::aarch64_neon_tbx2, "vtbx2");
9263
0
  }
9264
3
  case NEON::BI__builtin_neon_vqtbl1_v:
9265
6
  case NEON::BI__builtin_neon_vqtbl1q_v:
9266
6
    Int = Intrinsic::aarch64_neon_tbl1; s = "vtbl1"; break;
9267
3
  case NEON::BI__builtin_neon_vqtbl2_v:
9268
6
  case NEON::BI__builtin_neon_vqtbl2q_v: {
9269
6
    Int = Intrinsic::aarch64_neon_tbl2; s = "vtbl2"; break;
9270
3
  case NEON::BI__builtin_neon_vqtbl3_v:
9271
6
  case NEON::BI__builtin_neon_vqtbl3q_v:
9272
6
    Int = Intrinsic::aarch64_neon_tbl3; s = "vtbl3"; break;
9273
3
  case NEON::BI__builtin_neon_vqtbl4_v:
9274
6
  case NEON::BI__builtin_neon_vqtbl4q_v:
9275
6
    Int = Intrinsic::aarch64_neon_tbl4; s = "vtbl4"; break;
9276
3
  case NEON::BI__builtin_neon_vqtbx1_v:
9277
6
  case NEON::BI__builtin_neon_vqtbx1q_v:
9278
6
    Int = Intrinsic::aarch64_neon_tbx1; s = "vtbx1"; break;
9279
3
  case NEON::BI__builtin_neon_vqtbx2_v:
9280
6
  case NEON::BI__builtin_neon_vqtbx2q_v:
9281
6
    Int = Intrinsic::aarch64_neon_tbx2; s = "vtbx2"; break;
9282
3
  case NEON::BI__builtin_neon_vqtbx3_v:
9283
6
  case NEON::BI__builtin_neon_vqtbx3q_v:
9284
6
    Int = Intrinsic::aarch64_neon_tbx3; s = "vtbx3"; break;
9285
3
  case NEON::BI__builtin_neon_vqtbx4_v:
9286
6
  case NEON::BI__builtin_neon_vqtbx4q_v:
9287
6
    Int = Intrinsic::aarch64_neon_tbx4; s = "vtbx4"; break;
9288
3
  }
9289
72
  }
9290
9291
48
  if (!Int)
9292
0
    return nullptr;
9293
9294
48
  Function *F = CGF.CGM.getIntrinsic(Int, Ty);
9295
48
  return CGF.EmitNeonCall(F, Ops, s);
9296
48
}
9297
9298
12
Value *CodeGenFunction::vectorWrapScalar16(Value *Op) {
9299
12
  auto *VTy = llvm::FixedVectorType::get(Int16Ty, 4);
9300
12
  Op = Builder.CreateBitCast(Op, Int16Ty);
9301
12
  Value *V = PoisonValue::get(VTy);
9302
12
  llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
9303
12
  Op = Builder.CreateInsertElement(V, Op, CI);
9304
12
  return Op;
9305
12
}
9306
9307
/// SVEBuiltinMemEltTy - Returns the memory element type for this memory
9308
/// access builtin.  Only required if it can't be inferred from the base pointer
9309
/// operand.
9310
3.76k
llvm::Type *CodeGenFunction::SVEBuiltinMemEltTy(const SVETypeFlags &TypeFlags) {
9311
3.76k
  switch (TypeFlags.getMemEltType()) {
9312
1.27k
  case SVETypeFlags::MemEltTyDefault:
9313
1.27k
    return getEltType(TypeFlags);
9314
679
  case SVETypeFlags::MemEltTyInt8:
9315
679
    return Builder.getInt8Ty();
9316
1.08k
  case SVETypeFlags::MemEltTyInt16:
9317
1.08k
    return Builder.getInt16Ty();
9318
649
  case SVETypeFlags::MemEltTyInt32:
9319
649
    return Builder.getInt32Ty();
9320
75
  case SVETypeFlags::MemEltTyInt64:
9321
75
    return Builder.getInt64Ty();
9322
3.76k
  }
9323
0
  llvm_unreachable("Unknown MemEltType");
9324
0
}
9325
9326
1.27k
llvm::Type *CodeGenFunction::getEltType(const SVETypeFlags &TypeFlags) {
9327
1.27k
  switch (TypeFlags.getEltType()) {
9328
0
  default:
9329
0
    llvm_unreachable("Invalid SVETypeFlag!");
9330
9331
49
  case SVETypeFlags::EltTyInt8:
9332
49
    return Builder.getInt8Ty();
9333
49
  case SVETypeFlags::EltTyInt16:
9334
49
    return Builder.getInt16Ty();
9335
355
  case SVETypeFlags::EltTyInt32:
9336
355
    return Builder.getInt32Ty();
9337
427
  case SVETypeFlags::EltTyInt64:
9338
427
    return Builder.getInt64Ty();
9339
48
  case SVETypeFlags::EltTyInt128:
9340
48
    return Builder.getInt128Ty();
9341
9342
0
  case SVETypeFlags::EltTyFloat16:
9343
0
    return Builder.getHalfTy();
9344
153
  case SVETypeFlags::EltTyFloat32:
9345
153
    return Builder.getFloatTy();
9346
189
  case SVETypeFlags::EltTyFloat64:
9347
189
    return Builder.getDoubleTy();
9348
9349
0
  case SVETypeFlags::EltTyBFloat16:
9350
0
    return Builder.getBFloatTy();
9351
9352
0
  case SVETypeFlags::EltTyBool8:
9353
0
  case SVETypeFlags::EltTyBool16:
9354
0
  case SVETypeFlags::EltTyBool32:
9355
0
  case SVETypeFlags::EltTyBool64:
9356
0
    return Builder.getInt1Ty();
9357
1.27k
  }
9358
1.27k
}
9359
9360
// Return the llvm predicate vector type corresponding to the specified element
9361
// TypeFlags.
9362
llvm::ScalableVectorType *
9363
168
CodeGenFunction::getSVEPredType(const SVETypeFlags &TypeFlags) {
9364
168
  switch (TypeFlags.getEltType()) {
9365
0
  default: llvm_unreachable("Unhandled SVETypeFlag!");
9366
9367
24
  case SVETypeFlags::EltTyInt8:
9368
24
    return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
9369
24
  case SVETypeFlags::EltTyInt16:
9370
24
    return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
9371
24
  case SVETypeFlags::EltTyInt32:
9372
24
    return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
9373
24
  case SVETypeFlags::EltTyInt64:
9374
24
    return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
9375
9376
12
  case SVETypeFlags::EltTyBFloat16:
9377
12
    return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
9378
12
  case SVETypeFlags::EltTyFloat16:
9379
12
    return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
9380
12
  case SVETypeFlags::EltTyFloat32:
9381
12
    return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
9382
12
  case SVETypeFlags::EltTyFloat64:
9383
12
    return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
9384
9385
6
  case SVETypeFlags::EltTyBool8:
9386
6
    return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
9387
6
  case SVETypeFlags::EltTyBool16:
9388
6
    return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
9389
6
  case SVETypeFlags::EltTyBool32:
9390
6
    return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
9391
6
  case SVETypeFlags::EltTyBool64:
9392
6
    return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
9393
168
  }
9394
168
}
9395
9396
// Return the llvm vector type corresponding to the specified element TypeFlags.
9397
llvm::ScalableVectorType *
9398
47.8k
CodeGenFunction::getSVEType(const SVETypeFlags &TypeFlags) {
9399
47.8k
  switch (TypeFlags.getEltType()) {
9400
0
  default:
9401
0
    llvm_unreachable("Invalid SVETypeFlag!");
9402
9403
7.02k
  case SVETypeFlags::EltTyInt8:
9404
7.02k
    return llvm::ScalableVectorType::get(Builder.getInt8Ty(), 16);
9405
8.46k
  case SVETypeFlags::EltTyInt16:
9406
8.46k
    return llvm::ScalableVectorType::get(Builder.getInt16Ty(), 8);
9407
10.5k
  case SVETypeFlags::EltTyInt32:
9408
10.5k
    return llvm::ScalableVectorType::get(Builder.getInt32Ty(), 4);
9409
10.9k
  case SVETypeFlags::EltTyInt64:
9410
10.9k
    return llvm::ScalableVectorType::get(Builder.getInt64Ty(), 2);
9411
9412
2.72k
  case SVETypeFlags::EltTyFloat16:
9413
2.72k
    return llvm::ScalableVectorType::get(Builder.getHalfTy(), 8);
9414
460
  case SVETypeFlags::EltTyBFloat16:
9415
460
    return llvm::ScalableVectorType::get(Builder.getBFloatTy(), 8);
9416
3.03k
  case SVETypeFlags::EltTyFloat32:
9417
3.03k
    return llvm::ScalableVectorType::get(Builder.getFloatTy(), 4);
9418
3.10k
  case SVETypeFlags::EltTyFloat64:
9419
3.10k
    return llvm::ScalableVectorType::get(Builder.getDoubleTy(), 2);
9420
9421
939
  case SVETypeFlags::EltTyBool8:
9422
939
    return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
9423
207
  case SVETypeFlags::EltTyBool16:
9424
207
    return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
9425
207
  case SVETypeFlags::EltTyBool32:
9426
207
    return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
9427
207
  case SVETypeFlags::EltTyBool64:
9428
207
    return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
9429
47.8k
  }
9430
47.8k
}
9431
9432
llvm::Value *
9433
24
CodeGenFunction::EmitSVEAllTruePred(const SVETypeFlags &TypeFlags) {
9434
24
  Function *Ptrue =
9435
24
      CGM.getIntrinsic(Intrinsic::aarch64_sve_ptrue, getSVEPredType(TypeFlags));
9436
24
  return Builder.CreateCall(Ptrue, {Builder.getInt32(/*SV_ALL*/ 31)});
9437
24
}
9438
9439
constexpr unsigned SVEBitsPerBlock = 128;
9440
9441
7.95k
static llvm::ScalableVectorType *getSVEVectorForElementType(llvm::Type *EltTy) {
9442
7.95k
  unsigned NumElts = SVEBitsPerBlock / EltTy->getScalarSizeInBits();
9443
7.95k
  return llvm::ScalableVectorType::get(EltTy, NumElts);
9444
7.95k
}
9445
9446
// Reinterpret the input predicate so that it can be used to correctly isolate
9447
// the elements of the specified datatype.
9448
Value *CodeGenFunction::EmitSVEPredicateCast(Value *Pred,
9449
26.5k
                                             llvm::ScalableVectorType *VTy) {
9450
9451
26.5k
  if (isa<TargetExtType>(Pred->getType()) &&
9452
26.5k
      cast<TargetExtType>(Pred->getType())->getName() == "aarch64.svcount")
9453
704
    return Pred;
9454
9455
25.8k
  auto *RTy = llvm::VectorType::get(IntegerType::get(getLLVMContext(), 1), VTy);
9456
25.8k
  if (Pred->getType() == RTy)
9457
4.62k
    return Pred;
9458
9459
21.2k
  unsigned IntID;
9460
21.2k
  llvm::Type *IntrinsicTy;
9461
21.2k
  switch (VTy->getMinNumElements()) {
9462
0
  default:
9463
0
    llvm_unreachable("unsupported element count!");
9464
48
  case 1:
9465
7.75k
  case 2:
9466
14.5k
  case 4:
9467
19.8k
  case 8:
9468
19.8k
    IntID = Intrinsic::aarch64_sve_convert_from_svbool;
9469
19.8k
    IntrinsicTy = RTy;
9470
19.8k
    break;
9471
1.36k
  case 16:
9472
1.36k
    IntID = Intrinsic::aarch64_sve_convert_to_svbool;
9473
1.36k
    IntrinsicTy = Pred->getType();
9474
1.36k
    break;
9475
21.2k
  }
9476
9477
21.2k
  Function *F = CGM.getIntrinsic(IntID, IntrinsicTy);
9478
21.2k
  Value *C = Builder.CreateCall(F, Pred);
9479
21.2k
  assert(C->getType() == RTy && "Unexpected return type!");
9480
21.2k
  return C;
9481
21.2k
}
9482
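The cast above relies on a simple size relationship: svbool_t always provides 16 predicate bits per 128-bit granule, so the number of active predicate lanes for a given element width is 128 divided by that width. A small illustrative sketch of that mapping (not LLVM code):

// Predicate lane counts per element width within a 128-bit SVE granule.
#include <cstdio>

int main() {
  const unsigned SVEBitsPerBlock = 128;
  for (unsigned EltBits : {8u, 16u, 32u, 64u})
    // e.g. 64-bit elements -> <vscale x 2 x i1> predicate
    printf("i%-2u elements -> <vscale x %u x i1>\n", EltBits,
           SVEBitsPerBlock / EltBits);
}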
9483
Value *CodeGenFunction::EmitSVEGatherLoad(const SVETypeFlags &TypeFlags,
9484
                                          SmallVectorImpl<Value *> &Ops,
9485
2.37k
                                          unsigned IntID) {
9486
2.37k
  auto *ResultTy = getSVEType(TypeFlags);
9487
2.37k
  auto *OverloadedTy =
9488
2.37k
      llvm::ScalableVectorType::get(SVEBuiltinMemEltTy(TypeFlags), ResultTy);
9489
9490
  // At the ACLE level there's only one predicate type, svbool_t, which is
9491
  // mapped to <n x 16 x i1>. However, this might be incompatible with the
9492
  // actual type being loaded. For example, when loading doubles (i64) the
9493
  // predicate should be <n x 2 x i1> instead. At the IR level the type of
9494
  // the predicate and the data being loaded must match. Cast accordingly.
9495
2.37k
  Ops[0] = EmitSVEPredicateCast(Ops[0], OverloadedTy);
9496
9497
2.37k
  Function *F = nullptr;
9498
2.37k
  if (Ops[1]->getType()->isVectorTy())
9499
    // This is the "vector base, scalar offset" case. In order to uniquely
9500
    // map this built-in to an LLVM IR intrinsic, we need both the return type
9501
    // and the type of the vector base.
9502
1.12k
    F = CGM.getIntrinsic(IntID, {OverloadedTy, Ops[1]->getType()});
9503
1.25k
  else
9504
    // This is the "scalar base, vector offset case". The type of the offset
9505
    // is encoded in the name of the intrinsic. We only need to specify the
9506
    // return type in order to uniquely map this built-in to an LLVM IR
9507
    // intrinsic.
9508
1.25k
    F = CGM.getIntrinsic(IntID, OverloadedTy);
9509
9510
  // Pass 0 when the offset is missing. This can only be applied when using
9511
  // the "vector base" addressing mode for which ACLE allows no offset. The
9512
  // corresponding LLVM IR always requires an offset.
9513
2.37k
  if (Ops.size() == 2) {
9514
416
    assert(Ops[1]->getType()->isVectorTy() && "Scalar base requires an offset");
9515
416
    Ops.push_back(ConstantInt::get(Int64Ty, 0));
9516
416
  }
9517
9518
  // For "vector base, scalar index" scale the index so that it becomes a
9519
  // scalar offset.
9520
2.37k
  if (!TypeFlags.isByteIndexed() && Ops[1]->getType()->isVectorTy()) {
9521
704
    unsigned BytesPerElt =
9522
704
        OverloadedTy->getElementType()->getScalarSizeInBits() / 8;
9523
704
    Ops[2] = Builder.CreateShl(Ops[2], Log2_32(BytesPerElt));
9524
704
  }
9525
9526
2.37k
  Value *Call = Builder.CreateCall(F, Ops);
9527
9528
  // The following sext/zext is only needed when ResultTy != OverloadedTy. In
9529
  // other cases it's folded into a nop.
9530
2.37k
  return TypeFlags.isZExtReturn() ? Builder.CreateZExt(Call, ResultTy)
9531
2.37k
                                  : Builder.CreateSExt(Call, ResultTy);
9532
2.37k
}
9533
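The index scaling at the end is plain shift arithmetic: for the "vector base, scalar index" forms the per-element index becomes a byte offset by shifting left by log2 of the element size. A hedged standalone sketch with assumed 64-bit elements:

// Scaling a per-element index to a byte offset, mirroring
// Builder.CreateShl(Ops[2], Log2_32(BytesPerElt)) above.
#include <cstdint>
#include <cstdio>

int main() {
  const unsigned EltBits = 64;                 // gathering doubles / i64
  const unsigned BytesPerElt = EltBits / 8;    // 8
  const unsigned Log2Bytes = 3;                // Log2_32(8)
  for (int64_t Index : {0, 1, 5}) {
    int64_t ByteOffset = Index << Log2Bytes;
    printf("index %lld -> byte offset %lld\n", (long long)Index,
           (long long)ByteOffset);             // 0, 8, 40
  }
}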
9534
Value *CodeGenFunction::EmitSVEScatterStore(const SVETypeFlags &TypeFlags,
9535
                                            SmallVectorImpl<Value *> &Ops,
9536
838
                                            unsigned IntID) {
9537
838
  auto *SrcDataTy = getSVEType(TypeFlags);
9538
838
  auto *OverloadedTy =
9539
838
      llvm::ScalableVectorType::get(SVEBuiltinMemEltTy(TypeFlags), SrcDataTy);
9540
9541
  // In ACLE the source data is passed in the last argument, whereas in LLVM IR
9542
  // it's the first argument. Move it accordingly.
9543
838
  Ops.insert(Ops.begin(), Ops.pop_back_val());
9544
9545
838
  Function *F = nullptr;
9546
838
  if (Ops[2]->getType()->isVectorTy())
9547
    // This is the "vector base, scalar offset" case. In order to uniquely
9548
    // map this built-in to an LLVM IR intrinsic, we need both the return type
9549
    // and the type of the vector base.
9550
412
    F = CGM.getIntrinsic(IntID, {OverloadedTy, Ops[2]->getType()});
9551
426
  else
9552
    // This is the "scalar base, vector offset case". The type of the offset
9553
    // is encoded in the name of the intrinsic. We only need to specify the
9554
    // return type in order to uniquely map this built-in to an LLVM IR
9555
    // intrinsic.
9556
426
    F = CGM.getIntrinsic(IntID, OverloadedTy);
9557
9558
  // Pass 0 when the offset is missing. This can only be applied when using
9559
  // the "vector base" addressing mode for which ACLE allows no offset. The
9560
  // corresponding LLVM IR always requires an offset.
9561
838
  if (Ops.size() == 3) {
9562
148
    assert(Ops[1]->getType()->isVectorTy() && "Scalar base requires an offset");
9563
148
    Ops.push_back(ConstantInt::get(Int64Ty, 0));
9564
148
  }
9565
9566
  // Truncation is needed when SrcDataTy != OverloadedTy. In other cases it's
9567
  // folded into a nop.
9568
838
  Ops[0] = Builder.CreateTrunc(Ops[0], OverloadedTy);
9569
9570
  // At the ACLE level there's only one predicate type, svbool_t, which is
9571
  // mapped to <n x 16 x i1>. However, this might be incompatible with the
9572
  // actual type being stored. For example, when storing doubles (i64) the
9573
  // predicate should be <n x 2 x i1> instead. At the IR level the type of
9574
  // the predicate and the data being stored must match. Cast accordingly.
9575
838
  Ops[1] = EmitSVEPredicateCast(Ops[1], OverloadedTy);
9576
9577
  // For "vector base, scalar index" scale the index so that it becomes a
9578
  // scalar offset.
9579
838
  if (!TypeFlags.isByteIndexed() && Ops[2]->getType()->isVectorTy()) {
9580
264
    unsigned BytesPerElt =
9581
264
        OverloadedTy->getElementType()->getScalarSizeInBits() / 8;
9582
264
    Ops[3] = Builder.CreateShl(Ops[3], Log2_32(BytesPerElt));
9583
264
  }
9584
9585
838
  return Builder.CreateCall(F, Ops);
9586
838
}
9587
9588
Value *CodeGenFunction::EmitSVEGatherPrefetch(const SVETypeFlags &TypeFlags,
9589
                                              SmallVectorImpl<Value *> &Ops,
9590
160
                                              unsigned IntID) {
9591
  // The gather prefetches are overloaded on the vector input - this can either
9592
  // be the vector of base addresses or vector of offsets.
9593
160
  auto *OverloadedTy = dyn_cast<llvm::ScalableVectorType>(Ops[1]->getType());
9594
160
  if (!OverloadedTy)
9595
80
    OverloadedTy = cast<llvm::ScalableVectorType>(Ops[2]->getType());
9596
9597
  // Cast the predicate from svbool_t to the right number of elements.
9598
160
  Ops[0] = EmitSVEPredicateCast(Ops[0], OverloadedTy);
9599
9600
  // vector + imm addressing modes
9601
160
  if (Ops[1]->getType()->isVectorTy()) {
9602
80
    if (Ops.size() == 3) {
9603
      // Pass 0 for 'vector+imm' when the index is omitted.
9604
40
      Ops.push_back(ConstantInt::get(Int64Ty, 0));
9605
9606
      // The sv_prfop is the last operand in the builtin and IR intrinsic.
9607
40
      std::swap(Ops[2], Ops[3]);
9608
40
    } else {
9609
      // Index needs to be passed as scaled offset.
9610
40
      llvm::Type *MemEltTy = SVEBuiltinMemEltTy(TypeFlags);
9611
40
      unsigned BytesPerElt = MemEltTy->getPrimitiveSizeInBits() / 8;
9612
40
      if (BytesPerElt > 1)
9613
30
        Ops[2] = Builder.CreateShl(Ops[2], Log2_32(BytesPerElt));
9614
40
    }
9615
80
  }
9616
9617
160
  Function *F = CGM.getIntrinsic(IntID, OverloadedTy);
9618
160
  return Builder.CreateCall(F, Ops);
9619
160
}
9620
9621
Value *CodeGenFunction::EmitSVEStructLoad(const SVETypeFlags &TypeFlags,
9622
                                          SmallVectorImpl<Value*> &Ops,
9623
640
                                          unsigned IntID) {
9624
640
  llvm::ScalableVectorType *VTy = getSVEType(TypeFlags);
9625
9626
640
  unsigned N;
9627
640
  switch (IntID) {
9628
96
  case Intrinsic::aarch64_sve_ld2_sret:
9629
184
  case Intrinsic::aarch64_sve_ld1_pn_x2:
9630
272
  case Intrinsic::aarch64_sve_ldnt1_pn_x2:
9631
272
    N = 2;
9632
272
    break;
9633
96
  case Intrinsic::aarch64_sve_ld3_sret:
9634
96
    N = 3;
9635
96
    break;
9636
96
  case Intrinsic::aarch64_sve_ld4_sret:
9637
184
  case Intrinsic::aarch64_sve_ld1_pn_x4:
9638
272
  case Intrinsic::aarch64_sve_ldnt1_pn_x4:
9639
272
    N = 4;
9640
272
    break;
9641
0
  default:
9642
0
    llvm_unreachable("unknown intrinsic!");
9643
640
  }
9644
640
  auto RetTy = llvm::VectorType::get(VTy->getElementType(),
9645
640
                                     VTy->getElementCount() * N);
9646
9647
640
  Value *Predicate = EmitSVEPredicateCast(Ops[0], VTy);
9648
640
  Value *BasePtr = Ops[1];
9649
9650
  // Does the load have an offset?
9651
640
  if (Ops.size() > 2)
9652
320
    BasePtr = Builder.CreateGEP(VTy, BasePtr, Ops[2]);
9653
9654
640
  Function *F = CGM.getIntrinsic(IntID, {VTy});
9655
640
  Value *Call = Builder.CreateCall(F, {Predicate, BasePtr});
9656
640
  unsigned MinElts = VTy->getMinNumElements();
9657
640
  Value *Ret = llvm::PoisonValue::get(RetTy);
9658
2.56k
  for (unsigned I = 0; I < N; I++) {
9659
1.92k
    Value *Idx = ConstantInt::get(CGM.Int64Ty, I * MinElts);
9660
1.92k
    Value *SRet = Builder.CreateExtractValue(Call, I);
9661
1.92k
    Ret = Builder.CreateInsertVector(RetTy, Ret, SRet, Idx);
9662
1.92k
  }
9663
640
  return Ret;
9664
640
}
9665
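To make the concatenation step concrete: for an svld2 of doubles, VTy is <vscale x 2 x double>, N is 2, and the two sub-results land in a <vscale x 4 x double> at element offsets 0 and 2. A minimal sketch of those insert positions (assuming the minimum vector length):

// Insert positions used when packing N sub-vectors into one wide result,
// mirroring Ret = CreateInsertVector(RetTy, Ret, SRet, I * MinElts).
#include <cstdio>

int main() {
  const unsigned MinElts = 2; // <vscale x 2 x double> per part (svld2 of f64)
  const unsigned N = 2;       // two parts -> <vscale x 4 x double> result
  for (unsigned I = 0; I < N; I++)
    printf("part %u inserted at element index %u\n", I, I * MinElts); // 0, 2
}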
9666
Value *CodeGenFunction::EmitSVEStructStore(const SVETypeFlags &TypeFlags,
9667
                                           SmallVectorImpl<Value*> &Ops,
9668
706
                                           unsigned IntID) {
9669
706
  llvm::ScalableVectorType *VTy = getSVEType(TypeFlags);
9670
9671
706
  unsigned N;
9672
706
  switch (IntID) {
9673
118
  case Intrinsic::aarch64_sve_st2:
9674
206
  case Intrinsic::aarch64_sve_st1_pn_x2:
9675
294
  case Intrinsic::aarch64_sve_stnt1_pn_x2:
9676
294
    N = 2;
9677
294
    break;
9678
118
  case Intrinsic::aarch64_sve_st3:
9679
118
    N = 3;
9680
118
    break;
9681
118
  case Intrinsic::aarch64_sve_st4:
9682
206
  case Intrinsic::aarch64_sve_st1_pn_x4:
9683
294
  case Intrinsic::aarch64_sve_stnt1_pn_x4:
9684
294
    N = 4;
9685
294
    break;
9686
0
  default:
9687
0
    llvm_unreachable("unknown intrinsic!");
9688
706
  }
9689
9690
706
  Value *Predicate = EmitSVEPredicateCast(Ops[0], VTy);
9691
706
  Value *BasePtr = Ops[1];
9692
9693
  // Does the store have an offset?
9694
706
  if (Ops.size() > (2 + N))
9695
353
    BasePtr = Builder.CreateGEP(VTy, BasePtr, Ops[2]);
9696
9697
  // The llvm.aarch64.sve.st2/3/4 intrinsics take legal part vectors, so we
9698
  // need to break up the tuple vector.
9699
706
  SmallVector<llvm::Value*, 5> Operands;
9700
2.82k
  for (unsigned I = Ops.size() - N; I < Ops.size(); ++I)
9701
2.11k
    Operands.push_back(Ops[I]);
9702
706
  Operands.append({Predicate, BasePtr});
9703
706
  Function *F = CGM.getIntrinsic(IntID, { VTy });
9704
9705
706
  return Builder.CreateCall(F, Operands);
9706
706
}
9707
9708
// SVE2's svpmullb and svpmullt builtins are similar to the svpmullb_pair and
9709
// svpmullt_pair intrinsics, with the exception that their results are bitcast
9710
// to a wider type.
9711
Value *CodeGenFunction::EmitSVEPMull(const SVETypeFlags &TypeFlags,
9712
                                     SmallVectorImpl<Value *> &Ops,
9713
48
                                     unsigned BuiltinID) {
9714
  // Splat scalar operand to vector (intrinsics with _n infix)
9715
48
  if (TypeFlags.hasSplatOperand()) {
9716
24
    unsigned OpNo = TypeFlags.getSplatOperand();
9717
24
    Ops[OpNo] = EmitSVEDupX(Ops[OpNo]);
9718
24
  }
9719
9720
  // The pair-wise function has a narrower overloaded type.
9721
48
  Function *F = CGM.getIntrinsic(BuiltinID, Ops[0]->getType());
9722
48
  Value *Call = Builder.CreateCall(F, {Ops[0], Ops[1]});
9723
9724
  // Now bitcast to the wider result type.
9725
48
  llvm::ScalableVectorType *Ty = getSVEType(TypeFlags);
9726
48
  return EmitSVEReinterpret(Call, Ty);
9727
48
}
9728
9729
Value *CodeGenFunction::EmitSVEMovl(const SVETypeFlags &TypeFlags,
9730
72
                                    ArrayRef<Value *> Ops, unsigned BuiltinID) {
9731
72
  llvm::Type *OverloadedTy = getSVEType(TypeFlags);
9732
72
  Function *F = CGM.getIntrinsic(BuiltinID, OverloadedTy);
9733
72
  return Builder.CreateCall(F, {Ops[0], Builder.getInt32(0)});
9734
72
}
9735
9736
Value *CodeGenFunction::EmitSVEPrefetchLoad(const SVETypeFlags &TypeFlags,
9737
                                            SmallVectorImpl<Value *> &Ops,
9738
260
                                            unsigned BuiltinID) {
9739
260
  auto *MemEltTy = SVEBuiltinMemEltTy(TypeFlags);
9740
260
  auto *VectorTy = getSVEVectorForElementType(MemEltTy);
9741
260
  auto *MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);
9742
9743
260
  Value *Predicate = EmitSVEPredicateCast(Ops[0], MemoryTy);
9744
260
  Value *BasePtr = Ops[1];
9745
9746
  // Implement the index operand if not omitted.
9747
260
  if (Ops.size() > 3)
9748
20
    BasePtr = Builder.CreateGEP(MemoryTy, BasePtr, Ops[2]);
9749
9750
260
  Value *PrfOp = Ops.back();
9751
9752
260
  Function *F = CGM.getIntrinsic(BuiltinID, Predicate->getType());
9753
260
  return Builder.CreateCall(F, {Predicate, BasePtr, PrfOp});
9754
260
}
9755
9756
Value *CodeGenFunction::EmitSVEMaskedLoad(const CallExpr *E,
9757
                                          llvm::Type *ReturnTy,
9758
                                          SmallVectorImpl<Value *> &Ops,
9759
                                          unsigned BuiltinID,
9760
1.10k
                                          bool IsZExtReturn) {
9761
1.10k
  QualType LangPTy = E->getArg(1)->getType();
9762
1.10k
  llvm::Type *MemEltTy = CGM.getTypes().ConvertType(
9763
1.10k
      LangPTy->castAs<PointerType>()->getPointeeType());
9764
9765
  // The vector type that is returned may be different from the
9766
  // eventual type loaded from memory.
9767
1.10k
  auto VectorTy = cast<llvm::ScalableVectorType>(ReturnTy);
9768
1.10k
  auto MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);
9769
9770
1.10k
  Value *Predicate = EmitSVEPredicateCast(Ops[0], MemoryTy);
9771
1.10k
  Value *BasePtr = Ops[1];
9772
9773
  // Does the load have an offset?
9774
1.10k
  if (Ops.size() > 2)
9775
552
    BasePtr = Builder.CreateGEP(MemoryTy, BasePtr, Ops[2]);
9776
9777
1.10k
  Function *F = CGM.getIntrinsic(BuiltinID, MemoryTy);
9778
1.10k
  auto *Load =
9779
1.10k
      cast<llvm::Instruction>(Builder.CreateCall(F, {Predicate, BasePtr}));
9780
1.10k
  auto TBAAInfo = CGM.getTBAAAccessInfo(LangPTy->getPointeeType());
9781
1.10k
  CGM.DecorateInstructionWithTBAA(Load, TBAAInfo);
9782
9783
1.10k
  return IsZExtReturn ? Builder.CreateZExt(Load, VectorTy)
9784
1.10k
                      : Builder.CreateSExt(Load, VectorTy);
9785
1.10k
}
9786
9787
Value *CodeGenFunction::EmitSVEMaskedStore(const CallExpr *E,
9788
                                           SmallVectorImpl<Value *> &Ops,
9789
292
                                           unsigned BuiltinID) {
9790
292
  QualType LangPTy = E->getArg(1)->getType();
9791
292
  llvm::Type *MemEltTy = CGM.getTypes().ConvertType(
9792
292
      LangPTy->castAs<PointerType>()->getPointeeType());
9793
9794
  // The vector type that is stored may be different from the
9795
  // eventual type stored to memory.
9796
292
  auto VectorTy = cast<llvm::ScalableVectorType>(Ops.back()->getType());
9797
292
  auto MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);
9798
9799
292
  Value *Predicate = EmitSVEPredicateCast(Ops[0], MemoryTy);
9800
292
  Value *BasePtr = Ops[1];
9801
9802
  // Does the store have an offset?
9803
292
  if (Ops.size() == 4)
9804
146
    BasePtr = Builder.CreateGEP(MemoryTy, BasePtr, Ops[2]);
9805
9806
  // Last value is always the data
9807
292
  llvm::Value *Val = Builder.CreateTrunc(Ops.back(), MemoryTy);
9808
9809
292
  Function *F = CGM.getIntrinsic(BuiltinID, MemoryTy);
9810
292
  auto *Store =
9811
292
      cast<llvm::Instruction>(Builder.CreateCall(F, {Val, Predicate, BasePtr}));
9812
292
  auto TBAAInfo = CGM.getTBAAAccessInfo(LangPTy->getPointeeType());
9813
292
  CGM.DecorateInstructionWithTBAA(Store, TBAAInfo);
9814
292
  return Store;
9815
292
}
9816
9817
Value *CodeGenFunction::EmitSMELd1St1(const SVETypeFlags &TypeFlags,
9818
                                      SmallVectorImpl<Value *> &Ops,
9819
244
                                      unsigned IntID) {
9820
244
  Ops[2] = EmitSVEPredicateCast(
9821
244
      Ops[2], getSVEVectorForElementType(SVEBuiltinMemEltTy(TypeFlags)));
9822
9823
244
  SmallVector<Value *> NewOps;
9824
244
  NewOps.push_back(Ops[2]);
9825
9826
244
  llvm::Value *BasePtr = Ops[3];
9827
9828
  // If the intrinsic contains the vnum parameter, multiply it with the vector
9829
  // size in bytes.
9830
244
  if (Ops.size() == 5) {
9831
122
    Function *StreamingVectorLength =
9832
122
        CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsb);
9833
122
    llvm::Value *StreamingVectorLengthCall =
9834
122
        Builder.CreateCall(StreamingVectorLength);
9835
122
    llvm::Value *Mulvl =
9836
122
        Builder.CreateMul(StreamingVectorLengthCall, Ops[4], "mulvl");
9837
    // The type of the ptr parameter is void *, so use Int8Ty here.
9838
122
    BasePtr = Builder.CreateGEP(Int8Ty, Ops[3], Mulvl);
9839
122
  }
9840
244
  NewOps.push_back(BasePtr);
9841
244
  NewOps.push_back(Ops[0]);
9842
244
  NewOps.push_back(Ops[1]);
9843
244
  Function *F = CGM.getIntrinsic(IntID);
9844
244
  return Builder.CreateCall(F, NewOps);
9845
244
}
9846
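The vnum handling above amounts to BasePtr = Ptr + vnum * svl_bytes, where svl_bytes is whatever aarch64.sme.cntsb returns at run time. A standalone sketch of that address arithmetic, assuming a 512-bit streaming vector length (so cntsb would return 64):

// Address computation for the vnum form: base + vnum * (streaming VL in bytes).
#include <cstdint>
#include <cstdio>

int main() {
  const uint64_t SvlBytes = 64;   // assumed: cntsb() for a 512-bit streaming VL
  const uint64_t Base = 0x1000;   // hypothetical base pointer value
  for (int64_t VNum : {0, 1, 3}) {
    uint64_t Addr = Base + VNum * SvlBytes;
    printf("vnum %lld -> address %#llx\n", (long long)VNum,
           (unsigned long long)Addr);  // 0x1000, 0x1040, 0x10c0
  }
}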
9847
Value *CodeGenFunction::EmitSMEReadWrite(const SVETypeFlags &TypeFlags,
9848
                                         SmallVectorImpl<Value *> &Ops,
9849
960
                                         unsigned IntID) {
9850
960
  auto *VecTy = getSVEType(TypeFlags);
9851
960
  Function *F = CGM.getIntrinsic(IntID, VecTy);
9852
960
  if (TypeFlags.isReadZA())
9853
480
    Ops[1] = EmitSVEPredicateCast(Ops[1], VecTy);
9854
480
  else if (TypeFlags.isWriteZA())
9855
480
    Ops[2] = EmitSVEPredicateCast(Ops[2], VecTy);
9856
960
  return Builder.CreateCall(F, Ops);
9857
960
}
9858
9859
Value *CodeGenFunction::EmitSMEZero(const SVETypeFlags &TypeFlags,
9860
                                    SmallVectorImpl<Value *> &Ops,
9861
12
                                    unsigned IntID) {
9862
  // svzero_za() intrinsic zeros the entire za tile and has no parameters.
9863
12
  if (Ops.size() == 0)
9864
3
    Ops.push_back(llvm::ConstantInt::get(Int32Ty, 255));
9865
12
  Function *F = CGM.getIntrinsic(IntID, {});
9866
12
  return Builder.CreateCall(F, Ops);
9867
12
}
9868
9869
Value *CodeGenFunction::EmitSMELdrStr(const SVETypeFlags &TypeFlags,
9870
                                      SmallVectorImpl<Value *> &Ops,
9871
24
                                      unsigned IntID) {
9872
24
  if (Ops.size() == 3) {
9873
18
    Function *Cntsb = CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsb);
9874
18
    llvm::Value *CntsbCall = Builder.CreateCall(Cntsb, {}, "svlb");
9875
9876
18
    llvm::Value *VecNum = Ops[2];
9877
18
    llvm::Value *MulVL = Builder.CreateMul(CntsbCall, VecNum, "mulvl");
9878
9879
18
    Ops[1] = Builder.CreateGEP(Int8Ty, Ops[1], MulVL);
9880
18
    Ops[0] = Builder.CreateAdd(
9881
18
        Ops[0], Builder.CreateIntCast(VecNum, Int32Ty, true), "tileslice");
9882
18
    Ops.erase(&Ops[2]);
9883
18
  }
9884
24
  Function *F = CGM.getIntrinsic(IntID, {});
9885
24
  return Builder.CreateCall(F, Ops);
9886
24
}
9887
9888
// Limit the usage of scalable llvm IR generated by the ACLE by using the
9889
// sve dup.x intrinsic instead of IRBuilder::CreateVectorSplat.
9890
7.38k
Value *CodeGenFunction::EmitSVEDupX(Value *Scalar, llvm::Type *Ty) {
9891
7.38k
  return Builder.CreateVectorSplat(
9892
7.38k
      cast<llvm::VectorType>(Ty)->getElementCount(), Scalar);
9893
7.38k
}
9894
9895
7.36k
Value *CodeGenFunction::EmitSVEDupX(Value* Scalar) {
9896
7.36k
  return EmitSVEDupX(Scalar, getSVEVectorForElementType(Scalar->getType()));
9897
7.36k
}
9898
9899
2.50k
Value *CodeGenFunction::EmitSVEReinterpret(Value *Val, llvm::Type *Ty) {
9900
  // FIXME: For big endian this needs an additional REV, or needs a separate
9901
  // intrinsic that is code-generated as a no-op, because the LLVM bitcast
9902
  // instruction is defined as 'bitwise' equivalent from memory point of
9903
  // view (when storing/reloading), whereas the svreinterpret builtin
9904
  // implements bitwise equivalent cast from register point of view.
9905
  // LLVM CodeGen for a bitcast must add an explicit REV for big-endian.
9906
2.50k
  return Builder.CreateBitCast(Val, Ty);
9907
2.50k
}
9908
9909
static void InsertExplicitZeroOperand(CGBuilderTy &Builder, llvm::Type *Ty,
9910
907
                                      SmallVectorImpl<Value *> &Ops) {
9911
907
  auto *SplatZero = Constant::getNullValue(Ty);
9912
907
  Ops.insert(Ops.begin(), SplatZero);
9913
907
}
9914
9915
static void InsertExplicitUndefOperand(CGBuilderTy &Builder, llvm::Type *Ty,
9916
917
                                       SmallVectorImpl<Value *> &Ops) {
9917
917
  auto *SplatUndef = UndefValue::get(Ty);
9918
917
  Ops.insert(Ops.begin(), SplatUndef);
9919
917
}
9920
9921
SmallVector<llvm::Type *, 2>
9922
CodeGenFunction::getSVEOverloadTypes(const SVETypeFlags &TypeFlags,
9923
                                     llvm::Type *ResultType,
9924
25.9k
                                     ArrayRef<Value *> Ops) {
9925
25.9k
  if (TypeFlags.isOverloadNone())
9926
1.40k
    return {};
9927
9928
24.5k
  llvm::Type *DefaultType = getSVEType(TypeFlags);
9929
9930
24.5k
  if (TypeFlags.isOverloadWhile())
9931
352
    return {DefaultType, Ops[1]->getType()};
9932
9933
24.2k
  if (TypeFlags.isOverloadWhileRW())
9934
144
    return {getSVEPredType(TypeFlags), Ops[0]->getType()};
9935
9936
24.0k
  if (TypeFlags.isOverloadCvt())
9937
150
    return {Ops[0]->getType(), Ops.back()->getType()};
9938
9939
23.9k
  assert(TypeFlags.isOverloadDefault() && "Unexpected value for overloads");
9940
23.9k
  return {DefaultType};
9941
23.9k
}
9942
9943
Value *CodeGenFunction::EmitSVETupleSetOrGet(const SVETypeFlags &TypeFlags,
9944
                                             llvm::Type *Ty,
9945
375
                                             ArrayRef<Value *> Ops) {
9946
375
  assert((TypeFlags.isTupleSet() || TypeFlags.isTupleGet()) &&
9947
375
         "Expects TypleFlag isTupleSet or TypeFlags.isTupleSet()");
9948
9949
375
  unsigned I = cast<ConstantInt>(Ops[1])->getSExtValue();
9950
375
  auto *SingleVecTy = dyn_cast<llvm::ScalableVectorType>(
9951
375
                      TypeFlags.isTupleSet() ? Ops[2]->getType() : Ty);
9952
375
  Value *Idx = ConstantInt::get(CGM.Int64Ty,
9953
375
                                I * SingleVecTy->getMinNumElements());
9954
9955
375
  if (TypeFlags.isTupleSet())
9956
171
    return Builder.CreateInsertVector(Ty, Ops[0], Ops[2], Idx);
9957
204
  return Builder.CreateExtractVector(Ty, Ops[0], Idx);
9958
375
}
9959
9960
Value *CodeGenFunction::EmitSVETupleCreate(const SVETypeFlags &TypeFlags,
9961
                                             llvm::Type *Ty,
9962
147
                                             ArrayRef<Value *> Ops) {
9963
147
  assert(TypeFlags.isTupleCreate() && "Expects TypeFlags.isTupleCreate()");
9964
9965
147
  auto *SrcTy = dyn_cast<llvm::ScalableVectorType>(Ops[0]->getType());
9966
147
  unsigned MinElts = SrcTy->getMinNumElements();
9967
147
  Value *Call = llvm::PoisonValue::get(Ty);
9968
588
  for (unsigned I = 0; I < Ops.size(); I++) {
9969
441
    Value *Idx = ConstantInt::get(CGM.Int64Ty, I * MinElts);
9970
441
    Call = Builder.CreateInsertVector(Ty, Call, Ops[I], Idx);
9971
441
  }
9972
9973
147
  return Call;
9974
147
}
9975
9976
26.4k
Value *CodeGenFunction::FormSVEBuiltinResult(Value *Call) {
9977
  // Multi-vector results should be broken up into a single (wide) result
9978
  // vector.
9979
26.4k
  auto *StructTy = dyn_cast<StructType>(Call->getType());
9980
26.4k
  if (!StructTy)
9981
25.9k
    return Call;
9982
9983
522
  auto *VTy = dyn_cast<ScalableVectorType>(StructTy->getTypeAtIndex(0U));
9984
522
  if (!VTy)
9985
0
    return Call;
9986
522
  unsigned N = StructTy->getNumElements();
9987
9988
  // We may need to emit a cast to a svbool_t
9989
522
  bool IsPredTy = VTy->getElementType()->isIntegerTy(1);
9990
522
  unsigned MinElts = IsPredTy ? 16 : VTy->getMinNumElements();
9991
9992
522
  ScalableVectorType *WideVTy =
9993
522
      ScalableVectorType::get(VTy->getElementType(), MinElts * N);
9994
522
  Value *Ret = llvm::PoisonValue::get(WideVTy);
9995
2.08k
  for (unsigned I = 0; I < N; ++I) {
9996
1.56k
    Value *SRet = Builder.CreateExtractValue(Call, I);
9997
1.56k
    assert(SRet->getType() == VTy && "Unexpected type for result value");
9998
1.56k
    Value *Idx = ConstantInt::get(CGM.Int64Ty, I * MinElts);
9999
10000
1.56k
    if (IsPredTy)
10001
4
      SRet = EmitSVEPredicateCast(
10002
4
          SRet, ScalableVectorType::get(Builder.getInt1Ty(), 16));
10003
10004
1.56k
    Ret = Builder.CreateInsertVector(WideVTy, Ret, SRet, Idx);
10005
1.56k
  }
10006
522
  Call = Ret;
10007
10008
522
  return Call;
10009
522
}
10010
10011
void CodeGenFunction::GetAArch64SVEProcessedOperands(
10012
    unsigned BuiltinID, const CallExpr *E, SmallVectorImpl<Value *> &Ops,
10013
41.0k
    SVETypeFlags TypeFlags) {
10014
  // Find out if any arguments are required to be integer constant expressions.
10015
41.0k
  unsigned ICEArguments = 0;
10016
41.0k
  ASTContext::GetBuiltinTypeError Error;
10017
41.0k
  getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
10018
41.0k
  assert(Error == ASTContext::GE_None && "Should not codegen an error");
10019
10020
  // Tuple set/get only requires one insert/extract vector, which is
10021
  // created by EmitSVETupleSetOrGet.
10022
41.0k
  bool IsTupleGetOrSet = TypeFlags.isTupleSet() || TypeFlags.isTupleGet();
10023
10024
141k
  for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
10025
100k
    bool IsICE = ICEArguments & (1 << i);
10026
100k
    Value *Arg = EmitScalarExpr(E->getArg(i));
10027
10028
100k
    if (IsICE) {
10029
      // If this is required to be a constant, constant fold it so that we know
10030
      // that the generated intrinsic gets a ConstantInt.
10031
6.21k
      std::optional<llvm::APSInt> Result =
10032
6.21k
          E->getArg(i)->getIntegerConstantExpr(getContext());
10033
6.21k
      assert(Result && "Expected argument to be a constant");
10034
10035
      // Immediates for SVE llvm intrinsics are always 32bit.  We can safely
10036
      // truncate because the immediate has been range checked and no valid
10037
      // immediate requires more than a handful of bits.
10038
6.21k
      *Result = Result->extOrTrunc(32);
10039
6.21k
      Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), *Result));
10040
6.21k
      continue;
10041
6.21k
    }
10042
10043
94.3k
    if (IsTupleGetOrSet || !isa<ScalableVectorType>(Arg->getType())) {
10044
21.0k
      Ops.push_back(Arg);
10045
21.0k
      continue;
10046
21.0k
    }
10047
10048
73.3k
    auto *VTy = cast<ScalableVectorType>(Arg->getType());
10049
73.3k
    unsigned MinElts = VTy->getMinNumElements();
10050
73.3k
    bool IsPred = VTy->getElementType()->isIntegerTy(1);
10051
73.3k
    unsigned N = (MinElts * VTy->getScalarSizeInBits()) / (IsPred ? 16 : 128);
10052
10053
73.3k
    if (N == 1) {
10054
71.4k
      Ops.push_back(Arg);
10055
71.4k
      continue;
10056
71.4k
    }
10057
10058
7.44k
    for (unsigned I = 0; I < N; ++I) {
10059
5.56k
      Value *Idx = ConstantInt::get(CGM.Int64Ty, (I * MinElts) / N);
10060
5.56k
      auto *NewVTy =
10061
5.56k
          ScalableVectorType::get(VTy->getElementType(), MinElts / N);
10062
5.56k
      Ops.push_back(Builder.CreateExtractVector(NewVTy, Arg, Idx));
10063
5.56k
    }
10064
1.87k
  }
10065
41.0k
}
10066
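The argument-splitting arithmetic at the end of the loop is easier to follow with numbers: a two-vector float tuple arrives as <vscale x 8 x float>, so MinElts is 8, N is (8 * 32) / 128 = 2, and the value is extracted as two <vscale x 4 x float> parts at element offsets 0 and 4. A standalone sketch of that computation (illustrative only):

// How a wide tuple argument is split into 128-bit-granule parts, mirroring
// N = (MinElts * ScalarBits) / 128 and extraction at (I * MinElts) / N.
#include <cstdio>

int main() {
  const unsigned MinElts = 8, ScalarBits = 32;   // e.g. an svfloat32x2_t argument
  const unsigned N = (MinElts * ScalarBits) / 128;   // 2 parts
  const unsigned PartElts = MinElts / N;             // 4 elements each
  for (unsigned I = 0; I < N; ++I)
    printf("part %u: <vscale x %u x float> at element %u\n", I, PartElts,
           (I * MinElts) / N);                        // offsets 0 and 4
  return 0;
}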
10067
Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID,
10068
41.7k
                                                  const CallExpr *E) {
10069
41.7k
  llvm::Type *Ty = ConvertType(E->getType());
10070
41.7k
  if (BuiltinID >= SVE::BI__builtin_sve_reinterpret_s8_s8 &&
10071
41.7k
      BuiltinID <= SVE::BI__builtin_sve_reinterpret_f64_f64_x4) {
10072
2.45k
    Value *Val = EmitScalarExpr(E->getArg(0));
10073
2.45k
    return EmitSVEReinterpret(Val, Ty);
10074
2.45k
  }
10075
10076
39.2k
  auto *Builtin = findARMVectorIntrinsicInMap(AArch64SVEIntrinsicMap, BuiltinID,
10077
39.2k
                                              AArch64SVEIntrinsicsProvenSorted);
10078
10079
39.2k
  llvm::SmallVector<Value *, 4> Ops;
10080
39.2k
  SVETypeFlags TypeFlags(Builtin->TypeModifier);
10081
39.2k
  GetAArch64SVEProcessedOperands(BuiltinID, E, Ops, TypeFlags);
10082
10083
39.2k
  if (TypeFlags.isLoad())
10084
1.10k
    return EmitSVEMaskedLoad(E, Ty, Ops, Builtin->LLVMIntrinsic,
10085
1.10k
                             TypeFlags.isZExtReturn());
10086
38.1k
  else if (TypeFlags.isStore())
10087
292
    return EmitSVEMaskedStore(E, Ops, Builtin->LLVMIntrinsic);
10088
37.8k
  else if (TypeFlags.isGatherLoad())
10089
2.37k
    return EmitSVEGatherLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10090
35.5k
  else if (TypeFlags.isScatterStore())
10091
838
    return EmitSVEScatterStore(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10092
34.6k
  else if (TypeFlags.isPrefetch())
10093
260
    return EmitSVEPrefetchLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10094
34.4k
  else if (TypeFlags.isGatherPrefetch())
10095
160
    return EmitSVEGatherPrefetch(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10096
34.2k
  else if (TypeFlags.isStructLoad())
10097
640
    return EmitSVEStructLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10098
33.6k
  else if (TypeFlags.isStructStore())
10099
706
    return EmitSVEStructStore(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10100
32.9k
  else if (TypeFlags.isTupleSet() || TypeFlags.isTupleGet())
10101
375
    return EmitSVETupleSetOrGet(TypeFlags, Ty, Ops);
10102
32.5k
  else if (TypeFlags.isTupleCreate())
10103
147
    return EmitSVETupleCreate(TypeFlags, Ty, Ops);
10104
32.3k
  else if (TypeFlags.isUndef())
10105
5.81k
    return UndefValue::get(Ty);
10106
26.5k
  else if (Builtin->LLVMIntrinsic != 0) {
10107
25.9k
    if (TypeFlags.getMergeType() == SVETypeFlags::MergeZeroExp)
10108
907
      InsertExplicitZeroOperand(Builder, Ty, Ops);
10109
10110
25.9k
    if (TypeFlags.getMergeType() == SVETypeFlags::MergeAnyExp)
10111
917
      InsertExplicitUndefOperand(Builder, Ty, Ops);
10112
10113
    // Some ACLE builtins leave out the argument to specify the predicate
10114
    // pattern, which is expected to be expanded to an SV_ALL pattern.
10115
25.9k
    if (TypeFlags.isAppendSVALL())
10116
29
      Ops.push_back(Builder.getInt32(/*SV_ALL*/ 31));
10117
25.9k
    if (TypeFlags.isInsertOp1SVALL())
10118
260
      Ops.insert(&Ops[1], Builder.getInt32(/*SV_ALL*/ 31));
10119
10120
    // Predicates must match the main datatype.
10121
101k
    for (unsigned i = 0, e = Ops.size(); i != e; ++i)
10122
75.6k
      if (auto PredTy = dyn_cast<llvm::VectorType>(Ops[i]->getType()))
10123
61.4k
        if (PredTy->getElementType()->isIntegerTy(1))
10124
16.5k
          Ops[i] = EmitSVEPredicateCast(Ops[i], getSVEType(TypeFlags));
10125
10126
    // Splat scalar operand to vector (intrinsics with _n infix)
10127
25.9k
    if (TypeFlags.hasSplatOperand()) {
10128
7.32k
      unsigned OpNo = TypeFlags.getSplatOperand();
10129
7.32k
      Ops[OpNo] = EmitSVEDupX(Ops[OpNo]);
10130
7.32k
    }
10131
10132
25.9k
    if (TypeFlags.isReverseCompare())
10133
270
      std::swap(Ops[1], Ops[2]);
10134
25.7k
    else if (TypeFlags.isReverseUSDOT())
10135
8
      std::swap(Ops[1], Ops[2]);
10136
25.7k
    else if (TypeFlags.isReverseMergeAnyBinOp() &&
10137
25.7k
             TypeFlags.getMergeType() == SVETypeFlags::MergeAny)
10138
276
      std::swap(Ops[1], Ops[2]);
10139
25.4k
    else if (TypeFlags.isReverseMergeAnyAccOp() &&
10140
25.4k
             TypeFlags.getMergeType() == SVETypeFlags::MergeAny)
10141
280
      std::swap(Ops[1], Ops[3]);
10142
10143
    // Predicated intrinsics with _z suffix need a select w/ zeroinitializer.
10144
25.9k
    if (TypeFlags.getMergeType() == SVETypeFlags::MergeZero) {
10145
3.48k
      llvm::Type *OpndTy = Ops[1]->getType();
10146
3.48k
      auto *SplatZero = Constant::getNullValue(OpndTy);
10147
3.48k
      Ops[1] = Builder.CreateSelect(Ops[0], Ops[1], SplatZero);
10148
3.48k
    }
10149
10150
25.9k
    Function *F = CGM.getIntrinsic(Builtin->LLVMIntrinsic,
10151
25.9k
                                   getSVEOverloadTypes(TypeFlags, Ty, Ops));
10152
25.9k
    Value *Call = Builder.CreateCall(F, Ops);
10153
10154
    // Predicate results must be converted to svbool_t.
10155
25.9k
    if (auto PredTy = dyn_cast<llvm::VectorType>(Call->getType()))
10156
23.9k
      if (PredTy->getScalarType()->isIntegerTy(1))
10157
1.97k
        Call = EmitSVEPredicateCast(Call, cast<llvm::ScalableVectorType>(Ty));
10158
10159
25.9k
    return FormSVEBuiltinResult(Call);
10160
25.9k
  }
10161
10162
592
  switch (BuiltinID) {
10163
0
  default:
10164
0
    return nullptr;
10165
3
  case SVE::BI__builtin_sve_svpsel_lane_b8:
10166
6
  case SVE::BI__builtin_sve_svpsel_lane_b16:
10167
9
  case SVE::BI__builtin_sve_svpsel_lane_b32:
10168
12
  case SVE::BI__builtin_sve_svpsel_lane_b64:
10169
15
  case SVE::BI__builtin_sve_svpsel_lane_c8:
10170
18
  case SVE::BI__builtin_sve_svpsel_lane_c16:
10171
21
  case SVE::BI__builtin_sve_svpsel_lane_c32:
10172
24
  case SVE::BI__builtin_sve_svpsel_lane_c64: {
10173
24
    bool IsSVCount = isa<TargetExtType>(Ops[0]->getType());
10174
24
    assert(((!IsSVCount || cast<TargetExtType>(Ops[0]->getType())->getName() ==
10175
24
                               "aarch64.svcount")) &&
10176
24
           "Unexpected TargetExtType");
10177
24
    auto SVCountTy =
10178
24
        llvm::TargetExtType::get(getLLVMContext(), "aarch64.svcount");
10179
24
    Function *CastFromSVCountF =
10180
24
        CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool, SVCountTy);
10181
24
    Function *CastToSVCountF =
10182
24
        CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, SVCountTy);
10183
10184
24
    auto OverloadedTy = getSVEType(SVETypeFlags(Builtin->TypeModifier));
10185
24
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_psel, OverloadedTy);
10186
24
    llvm::Value *Ops0 =
10187
24
        IsSVCount ? Builder.CreateCall(CastFromSVCountF, Ops[0]) : Ops[0];
10188
24
    llvm::Value *Ops1 = EmitSVEPredicateCast(Ops[1], OverloadedTy);
10189
24
    llvm::Value *PSel = Builder.CreateCall(F, {Ops0, Ops1, Ops[2]});
10190
24
    return IsSVCount ? Builder.CreateCall(CastToSVCountF, PSel) : PSel;
10191
24
  }
10192
5
  case SVE::BI__builtin_sve_svmov_b_z: {
10193
    // svmov_b_z(pg, op) <=> svand_b_z(pg, op, op)
10194
5
    SVETypeFlags TypeFlags(Builtin->TypeModifier);
10195
5
    llvm::Type* OverloadedTy = getSVEType(TypeFlags);
10196
5
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_and_z, OverloadedTy);
10197
5
    return Builder.CreateCall(F, {Ops[0], Ops[1], Ops[1]});
10198
24
  }
10199
10200
5
  case SVE::BI__builtin_sve_svnot_b_z: {
10201
    // svnot_b_z(pg, op) <=> sveor_b_z(pg, op, pg)
10202
5
    SVETypeFlags TypeFlags(Builtin->TypeModifier);
10203
5
    llvm::Type* OverloadedTy = getSVEType(TypeFlags);
10204
5
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_eor_z, OverloadedTy);
10205
5
    return Builder.CreateCall(F, {Ops[0], Ops[1], Ops[0]});
10206
24
  }
10207
10208
6
  case SVE::BI__builtin_sve_svmovlb_u16:
10209
12
  case SVE::BI__builtin_sve_svmovlb_u32:
10210
18
  case SVE::BI__builtin_sve_svmovlb_u64:
10211
18
    return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_ushllb);
10212
10213
6
  case SVE::BI__builtin_sve_svmovlb_s16:
10214
12
  case SVE::BI__builtin_sve_svmovlb_s32:
10215
18
  case SVE::BI__builtin_sve_svmovlb_s64:
10216
18
    return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_sshllb);
10217
10218
6
  case SVE::BI__builtin_sve_svmovlt_u16:
10219
12
  case SVE::BI__builtin_sve_svmovlt_u32:
10220
18
  case SVE::BI__builtin_sve_svmovlt_u64:
10221
18
    return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_ushllt);
10222
10223
6
  case SVE::BI__builtin_sve_svmovlt_s16:
10224
12
  case SVE::BI__builtin_sve_svmovlt_s32:
10225
18
  case SVE::BI__builtin_sve_svmovlt_s64:
10226
18
    return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_sshllt);
10227
10228
6
  case SVE::BI__builtin_sve_svpmullt_u16:
10229
12
  case SVE::BI__builtin_sve_svpmullt_u64:
10230
18
  case SVE::BI__builtin_sve_svpmullt_n_u16:
10231
24
  case SVE::BI__builtin_sve_svpmullt_n_u64:
10232
24
    return EmitSVEPMull(TypeFlags, Ops, Intrinsic::aarch64_sve_pmullt_pair);
10233
10234
6
  case SVE::BI__builtin_sve_svpmullb_u16:
10235
12
  case SVE::BI__builtin_sve_svpmullb_u64:
10236
18
  case SVE::BI__builtin_sve_svpmullb_n_u16:
10237
24
  case SVE::BI__builtin_sve_svpmullb_n_u64:
10238
24
    return EmitSVEPMull(TypeFlags, Ops, Intrinsic::aarch64_sve_pmullb_pair);
10239
10240
5
  case SVE::BI__builtin_sve_svdup_n_b8:
10241
10
  case SVE::BI__builtin_sve_svdup_n_b16:
10242
15
  case SVE::BI__builtin_sve_svdup_n_b32:
10243
20
  case SVE::BI__builtin_sve_svdup_n_b64: {
10244
20
    Value *CmpNE =
10245
20
        Builder.CreateICmpNE(Ops[0], Constant::getNullValue(Ops[0]->getType()));
10246
20
    llvm::ScalableVectorType *OverloadedTy = getSVEType(TypeFlags);
10247
20
    Value *Dup = EmitSVEDupX(CmpNE, OverloadedTy);
10248
20
    return EmitSVEPredicateCast(Dup, cast<llvm::ScalableVectorType>(Ty));
10249
15
  }
10250
10251
6
  case SVE::BI__builtin_sve_svdupq_n_b8:
10252
12
  case SVE::BI__builtin_sve_svdupq_n_b16:
10253
18
  case SVE::BI__builtin_sve_svdupq_n_b32:
10254
24
  case SVE::BI__builtin_sve_svdupq_n_b64:
10255
29
  case SVE::BI__builtin_sve_svdupq_n_u8:
10256
34
  case SVE::BI__builtin_sve_svdupq_n_s8:
10257
39
  case SVE::BI__builtin_sve_svdupq_n_u64:
10258
44
  case SVE::BI__builtin_sve_svdupq_n_f64:
10259
49
  case SVE::BI__builtin_sve_svdupq_n_s64:
10260
54
  case SVE::BI__builtin_sve_svdupq_n_u16:
10261
59
  case SVE::BI__builtin_sve_svdupq_n_f16:
10262
64
  case SVE::BI__builtin_sve_svdupq_n_bf16:
10263
69
  case SVE::BI__builtin_sve_svdupq_n_s16:
10264
74
  case SVE::BI__builtin_sve_svdupq_n_u32:
10265
79
  case SVE::BI__builtin_sve_svdupq_n_f32:
10266
84
  case SVE::BI__builtin_sve_svdupq_n_s32: {
10267
    // These builtins are implemented by storing each element to an array and using
10268
    // ld1rq to materialize a vector.
10269
84
    unsigned NumOpnds = Ops.size();
10270
10271
84
    bool IsBoolTy =
10272
84
        cast<llvm::VectorType>(Ty)->getElementType()->isIntegerTy(1);
10273
10274
    // For svdupq_n_b* the element type of is an integer of type 128/numelts,
10275
    // so that the compare can use the width that is natural for the expected
10276
    // number of predicate lanes.
10277
84
    llvm::Type *EltTy = Ops[0]->getType();
10278
84
    if (IsBoolTy)
10279
24
      EltTy = IntegerType::get(getLLVMContext(), SVEBitsPerBlock / NumOpnds);
10280
10281
84
    SmallVector<llvm::Value *, 16> VecOps;
10282
674
    for (unsigned I = 0; I < NumOpnds; ++I)
10283
590
        VecOps.push_back(Builder.CreateZExt(Ops[I], EltTy));
10284
84
    Value *Vec = BuildVector(VecOps);
10285
10286
84
    llvm::Type *OverloadedTy = getSVEVectorForElementType(EltTy);
10287
84
    Value *InsertSubVec = Builder.CreateInsertVector(
10288
84
        OverloadedTy, PoisonValue::get(OverloadedTy), Vec, Builder.getInt64(0));
10289
10290
84
    Function *F =
10291
84
        CGM.getIntrinsic(Intrinsic::aarch64_sve_dupq_lane, OverloadedTy);
10292
84
    Value *DupQLane =
10293
84
        Builder.CreateCall(F, {InsertSubVec, Builder.getInt64(0)});
10294
10295
84
    if (!IsBoolTy)
10296
60
      return DupQLane;
10297
10298
24
    SVETypeFlags TypeFlags(Builtin->TypeModifier);
10299
24
    Value *Pred = EmitSVEAllTruePred(TypeFlags);
10300
10301
    // For svdupq_n_b* we need to add an additional 'cmpne' with '0'.
10302
24
    F = CGM.getIntrinsic(NumOpnds == 2 ? Intrinsic::aarch64_sve_cmpne
10303
24
                                       : Intrinsic::aarch64_sve_cmpne_wide,
10304
24
                         OverloadedTy);
10305
24
    Value *Call = Builder.CreateCall(
10306
24
        F, {Pred, DupQLane, EmitSVEDupX(Builder.getInt64(0))});
10307
24
    return EmitSVEPredicateCast(Call, cast<llvm::ScalableVectorType>(Ty));
10308
84
  }
10309
10310
5
  case SVE::BI__builtin_sve_svpfalse_b:
10311
5
    return ConstantInt::getFalse(Ty);
10312
10313
5
  case SVE::BI__builtin_sve_svpfalse_c: {
10314
5
    auto SVBoolTy = ScalableVectorType::get(Builder.getInt1Ty(), 16);
10315
5
    Function *CastToSVCountF =
10316
5
        CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, Ty);
10317
5
    return Builder.CreateCall(CastToSVCountF, ConstantInt::getFalse(SVBoolTy));
10318
84
  }
10319
10320
5
  case SVE::BI__builtin_sve_svlen_bf16:
10321
10
  case SVE::BI__builtin_sve_svlen_f16:
10322
15
  case SVE::BI__builtin_sve_svlen_f32:
10323
20
  case SVE::BI__builtin_sve_svlen_f64:
10324
25
  case SVE::BI__builtin_sve_svlen_s8:
10325
30
  case SVE::BI__builtin_sve_svlen_s16:
10326
35
  case SVE::BI__builtin_sve_svlen_s32:
10327
40
  case SVE::BI__builtin_sve_svlen_s64:
10328
45
  case SVE::BI__builtin_sve_svlen_u8:
10329
50
  case SVE::BI__builtin_sve_svlen_u16:
10330
55
  case SVE::BI__builtin_sve_svlen_u32:
10331
60
  case SVE::BI__builtin_sve_svlen_u64: {
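    // svlen_* returns the number of elements in a full vector: the type's
    // known-minimum lane count (e.g. 4 for svuint32_t) multiplied by vscale.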
10332
60
    SVETypeFlags TF(Builtin->TypeModifier);
10333
60
    auto VTy = cast<llvm::VectorType>(getSVEType(TF));
10334
60
    auto *NumEls =
10335
60
        llvm::ConstantInt::get(Ty, VTy->getElementCount().getKnownMinValue());
10336
10337
60
    Function *F = CGM.getIntrinsic(Intrinsic::vscale, Ty);
10338
60
    return Builder.CreateMul(NumEls, Builder.CreateCall(F));
10339
55
  }
10340
10341
6
  case SVE::BI__builtin_sve_svtbl2_u8:
10342
12
  case SVE::BI__builtin_sve_svtbl2_s8:
10343
18
  case SVE::BI__builtin_sve_svtbl2_u16:
10344
24
  case SVE::BI__builtin_sve_svtbl2_s16:
10345
30
  case SVE::BI__builtin_sve_svtbl2_u32:
10346
36
  case SVE::BI__builtin_sve_svtbl2_s32:
10347
42
  case SVE::BI__builtin_sve_svtbl2_u64:
10348
48
  case SVE::BI__builtin_sve_svtbl2_s64:
10349
54
  case SVE::BI__builtin_sve_svtbl2_f16:
10350
60
  case SVE::BI__builtin_sve_svtbl2_bf16:
10351
66
  case SVE::BI__builtin_sve_svtbl2_f32:
10352
72
  case SVE::BI__builtin_sve_svtbl2_f64: {
10353
72
    SVETypeFlags TF(Builtin->TypeModifier);
10354
72
    auto VTy = cast<llvm::ScalableVectorType>(getSVEType(TF));
10355
72
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_tbl2, VTy);
10356
72
    return Builder.CreateCall(F, Ops);
10357
66
  }
10358
10359
7
  case SVE::BI__builtin_sve_svset_neonq_s8:
10360
12
  case SVE::BI__builtin_sve_svset_neonq_s16:
10361
17
  case SVE::BI__builtin_sve_svset_neonq_s32:
10362
22
  case SVE::BI__builtin_sve_svset_neonq_s64:
10363
27
  case SVE::BI__builtin_sve_svset_neonq_u8:
10364
32
  case SVE::BI__builtin_sve_svset_neonq_u16:
10365
37
  case SVE::BI__builtin_sve_svset_neonq_u32:
10366
42
  case SVE::BI__builtin_sve_svset_neonq_u64:
10367
47
  case SVE::BI__builtin_sve_svset_neonq_f16:
10368
52
  case SVE::BI__builtin_sve_svset_neonq_f32:
10369
57
  case SVE::BI__builtin_sve_svset_neonq_f64:
10370
64
  case SVE::BI__builtin_sve_svset_neonq_bf16: {
10371
64
    return Builder.CreateInsertVector(Ty, Ops[0], Ops[1], Builder.getInt64(0));
10372
57
  }
10373
10374
7
  case SVE::BI__builtin_sve_svget_neonq_s8:
10375
12
  case SVE::BI__builtin_sve_svget_neonq_s16:
10376
17
  case SVE::BI__builtin_sve_svget_neonq_s32:
10377
22
  case SVE::BI__builtin_sve_svget_neonq_s64:
10378
27
  case SVE::BI__builtin_sve_svget_neonq_u8:
10379
32
  case SVE::BI__builtin_sve_svget_neonq_u16:
10380
37
  case SVE::BI__builtin_sve_svget_neonq_u32:
10381
42
  case SVE::BI__builtin_sve_svget_neonq_u64:
10382
47
  case SVE::BI__builtin_sve_svget_neonq_f16:
10383
52
  case SVE::BI__builtin_sve_svget_neonq_f32:
10384
57
  case SVE::BI__builtin_sve_svget_neonq_f64:
10385
64
  case SVE::BI__builtin_sve_svget_neonq_bf16: {
10386
64
    return Builder.CreateExtractVector(Ty, Ops[0], Builder.getInt64(0));
10387
57
  }
10388
10389
7
  case SVE::BI__builtin_sve_svdup_neonq_s8:
10390
12
  case SVE::BI__builtin_sve_svdup_neonq_s16:
10391
17
  case SVE::BI__builtin_sve_svdup_neonq_s32:
10392
22
  case SVE::BI__builtin_sve_svdup_neonq_s64:
10393
27
  case SVE::BI__builtin_sve_svdup_neonq_u8:
10394
32
  case SVE::BI__builtin_sve_svdup_neonq_u16:
10395
37
  case SVE::BI__builtin_sve_svdup_neonq_u32:
10396
42
  case SVE::BI__builtin_sve_svdup_neonq_u64:
10397
47
  case SVE::BI__builtin_sve_svdup_neonq_f16:
10398
52
  case SVE::BI__builtin_sve_svdup_neonq_f32:
10399
57
  case SVE::BI__builtin_sve_svdup_neonq_f64:
10400
64
  case SVE::BI__builtin_sve_svdup_neonq_bf16: {
10401
64
    Value *Insert = Builder.CreateInsertVector(Ty, PoisonValue::get(Ty), Ops[0],
10402
64
                                               Builder.getInt64(0));
10403
64
    return Builder.CreateIntrinsic(Intrinsic::aarch64_sve_dupq_lane, {Ty},
10404
64
                                   {Insert, Builder.getInt64(0)});
10405
57
  }
10406
592
  }
10407
10408
  /// Should not happen
10409
0
  return nullptr;
10410
592
}
10411
10412
Value *CodeGenFunction::EmitAArch64SMEBuiltinExpr(unsigned BuiltinID,
10413
1.73k
                                                  const CallExpr *E) {
10414
1.73k
  auto *Builtin = findARMVectorIntrinsicInMap(AArch64SMEIntrinsicMap, BuiltinID,
10415
1.73k
                                              AArch64SMEIntrinsicsProvenSorted);
10416
10417
1.73k
  llvm::SmallVector<Value *, 4> Ops;
10418
1.73k
  SVETypeFlags TypeFlags(Builtin->TypeModifier);
10419
1.73k
  GetAArch64SVEProcessedOperands(BuiltinID, E, Ops, TypeFlags);
10420
10421
1.73k
  if (TypeFlags.isLoad() || TypeFlags.isStore())
10422
244
    return EmitSMELd1St1(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10423
1.48k
  else if (TypeFlags.isReadZA() || TypeFlags.isWriteZA())
10424
960
    return EmitSMEReadWrite(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10425
528
  else if (BuiltinID == SME::BI__builtin_sme_svzero_mask_za ||
10426
528
           BuiltinID == SME::BI__builtin_sme_svzero_za)
10427
12
    return EmitSMEZero(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10428
516
  else if (BuiltinID == SME::BI__builtin_sme_svldr_vnum_za ||
10429
516
           BuiltinID == SME::BI__builtin_sme_svstr_vnum_za ||
10430
516
           BuiltinID == SME::BI__builtin_sme_svldr_za ||
10431
516
           BuiltinID == SME::BI__builtin_sme_svstr_za)
10432
24
    return EmitSMELdrStr(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10433
10434
  // Should not happen!
10435
492
  if (Builtin->LLVMIntrinsic == 0)
10436
0
    return nullptr;
10437
10438
  // Predicates must match the main datatype.
10439
2.85k
  for (unsigned i = 0, e = Ops.size(); i != e; ++i)
10440
2.36k
    if (auto PredTy = dyn_cast<llvm::VectorType>(Ops[i]->getType()))
10441
1.88k
      if (PredTy->getElementType()->isIntegerTy(1))
10442
400
        Ops[i] = EmitSVEPredicateCast(Ops[i], getSVEType(TypeFlags));
10443
10444
492
  Function *F =
10445
492
      TypeFlags.isOverloadNone()
10446
492
          ? CGM.getIntrinsic(Builtin->LLVMIntrinsic)
10447
492
          : CGM.getIntrinsic(Builtin->LLVMIntrinsic, {getSVEType(TypeFlags)});
10448
492
  Value *Call = Builder.CreateCall(F, Ops);
10449
10450
492
  return FormSVEBuiltinResult(Call);
10451
492
}
10452
10453
Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
10454
                                               const CallExpr *E,
10455
47.9k
                                               llvm::Triple::ArchType Arch) {
10456
47.9k
  if (BuiltinID >= clang::AArch64::FirstSVEBuiltin &&
10457
47.9k
      BuiltinID <= clang::AArch64::LastSVEBuiltin)
10458
41.7k
    return EmitAArch64SVEBuiltinExpr(BuiltinID, E);
10459
10460
6.20k
  if (BuiltinID >= clang::AArch64::FirstSMEBuiltin &&
10461
6.20k
      BuiltinID <= clang::AArch64::LastSMEBuiltin)
10462
1.73k
    return EmitAArch64SMEBuiltinExpr(BuiltinID, E);
10463
10464
4.47k
  unsigned HintID = static_cast<unsigned>(-1);
10465
4.47k
  switch (BuiltinID) {
10466
4.41k
  default: break;
10467
4.41k
  case clang::AArch64::BI__builtin_arm_nop:
10468
7
    HintID = 0;
10469
7
    break;
10470
7
  case clang::AArch64::BI__builtin_arm_yield:
10471
9
  case clang::AArch64::BI__yield:
10472
9
    HintID = 1;
10473
9
    break;
10474
7
  case clang::AArch64::BI__builtin_arm_wfe:
10475
9
  case clang::AArch64::BI__wfe:
10476
9
    HintID = 2;
10477
9
    break;
10478
7
  case clang::AArch64::BI__builtin_arm_wfi:
10479
9
  case clang::AArch64::BI__wfi:
10480
9
    HintID = 3;
10481
9
    break;
10482
7
  case clang::AArch64::BI__builtin_arm_sev:
10483
9
  case clang::AArch64::BI__sev:
10484
9
    HintID = 4;
10485
9
    break;
10486
7
  case clang::AArch64::BI__builtin_arm_sevl:
10487
9
  case clang::AArch64::BI__sevl:
10488
9
    HintID = 5;
10489
9
    break;
10490
4.47k
  }
10491
10492
4.47k
  if (HintID != static_cast<unsigned>(-1)) {
10493
52
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hint);
10494
52
    return Builder.CreateCall(F, llvm::ConstantInt::get(Int32Ty, HintID));
10495
52
  }
10496
10497
4.41k
  if (BuiltinID == clang::AArch64::BI__builtin_arm_rbit) {
10498
7
    assert((getContext().getTypeSize(E->getType()) == 32) &&
10499
7
           "rbit of unusual size!");
10500
7
    llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
10501
7
    return Builder.CreateCall(
10502
7
        CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
10503
7
  }
10504
4.41k
  if (BuiltinID == clang::AArch64::BI__builtin_arm_rbit64) {
10505
10
    assert((getContext().getTypeSize(E->getType()) == 64) &&
10506
10
           "rbit of unusual size!");
10507
10
    llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
10508
10
    return Builder.CreateCall(
10509
10
        CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
10510
10
  }
10511
10512
4.40k
  if (BuiltinID == clang::AArch64::BI__builtin_arm_clz ||
10513
4.40k
      BuiltinID == clang::AArch64::BI__builtin_arm_clz64) {
10514
12
    llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
10515
12
    Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Arg->getType());
10516
12
    Value *Res = Builder.CreateCall(F, {Arg, Builder.getInt1(false)});
10517
12
    if (BuiltinID == clang::AArch64::BI__builtin_arm_clz64)
10518
8
      Res = Builder.CreateTrunc(Res, Builder.getInt32Ty());
10519
12
    return Res;
10520
12
  }
10521
10522
4.38k
  if (BuiltinID == clang::AArch64::BI__builtin_arm_cls) {
10523
7
    llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
10524
7
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_cls), Arg,
10525
7
                              "cls");
10526
7
  }
10527
4.38k
  if (BuiltinID == clang::AArch64::BI__builtin_arm_cls64) {
10528
14
    llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
10529
14
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_cls64), Arg,
10530
14
                              "cls");
10531
14
  }
10532
10533
4.36k
  if (BuiltinID == clang::AArch64::BI__builtin_arm_rint32zf ||
10534
4.36k
      BuiltinID == clang::AArch64::BI__builtin_arm_rint32z) {
10535
2
    llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
10536
2
    llvm::Type *Ty = Arg->getType();
10537
2
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint32z, Ty),
10538
2
                              Arg, "frint32z");
10539
2
  }
10540
10541
4.36k
  if (BuiltinID == clang::AArch64::BI__builtin_arm_rint64zf ||
10542
4.36k
      BuiltinID == clang::AArch64::BI__builtin_arm_rint64z) {
10543
2
    llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
10544
2
    llvm::Type *Ty = Arg->getType();
10545
2
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint64z, Ty),
10546
2
                              Arg, "frint64z");
10547
2
  }
10548
10549
4.36k
  if (BuiltinID == clang::AArch64::BI__builtin_arm_rint32xf ||
10550
4.36k
      BuiltinID == clang::AArch64::BI__builtin_arm_rint32x) {
10551
2
    llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
10552
2
    llvm::Type *Ty = Arg->getType();
10553
2
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint32x, Ty),
10554
2
                              Arg, "frint32x");
10555
2
  }
10556
10557
4.36k
  if (BuiltinID == clang::AArch64::BI__builtin_arm_rint64xf ||
10558
4.36k
      BuiltinID == clang::AArch64::BI__builtin_arm_rint64x) {
10559
2
    llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
10560
2
    llvm::Type *Ty = Arg->getType();
10561
2
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint64x, Ty),
10562
2
                              Arg, "frint64x");
10563
2
  }
10564
10565
4.36k
  if (BuiltinID == clang::AArch64::BI__builtin_arm_jcvt) {
10566
6
    assert((getContext().getTypeSize(E->getType()) == 32) &&
10567
6
           "__jcvt of unusual size!");
10568
6
    llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
10569
6
    return Builder.CreateCall(
10570
6
        CGM.getIntrinsic(Intrinsic::aarch64_fjcvtzs), Arg);
10571
6
  }
10572
10573
4.35k
  if (BuiltinID == clang::AArch64::BI__builtin_arm_ld64b ||
10574
4.35k
      BuiltinID == clang::AArch64::BI__builtin_arm_st64b ||
10575
4.35k
      BuiltinID == clang::AArch64::BI__builtin_arm_st64bv ||
10576
4.35k
      BuiltinID == clang::AArch64::BI__builtin_arm_st64bv0) {
10577
16
    llvm::Value *MemAddr = EmitScalarExpr(E->getArg(0));
10578
16
    llvm::Value *ValPtr = EmitScalarExpr(E->getArg(1));
10579
10580
16
    if (BuiltinID == clang::AArch64::BI__builtin_arm_ld64b) {
10581
      // Load from the address via an LLVM intrinsic, receiving a
10582
      // tuple of 8 i64 words, and store each one to ValPtr.
10583
4
      Function *F = CGM.getIntrinsic(Intrinsic::aarch64_ld64b);
10584
4
      llvm::Value *Val = Builder.CreateCall(F, MemAddr);
10585
4
      llvm::Value *ToRet;
10586
36
      for (size_t i = 0; i < 8; i++) {
10587
32
        llvm::Value *ValOffsetPtr =
10588
32
            Builder.CreateGEP(Int64Ty, ValPtr, Builder.getInt32(i));
10589
32
        Address Addr =
10590
32
            Address(ValOffsetPtr, Int64Ty, CharUnits::fromQuantity(8));
10591
32
        ToRet = Builder.CreateStore(Builder.CreateExtractValue(Val, i), Addr);
10592
32
      }
10593
4
      return ToRet;
10594
12
    } else {
10595
      // Load 8 i64 words from ValPtr, and store them to the address
10596
      // via an LLVM intrinsic.
10597
12
      SmallVector<llvm::Value *, 9> Args;
10598
12
      Args.push_back(MemAddr);
10599
108
      for (size_t i = 0; i < 8; i++) {
10600
96
        llvm::Value *ValOffsetPtr =
10601
96
            Builder.CreateGEP(Int64Ty, ValPtr, Builder.getInt32(i));
10602
96
        Address Addr =
10603
96
            Address(ValOffsetPtr, Int64Ty, CharUnits::fromQuantity(8));
10604
96
        Args.push_back(Builder.CreateLoad(Addr));
10605
96
      }
10606
10607
12
      auto Intr = (BuiltinID == clang::AArch64::BI__builtin_arm_st64b
10608
12
                       ? Intrinsic::aarch64_st64b
10609
12
                   : BuiltinID == clang::AArch64::BI__builtin_arm_st64bv
10610
8
                       ? Intrinsic::aarch64_st64bv
10611
8
                       : Intrinsic::aarch64_st64bv0);
10612
12
      Function *F = CGM.getIntrinsic(Intr);
10613
12
      return Builder.CreateCall(F, Args);
10614
12
    }
10615
16
  }
10616
10617
4.33k
  if (BuiltinID == clang::AArch64::BI__builtin_arm_rndr ||
10618
4.33k
      BuiltinID == clang::AArch64::BI__builtin_arm_rndrrs) {
10619
10620
10
    auto Intr = (BuiltinID == clang::AArch64::BI__builtin_arm_rndr
10621
10
                     ? Intrinsic::aarch64_rndr
10622
10
                     : Intrinsic::aarch64_rndrrs);
10623
10
    Function *F = CGM.getIntrinsic(Intr);
10624
10
    llvm::Value *Val = Builder.CreateCall(F);
10625
10
    Value *RandomValue = Builder.CreateExtractValue(Val, 0);
10626
10
    Value *Status = Builder.CreateExtractValue(Val, 1);
10627
10628
10
    Address MemAddress = EmitPointerWithAlignment(E->getArg(0));
10629
10
    Builder.CreateStore(RandomValue, MemAddress);
10630
10
    Status = Builder.CreateZExt(Status, Int32Ty);
10631
10
    return Status;
10632
10
  }
10633
10634
4.32k
  if (BuiltinID == clang::AArch64::BI__clear_cache) {
10635
3
    assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
10636
3
    const FunctionDecl *FD = E->getDirectCallee();
10637
3
    Value *Ops[2];
10638
9
    for (unsigned i = 0; i < 2; i++)
10639
6
      Ops[i] = EmitScalarExpr(E->getArg(i));
10640
3
    llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
10641
3
    llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
10642
3
    StringRef Name = FD->getName();
10643
3
    return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
10644
3
  }
10645
10646
4.32k
  if ((BuiltinID == clang::AArch64::BI__builtin_arm_ldrex ||
10647
4.32k
       BuiltinID == clang::AArch64::BI__builtin_arm_ldaex) &&
10648
4.32k
      getContext().getTypeSize(E->getType()) == 128) {
10649
2
    Function *F =
10650
2
        CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_ldaex
10651
2
                             ? Intrinsic::aarch64_ldaxp
10652
2
                             : Intrinsic::aarch64_ldxp);
10653
10654
2
    Value *LdPtr = EmitScalarExpr(E->getArg(0));
10655
2
    Value *Val = Builder.CreateCall(F, LdPtr, "ldxp");
10656
10657
2
    Value *Val0 = Builder.CreateExtractValue(Val, 1);
10658
2
    Value *Val1 = Builder.CreateExtractValue(Val, 0);
10659
2
    llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
10660
2
    Val0 = Builder.CreateZExt(Val0, Int128Ty);
10661
2
    Val1 = Builder.CreateZExt(Val1, Int128Ty);
10662
10663
2
    Value *ShiftCst = llvm::ConstantInt::get(Int128Ty, 64);
10664
2
    Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
10665
2
    Val = Builder.CreateOr(Val, Val1);
10666
2
    return Builder.CreateBitCast(Val, ConvertType(E->getType()));
10667
4.32k
  } else if (BuiltinID == clang::AArch64::BI__builtin_arm_ldrex ||
10668
4.32k
             BuiltinID == clang::AArch64::BI__builtin_arm_ldaex) {
10669
23
    Value *LoadAddr = EmitScalarExpr(E->getArg(0));
10670
10671
23
    QualType Ty = E->getType();
10672
23
    llvm::Type *RealResTy = ConvertType(Ty);
10673
23
    llvm::Type *IntTy =
10674
23
        llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
10675
10676
23
    Function *F =
10677
23
        CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_ldaex
10678
23
                             ? Intrinsic::aarch64_ldaxr
10679
23
                             : Intrinsic::aarch64_ldxr,
10680
23
                         UnqualPtrTy);
10681
23
    CallInst *Val = Builder.CreateCall(F, LoadAddr, "ldxr");
10682
23
    Val->addParamAttr(
10683
23
        0, Attribute::get(getLLVMContext(), Attribute::ElementType, IntTy));
10684
10685
23
    if (RealResTy->isPointerTy())
10686
4
      return Builder.CreateIntToPtr(Val, RealResTy);
10687
10688
19
    llvm::Type *IntResTy = llvm::IntegerType::get(
10689
19
        getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));
10690
19
    return Builder.CreateBitCast(Builder.CreateTruncOrBitCast(Val, IntResTy),
10691
19
                                 RealResTy);
10692
23
  }
10693
10694
4.30k
  if ((BuiltinID == clang::AArch64::BI__builtin_arm_strex ||
10695
4.30k
       BuiltinID == clang::AArch64::BI__builtin_arm_stlex) &&
10696
4.30k
      getContext().getTypeSize(E->getArg(0)->getType()) == 128) {
10697
2
    Function *F =
10698
2
        CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_stlex
10699
2
                             ? Intrinsic::aarch64_stlxp
10700
2
                             : Intrinsic::aarch64_stxp);
10701
2
    llvm::Type *STy = llvm::StructType::get(Int64Ty, Int64Ty);
10702
10703
2
    Address Tmp = CreateMemTemp(E->getArg(0)->getType());
10704
2
    EmitAnyExprToMem(E->getArg(0), Tmp, Qualifiers(), /*init*/ true);
10705
10706
2
    Tmp = Tmp.withElementType(STy);
10707
2
    llvm::Value *Val = Builder.CreateLoad(Tmp);
10708
10709
2
    Value *Arg0 = Builder.CreateExtractValue(Val, 0);
10710
2
    Value *Arg1 = Builder.CreateExtractValue(Val, 1);
10711
2
    Value *StPtr = EmitScalarExpr(E->getArg(1));
10712
2
    return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "stxp");
10713
2
  }
10714
10715
4.29k
  if (BuiltinID == clang::AArch64::BI__builtin_arm_strex ||
10716
4.29k
      BuiltinID == clang::AArch64::BI__builtin_arm_stlex) {
10717
19
    Value *StoreVal = EmitScalarExpr(E->getArg(0));
10718
19
    Value *StoreAddr = EmitScalarExpr(E->getArg(1));
10719
10720
19
    QualType Ty = E->getArg(0)->getType();
10721
19
    llvm::Type *StoreTy =
10722
19
        llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
10723
10724
19
    if (StoreVal->getType()->isPointerTy())
10725
2
      StoreVal = Builder.CreatePtrToInt(StoreVal, Int64Ty);
10726
17
    else {
10727
17
      llvm::Type *IntTy = llvm::IntegerType::get(
10728
17
          getLLVMContext(),
10729
17
          CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));
10730
17
      StoreVal = Builder.CreateBitCast(StoreVal, IntTy);
10731
17
      StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int64Ty);
10732
17
    }
10733
10734
19
    Function *F =
10735
19
        CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_stlex
10736
19
                             ? Intrinsic::aarch64_stlxr
10737
19
                             : Intrinsic::aarch64_stxr,
10738
19
                         StoreAddr->getType());
10739
19
    CallInst *CI = Builder.CreateCall(F, {StoreVal, StoreAddr}, "stxr");
10740
19
    CI->addParamAttr(
10741
19
        1, Attribute::get(getLLVMContext(), Attribute::ElementType, StoreTy));
10742
19
    return CI;
10743
19
  }
10744
10745
4.27k
  if (BuiltinID == clang::AArch64::BI__getReg) {
10746
4
    Expr::EvalResult Result;
10747
4
    if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
10748
0
      llvm_unreachable("Sema will ensure that the parameter is constant");
10749
10750
4
    llvm::APSInt Value = Result.Val.getInt();
10751
4
    LLVMContext &Context = CGM.getLLVMContext();
10752
4
    std::string Reg = Value == 31 ? "sp" : "x" + toString(Value, 10);
10753
10754
4
    llvm::Metadata *Ops[] = {llvm::MDString::get(Context, Reg)};
10755
4
    llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
10756
4
    llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
10757
10758
4
    llvm::Function *F =
10759
4
        CGM.getIntrinsic(llvm::Intrinsic::read_register, {Int64Ty});
10760
4
    return Builder.CreateCall(F, Metadata);
10761
4
  }
10762
10763
4.27k
  if (BuiltinID == clang::AArch64::BI__break) {
10764
2
    Expr::EvalResult Result;
10765
2
    if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
10766
0
      llvm_unreachable("Sema will ensure that the parameter is constant");
10767
10768
2
    llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::aarch64_break);
10769
2
    return Builder.CreateCall(F, {EmitScalarExpr(E->getArg(0))});
10770
2
  }
10771
10772
4.27k
  if (BuiltinID == clang::AArch64::BI__builtin_arm_clrex) {
10773
1
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_clrex);
10774
1
    return Builder.CreateCall(F);
10775
1
  }
10776
10777
4.27k
  if (BuiltinID == clang::AArch64::BI_ReadWriteBarrier)
10778
2
    return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
10779
2
                               llvm::SyncScope::SingleThread);
10780
10781
  // CRC32
10782
4.27k
  Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
10783
4.27k
  switch (BuiltinID) {
10784
7
  case clang::AArch64::BI__builtin_arm_crc32b:
10785
7
    CRCIntrinsicID = Intrinsic::aarch64_crc32b; break;
10786
6
  case clang::AArch64::BI__builtin_arm_crc32cb:
10787
6
    CRCIntrinsicID = Intrinsic::aarch64_crc32cb; break;
10788
6
  case clang::AArch64::BI__builtin_arm_crc32h:
10789
6
    CRCIntrinsicID = Intrinsic::aarch64_crc32h; break;
10790
6
  case clang::AArch64::BI__builtin_arm_crc32ch:
10791
6
    CRCIntrinsicID = Intrinsic::aarch64_crc32ch; break;
10792
6
  case clang::AArch64::BI__builtin_arm_crc32w:
10793
6
    CRCIntrinsicID = Intrinsic::aarch64_crc32w; break;
10794
6
  case clang::AArch64::BI__builtin_arm_crc32cw:
10795
6
    CRCIntrinsicID = Intrinsic::aarch64_crc32cw; break;
10796
6
  case clang::AArch64::BI__builtin_arm_crc32d:
10797
6
    CRCIntrinsicID = Intrinsic::aarch64_crc32x; break;
10798
7
  case clang::AArch64::BI__builtin_arm_crc32cd:
10799
7
    CRCIntrinsicID = Intrinsic::aarch64_crc32cx; break;
10800
4.27k
  }
10801
10802
4.27k
  if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
10803
50
    Value *Arg0 = EmitScalarExpr(E->getArg(0));
10804
50
    Value *Arg1 = EmitScalarExpr(E->getArg(1));
10805
50
    Function *F = CGM.getIntrinsic(CRCIntrinsicID);
10806
10807
50
    llvm::Type *DataTy = F->getFunctionType()->getParamType(1);
10808
50
    Arg1 = Builder.CreateZExtOrBitCast(Arg1, DataTy);
10809
10810
50
    return Builder.CreateCall(F, {Arg0, Arg1});
10811
50
  }
10812
10813
  // Memory Operations (MOPS)
10814
4.22k
  if (BuiltinID == AArch64::BI__builtin_arm_mops_memset_tag) {
10815
13
    Value *Dst = EmitScalarExpr(E->getArg(0));
10816
13
    Value *Val = EmitScalarExpr(E->getArg(1));
10817
13
    Value *Size = EmitScalarExpr(E->getArg(2));
10818
13
    Dst = Builder.CreatePointerCast(Dst, Int8PtrTy);
10819
13
    Val = Builder.CreateTrunc(Val, Int8Ty);
10820
13
    Size = Builder.CreateIntCast(Size, Int64Ty, false);
10821
13
    return Builder.CreateCall(
10822
13
        CGM.getIntrinsic(Intrinsic::aarch64_mops_memset_tag), {Dst, Val, Size});
10823
13
  }
10824
10825
  // Memory Tagging Extensions (MTE) Intrinsics
10826
4.20k
  Intrinsic::ID MTEIntrinsicID = Intrinsic::not_intrinsic;
10827
4.20k
  switch (BuiltinID) {
10828
7
  case clang::AArch64::BI__builtin_arm_irg:
10829
7
    MTEIntrinsicID = Intrinsic::aarch64_irg; break;
10830
4
  case clang::AArch64::BI__builtin_arm_addg:
10831
4
    MTEIntrinsicID = Intrinsic::aarch64_addg; break;
10832
2
  case clang::AArch64::BI__builtin_arm_gmi:
10833
2
    MTEIntrinsicID = Intrinsic::aarch64_gmi; break;
10834
4
  case clang::AArch64::BI__builtin_arm_ldg:
10835
4
    MTEIntrinsicID = Intrinsic::aarch64_ldg; break;
10836
2
  case clang::AArch64::BI__builtin_arm_stg:
10837
2
    MTEIntrinsicID = Intrinsic::aarch64_stg; break;
10838
8
  case clang::AArch64::BI__builtin_arm_subp:
10839
8
    MTEIntrinsicID = Intrinsic::aarch64_subp; break;
10840
4.20k
  }
10841
10842
4.20k
  if (MTEIntrinsicID != Intrinsic::not_intrinsic) {
10843
27
    llvm::Type *T = ConvertType(E->getType());
10844
10845
27
    if (MTEIntrinsicID == Intrinsic::aarch64_irg) {
10846
7
      Value *Pointer = EmitScalarExpr(E->getArg(0));
10847
7
      Value *Mask = EmitScalarExpr(E->getArg(1));
10848
10849
7
      Pointer = Builder.CreatePointerCast(Pointer, Int8PtrTy);
10850
7
      Mask = Builder.CreateZExt(Mask, Int64Ty);
10851
7
      Value *RV = Builder.CreateCall(
10852
7
                       CGM.getIntrinsic(MTEIntrinsicID), {Pointer, Mask});
10853
7
       return Builder.CreatePointerCast(RV, T);
10854
7
    }
10855
20
    if (MTEIntrinsicID == Intrinsic::aarch64_addg) {
10856
4
      Value *Pointer = EmitScalarExpr(E->getArg(0));
10857
4
      Value *TagOffset = EmitScalarExpr(E->getArg(1));
10858
10859
4
      Pointer = Builder.CreatePointerCast(Pointer, Int8PtrTy);
10860
4
      TagOffset = Builder.CreateZExt(TagOffset, Int64Ty);
10861
4
      Value *RV = Builder.CreateCall(
10862
4
                       CGM.getIntrinsic(MTEIntrinsicID), {Pointer, TagOffset});
10863
4
      return Builder.CreatePointerCast(RV, T);
10864
4
    }
10865
16
    if (MTEIntrinsicID == Intrinsic::aarch64_gmi) {
10866
2
      Value *Pointer = EmitScalarExpr(E->getArg(0));
10867
2
      Value *ExcludedMask = EmitScalarExpr(E->getArg(1));
10868
10869
2
      ExcludedMask = Builder.CreateZExt(ExcludedMask, Int64Ty);
10870
2
      Pointer = Builder.CreatePointerCast(Pointer, Int8PtrTy);
10871
2
      return Builder.CreateCall(
10872
2
                       CGM.getIntrinsic(MTEIntrinsicID), {Pointer, ExcludedMask});
10873
2
    }
10874
    // Although it is possible to supply a different return
10875
    // address (first arg) to this intrinsic, for now we set
10876
    // return address same as input address.
10877
14
    if (MTEIntrinsicID == Intrinsic::aarch64_ldg) {
10878
4
      Value *TagAddress = EmitScalarExpr(E->getArg(0));
10879
4
      TagAddress = Builder.CreatePointerCast(TagAddress, Int8PtrTy);
10880
4
      Value *RV = Builder.CreateCall(
10881
4
                    CGM.getIntrinsic(MTEIntrinsicID), {TagAddress, TagAddress});
10882
4
      return Builder.CreatePointerCast(RV, T);
10883
4
    }
10884
    // Although it is possible to supply a different tag (to set)
10885
    // to this intrinsic (as first arg), for now we supply
10886
    // the tag that is in input address arg (common use case).
10887
10
    if (MTEIntrinsicID == Intrinsic::aarch64_stg) {
10888
2
        Value *TagAddress = EmitScalarExpr(E->getArg(0));
10889
2
        TagAddress = Builder.CreatePointerCast(TagAddress, Int8PtrTy);
10890
2
        return Builder.CreateCall(
10891
2
                 CGM.getIntrinsic(MTEIntrinsicID), {TagAddress, TagAddress});
10892
2
    }
10893
8
    if (MTEIntrinsicID == Intrinsic::aarch64_subp) {
10894
8
      Value *PointerA = EmitScalarExpr(E->getArg(0));
10895
8
      Value *PointerB = EmitScalarExpr(E->getArg(1));
10896
8
      PointerA = Builder.CreatePointerCast(PointerA, Int8PtrTy);
10897
8
      PointerB = Builder.CreatePointerCast(PointerB, Int8PtrTy);
10898
8
      return Builder.CreateCall(
10899
8
                       CGM.getIntrinsic(MTEIntrinsicID), {PointerA, PointerB});
10900
8
    }
10901
8
  }
10902
10903
4.18k
  if (BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
10904
4.18k
      BuiltinID == clang::AArch64::BI__builtin_arm_rsr64 ||
10905
4.18k
      BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
10906
4.18k
      BuiltinID == clang::AArch64::BI__builtin_arm_rsrp ||
10907
4.18k
      BuiltinID == clang::AArch64::BI__builtin_arm_wsr ||
10908
4.18k
      BuiltinID == clang::AArch64::BI__builtin_arm_wsr64 ||
10909
4.18k
      BuiltinID == clang::AArch64::BI__builtin_arm_wsr128 ||
10910
4.18k
      BuiltinID == clang::AArch64::BI__builtin_arm_wsrp) {
10911
10912
80
    SpecialRegisterAccessKind AccessKind = Write;
10913
80
    if (BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
10914
80
        BuiltinID == clang::AArch64::BI__builtin_arm_rsr64 ||
10915
80
        BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
10916
80
        BuiltinID == clang::AArch64::BI__builtin_arm_rsrp)
10917
40
      AccessKind = VolatileRead;
10918
10919
80
    bool IsPointerBuiltin = BuiltinID == clang::AArch64::BI__builtin_arm_rsrp ||
10920
80
                            BuiltinID == clang::AArch64::BI__builtin_arm_wsrp;
10921
10922
80
    bool Is32Bit = BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
10923
80
                   BuiltinID == clang::AArch64::BI__builtin_arm_wsr;
10924
10925
80
    bool Is128Bit = BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
10926
80
                    BuiltinID == clang::AArch64::BI__builtin_arm_wsr128;
10927
10928
80
    llvm::Type *ValueType;
10929
80
    llvm::Type *RegisterType = Int64Ty;
10930
80
    if (Is32Bit) {
10931
26
      ValueType = Int32Ty;
10932
54
    } else if (Is128Bit) {
10933
10
      llvm::Type *Int128Ty =
10934
10
          llvm::IntegerType::getInt128Ty(CGM.getLLVMContext());
10935
10
      ValueType = Int128Ty;
10936
10
      RegisterType = Int128Ty;
10937
44
    } else if (IsPointerBuiltin) {
10938
18
      ValueType = VoidPtrTy;
10939
26
    } else {
10940
26
      ValueType = Int64Ty;
10941
26
    };
10942
10943
80
    return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType,
10944
80
                                      AccessKind);
10945
80
  }
10946
10947
4.10k
  if (BuiltinID == clang::AArch64::BI_ReadStatusReg ||
10948
4.10k
      BuiltinID == clang::AArch64::BI_WriteStatusReg) {
10949
80
    LLVMContext &Context = CGM.getLLVMContext();
10950
10951
80
    unsigned SysReg =
10952
80
      E->getArg(0)->EvaluateKnownConstInt(getContext()).getZExtValue();
10953
10954
80
    std::string SysRegStr;
10955
80
    llvm::raw_string_ostream(SysRegStr) <<
10956
80
                       ((1 << 1) | ((SysReg >> 14) & 1))  << ":" <<
10957
80
                       ((SysReg >> 11) & 7)               << ":" <<
10958
80
                       ((SysReg >> 7)  & 15)              << ":" <<
10959
80
                       ((SysReg >> 3)  & 15)              << ":" <<
10960
80
                       ( SysReg        & 7);
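    // _ReadStatusReg/_WriteStatusReg take the MSVC ARM64_SYSREG encoding, which
    // packs op1, CRn, CRm, op2 and the low bit of op0 (op0 is always 2 or 3)
    // into a 16-bit constant; it is unpacked here into the "op0:op1:CRn:CRm:op2"
    // string form expected by the read_register/write_register metadata.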
10961
10962
80
    llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysRegStr) };
10963
80
    llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
10964
80
    llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
10965
10966
80
    llvm::Type *RegisterType = Int64Ty;
10967
80
    llvm::Type *Types[] = { RegisterType };
10968
10969
80
    if (BuiltinID == clang::AArch64::BI_ReadStatusReg) {
10970
40
      llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, Types);
10971
10972
40
      return Builder.CreateCall(F, Metadata);
10973
40
    }
10974
10975
40
    llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types);
10976
40
    llvm::Value *ArgValue = EmitScalarExpr(E->getArg(1));
10977
10978
40
    return Builder.CreateCall(F, { Metadata, ArgValue });
10979
80
  }
10980
10981
4.02k
  if (BuiltinID == clang::AArch64::BI_AddressOfReturnAddress) {
10982
1
    llvm::Function *F =
10983
1
        CGM.getIntrinsic(Intrinsic::addressofreturnaddress, AllocaInt8PtrTy);
10984
1
    return Builder.CreateCall(F);
10985
1
  }
10986
10987
4.01k
  if (BuiltinID == clang::AArch64::BI__builtin_sponentry) {
10988
1
    llvm::Function *F = CGM.getIntrinsic(Intrinsic::sponentry, AllocaInt8PtrTy);
10989
1
    return Builder.CreateCall(F);
10990
1
  }
10991
10992
4.01k
  if (BuiltinID == clang::AArch64::BI__mulh ||
10993
4.01k
      BuiltinID == clang::AArch64::BI__umulh) {
10994
4
    llvm::Type *ResType = ConvertType(E->getType());
10995
4
    llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
10996
10997
4
    bool IsSigned = BuiltinID == clang::AArch64::BI__mulh;
10998
4
    Value *LHS =
10999
4
        Builder.CreateIntCast(EmitScalarExpr(E->getArg(0)), Int128Ty, IsSigned);
11000
4
    Value *RHS =
11001
4
        Builder.CreateIntCast(EmitScalarExpr(E->getArg(1)), Int128Ty, IsSigned);
11002
11003
4
    Value *MulResult, *HigherBits;
11004
4
    if (IsSigned) {
11005
2
      MulResult = Builder.CreateNSWMul(LHS, RHS);
11006
2
      HigherBits = Builder.CreateAShr(MulResult, 64);
11007
2
    } else {
11008
2
      MulResult = Builder.CreateNUWMul(LHS, RHS);
11009
2
      HigherBits = Builder.CreateLShr(MulResult, 64);
11010
2
    }
11011
4
    HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned);
11012
11013
4
    return HigherBits;
11014
4
  }
11015
11016
4.01k
  if (BuiltinID == AArch64::BI__writex18byte ||
11017
4.01k
      BuiltinID == AArch64::BI__writex18word ||
11018
4.01k
      BuiltinID == AArch64::BI__writex18dword ||
11019
4.01k
      BuiltinID == AArch64::BI__writex18qword) {
11020
    // Read x18 as i8*
11021
8
    LLVMContext &Context = CGM.getLLVMContext();
11022
8
    llvm::Metadata *Ops[] = {llvm::MDString::get(Context, "x18")};
11023
8
    llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
11024
8
    llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
11025
8
    llvm::Function *F =
11026
8
        CGM.getIntrinsic(llvm::Intrinsic::read_register, {Int64Ty});
11027
8
    llvm::Value *X18 = Builder.CreateCall(F, Metadata);
11028
8
    X18 = Builder.CreateIntToPtr(X18, Int8PtrTy);
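    // On Windows on Arm, x18 is the reserved platform register that holds the
    // TEB pointer, so __writex18* effectively stores a value at a byte offset
    // from the TEB.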
11029
11030
    // Store val at x18 + offset
11031
8
    Value *Offset = Builder.CreateZExt(EmitScalarExpr(E->getArg(0)), Int64Ty);
11032
8
    Value *Ptr = Builder.CreateGEP(Int8Ty, X18, Offset);
11033
8
    Value *Val = EmitScalarExpr(E->getArg(1));
11034
8
    StoreInst *Store = Builder.CreateAlignedStore(Val, Ptr, CharUnits::One());
11035
8
    return Store;
11036
8
  }
11037
11038
4.00k
  if (BuiltinID == AArch64::BI__readx18byte ||
11039
4.00k
      BuiltinID == AArch64::BI__readx18word ||
11040
4.00k
      BuiltinID == AArch64::BI__readx18dword ||
11041
4.00k
      BuiltinID == AArch64::BI__readx18qword) {
11042
8
    llvm::Type *IntTy = ConvertType(E->getType());
11043
11044
    // Read x18 as i8*
11045
8
    LLVMContext &Context = CGM.getLLVMContext();
11046
8
    llvm::Metadata *Ops[] = {llvm::MDString::get(Context, "x18")};
11047
8
    llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
11048
8
    llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
11049
8
    llvm::Function *F =
11050
8
        CGM.getIntrinsic(llvm::Intrinsic::read_register, {Int64Ty});
11051
8
    llvm::Value *X18 = Builder.CreateCall(F, Metadata);
11052
8
    X18 = Builder.CreateIntToPtr(X18, Int8PtrTy);
11053
11054
    // Load x18 + offset
11055
8
    Value *Offset = Builder.CreateZExt(EmitScalarExpr(E->getArg(0)), Int64Ty);
11056
8
    Value *Ptr = Builder.CreateGEP(Int8Ty, X18, Offset);
11057
8
    LoadInst *Load = Builder.CreateAlignedLoad(IntTy, Ptr, CharUnits::One());
11058
8
    return Load;
11059
8
  }
11060
11061
3.99k
  if (BuiltinID == AArch64::BI_CopyDoubleFromInt64 ||
11062
3.99k
      BuiltinID == AArch64::BI_CopyFloatFromInt32 ||
11063
3.99k
      BuiltinID == AArch64::BI_CopyInt32FromFloat ||
11064
3.99k
      BuiltinID == AArch64::BI_CopyInt64FromDouble) {
11065
8
    Value *Arg = EmitScalarExpr(E->getArg(0));
11066
8
    llvm::Type *RetTy = ConvertType(E->getType());
11067
8
    return Builder.CreateBitCast(Arg, RetTy);
11068
8
  }
11069
11070
3.99k
  if (BuiltinID == AArch64::BI_CountLeadingOnes ||
11071
3.99k
      BuiltinID == AArch64::BI_CountLeadingOnes64 ||
11072
3.99k
      BuiltinID == AArch64::BI_CountLeadingZeros ||
11073
3.99k
      BuiltinID == AArch64::BI_CountLeadingZeros64) {
11074
8
    Value *Arg = EmitScalarExpr(E->getArg(0));
11075
8
    llvm::Type *ArgType = Arg->getType();
11076
11077
8
    if (BuiltinID == AArch64::BI_CountLeadingOnes ||
11078
8
        BuiltinID == AArch64::BI_CountLeadingOnes64)
11079
4
      Arg = Builder.CreateXor(Arg, Constant::getAllOnesValue(ArgType));
11080
11081
8
    Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
11082
8
    Value *Result = Builder.CreateCall(F, {Arg, Builder.getInt1(false)});
11083
11084
8
    if (BuiltinID == AArch64::BI_CountLeadingOnes64 ||
11085
8
        BuiltinID == AArch64::BI_CountLeadingZeros64)
11086
4
      Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
11087
8
    return Result;
11088
8
  }
11089
11090
3.98k
  if (BuiltinID == AArch64::BI_CountLeadingSigns ||
11091
3.98k
      BuiltinID == AArch64::BI_CountLeadingSigns64) {
11092
4
    Value *Arg = EmitScalarExpr(E->getArg(0));
11093
11094
4
    Function *F = (BuiltinID == AArch64::BI_CountLeadingSigns)
11095
4
                      ? CGM.getIntrinsic(Intrinsic::aarch64_cls)
11096
4
                      : CGM.getIntrinsic(Intrinsic::aarch64_cls64);
11097
11098
4
    Value *Result = Builder.CreateCall(F, Arg, "cls");
11099
4
    if (BuiltinID == AArch64::BI_CountLeadingSigns64)
11100
2
      Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
11101
4
    return Result;
11102
4
  }
11103
11104
3.97k
  if (BuiltinID == AArch64::BI_CountOneBits ||
11105
3.97k
      BuiltinID == AArch64::BI_CountOneBits64) {
11106
4
    Value *ArgValue = EmitScalarExpr(E->getArg(0));
11107
4
    llvm::Type *ArgType = ArgValue->getType();
11108
4
    Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
11109
11110
4
    Value *Result = Builder.CreateCall(F, ArgValue);
11111
4
    if (BuiltinID == AArch64::BI_CountOneBits64)
11112
2
      Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
11113
4
    return Result;
11114
4
  }
11115
11116
3.97k
  if (BuiltinID == AArch64::BI__prefetch) {
11117
2
    Value *Address = EmitScalarExpr(E->getArg(0));
11118
2
    Value *RW = llvm::ConstantInt::get(Int32Ty, 0);
11119
2
    Value *Locality = ConstantInt::get(Int32Ty, 3);
11120
2
    Value *Data = llvm::ConstantInt::get(Int32Ty, 1);
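    // llvm.prefetch operands: rw = 0 (read), locality = 3 (keep in all cache
    // levels), cache type = 1 (data rather than instruction cache).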
11121
2
    Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
11122
2
    return Builder.CreateCall(F, {Address, RW, Locality, Data});
11123
2
  }
11124
11125
  // Handle MSVC intrinsics before argument evaluation to prevent double
11126
  // evaluation.
11127
3.97k
  if (std::optional<MSVCIntrin> MsvcIntId =
11128
3.97k
          translateAarch64ToMsvcIntrin(BuiltinID))
11129
210
    return EmitMSVCBuiltinExpr(*MsvcIntId, E);
11130
11131
  // Some intrinsics are equivalent - if they are, use the base intrinsic ID.
11132
547k
  auto It = llvm::find_if(NEONEquivalentIntrinsicMap, [BuiltinID](auto &P) {
11133
547k
    return P.first == BuiltinID;
11134
547k
  });
11135
3.76k
  if (It != end(NEONEquivalentIntrinsicMap))
11136
206
    BuiltinID = It->second;
11137
11138
  // Find out if any arguments are required to be integer constant
11139
  // expressions.
11140
3.76k
  unsigned ICEArguments = 0;
11141
3.76k
  ASTContext::GetBuiltinTypeError Error;
11142
3.76k
  getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
11143
3.76k
  assert(Error == ASTContext::GE_None && "Should not codegen an error");
11144
11145
3.76k
  llvm::SmallVector<Value*, 4> Ops;
11146
3.76k
  Address PtrOp0 = Address::invalid();
11147
11.3k
  for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) {
11148
7.63k
    if (i == 0) {
11149
3.47k
      switch (BuiltinID) {
11150
32
      case NEON::BI__builtin_neon_vld1_v:
11151
47
      case NEON::BI__builtin_neon_vld1q_v:
11152
62
      case NEON::BI__builtin_neon_vld1_dup_v:
11153
77
      case NEON::BI__builtin_neon_vld1q_dup_v:
11154
92
      case NEON::BI__builtin_neon_vld1_lane_v:
11155
107
      case NEON::BI__builtin_neon_vld1q_lane_v:
11156
122
      case NEON::BI__builtin_neon_vst1_v:
11157
137
      case NEON::BI__builtin_neon_vst1q_v:
11158
152
      case NEON::BI__builtin_neon_vst1_lane_v:
11159
167
      case NEON::BI__builtin_neon_vst1q_lane_v:
11160
171
      case NEON::BI__builtin_neon_vldap1_lane_s64:
11161
175
      case NEON::BI__builtin_neon_vldap1q_lane_s64:
11162
179
      case NEON::BI__builtin_neon_vstl1_lane_s64:
11163
183
      case NEON::BI__builtin_neon_vstl1q_lane_s64:
11164
        // Get the alignment for the argument in addition to the value;
11165
        // we'll use it later.
11166
183
        PtrOp0 = EmitPointerWithAlignment(E->getArg(0));
11167
183
        Ops.push_back(PtrOp0.getPointer());
11168
183
        continue;
11169
3.47k
      }
11170
3.47k
    }
11171
7.45k
    Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
11172
7.45k
  }
11173
11174
3.76k
  auto SISDMap = ArrayRef(AArch64SISDIntrinsicMap);
11175
3.76k
  const ARMVectorIntrinsicInfo *Builtin = findARMVectorIntrinsicInMap(
11176
3.76k
      SISDMap, BuiltinID, AArch64SISDIntrinsicsProvenSorted);
11177
11178
3.76k
  if (Builtin) {
11179
271
    Ops.push_back(EmitScalarExpr(E->getArg(E->getNumArgs() - 1)));
11180
271
    Value *Result = EmitCommonNeonSISDBuiltinExpr(*this, *Builtin, Ops, E);
11181
271
    assert(Result && "SISD intrinsic should have been handled");
11182
271
    return Result;
11183
271
  }
11184
11185
3.49k
  const Expr *Arg = E->getArg(E->getNumArgs()-1);
11186
3.49k
  NeonTypeFlags Type(0);
11187
3.49k
  if (std::optional<llvm::APSInt> Result =
11188
3.49k
          Arg->getIntegerConstantExpr(getContext()))
11189
    // Determine the type of this overloaded NEON intrinsic.
11190
3.21k
    Type = NeonTypeFlags(Result->getZExtValue());
11191
11192
3.49k
  bool usgn = Type.isUnsigned();
11193
3.49k
  bool quad = Type.isQuad();
11194
11195
  // Handle non-overloaded intrinsics first.
11196
3.49k
  switch (BuiltinID) {
11197
2.85k
  default: break;
11198
2.85k
  case NEON::BI__builtin_neon_vabsh_f16:
11199
1
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
11200
1
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, HalfTy), Ops, "vabs");
11201
1
  case NEON::BI__builtin_neon_vaddq_p128: {
11202
1
    llvm::Type *Ty = GetNeonType(this, NeonTypeFlags::Poly128);
11203
1
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
11204
1
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
11205
1
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
11206
1
    Ops[0] =  Builder.CreateXor(Ops[0], Ops[1]);
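    // Addition of poly128 values is carry-less (addition over GF(2)), which is
    // simply a bitwise XOR.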
11207
1
    llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128);
11208
1
    return Builder.CreateBitCast(Ops[0], Int128Ty);
11209
0
  }
11210
2
  case NEON::BI__builtin_neon_vldrq_p128: {
11211
2
    llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128);
11212
2
    Value *Ptr = EmitScalarExpr(E->getArg(0));
11213
2
    return Builder.CreateAlignedLoad(Int128Ty, Ptr,
11214
2
                                     CharUnits::fromQuantity(16));
11215
0
  }
11216
2
  case NEON::BI__builtin_neon_vstrq_p128: {
11217
2
    Value *Ptr = Ops[0];
11218
2
    return Builder.CreateDefaultAlignedStore(EmitScalarExpr(E->getArg(1)), Ptr);
11219
0
  }
11220
3
  case NEON::BI__builtin_neon_vcvts_f32_u32:
11221
6
  case NEON::BI__builtin_neon_vcvtd_f64_u64:
11222
6
    usgn = true;
11223
6
    [[fallthrough]];
11224
9
  case NEON::BI__builtin_neon_vcvts_f32_s32:
11225
12
  case NEON::BI__builtin_neon_vcvtd_f64_s64: {
11226
12
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
11227
12
    bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64;
11228
12
    llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty;
11229
12
    llvm::Type *FTy = Is64 ? DoubleTy : FloatTy;
11230
12
    Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
11231
12
    if (usgn)
11232
6
      return Builder.CreateUIToFP(Ops[0], FTy);
11233
6
    return Builder.CreateSIToFP(Ops[0], FTy);
11234
12
  }
11235
3
  case NEON::BI__builtin_neon_vcvth_f16_u16:
11236
6
  case NEON::BI__builtin_neon_vcvth_f16_u32:
11237
9
  case NEON::BI__builtin_neon_vcvth_f16_u64:
11238
9
    usgn = true;
11239
9
    [[fallthrough]];
11240
12
  case NEON::BI__builtin_neon_vcvth_f16_s16:
11241
15
  case NEON::BI__builtin_neon_vcvth_f16_s32:
11242
18
  case NEON::BI__builtin_neon_vcvth_f16_s64: {
11243
18
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
11244
18
    llvm::Type *FTy = HalfTy;
11245
18
    llvm::Type *InTy;
11246
18
    if (Ops[0]->getType()->getPrimitiveSizeInBits() == 64)
11247
6
      InTy = Int64Ty;
11248
12
    else if (Ops[0]->getType()->getPrimitiveSizeInBits() == 32)
11249
6
      InTy = Int32Ty;
11250
6
    else
11251
6
      InTy = Int16Ty;
11252
18
    Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
11253
18
    if (usgn)
11254
9
      return Builder.CreateUIToFP(Ops[0], FTy);
11255
9
    return Builder.CreateSIToFP(Ops[0], FTy);
11256
18
  }
11257
1
  case NEON::BI__builtin_neon_vcvtah_u16_f16:
11258
2
  case NEON::BI__builtin_neon_vcvtmh_u16_f16:
11259
3
  case NEON::BI__builtin_neon_vcvtnh_u16_f16:
11260
4
  case NEON::BI__builtin_neon_vcvtph_u16_f16:
11261
7
  case NEON::BI__builtin_neon_vcvth_u16_f16:
11262
8
  case NEON::BI__builtin_neon_vcvtah_s16_f16:
11263
9
  case NEON::BI__builtin_neon_vcvtmh_s16_f16:
11264
10
  case NEON::BI__builtin_neon_vcvtnh_s16_f16:
11265
11
  case NEON::BI__builtin_neon_vcvtph_s16_f16:
11266
14
  case NEON::BI__builtin_neon_vcvth_s16_f16: {
11267
14
    unsigned Int;
11268
14
    llvm::Type* InTy = Int32Ty;
11269
14
    llvm::Type* FTy  = HalfTy;
11270
14
    llvm::Type *Tys[2] = {InTy, FTy};
11271
14
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
11272
14
    switch (BuiltinID) {
11273
0
    default: llvm_unreachable("missing builtin ID in switch!");
11274
1
    case NEON::BI__builtin_neon_vcvtah_u16_f16:
11275
1
      Int = Intrinsic::aarch64_neon_fcvtau; break;
11276
1
    case NEON::BI__builtin_neon_vcvtmh_u16_f16:
11277
1
      Int = Intrinsic::aarch64_neon_fcvtmu; break;
11278
1
    case NEON::BI__builtin_neon_vcvtnh_u16_f16:
11279
1
      Int = Intrinsic::aarch64_neon_fcvtnu; break;
11280
1
    case NEON::BI__builtin_neon_vcvtph_u16_f16:
11281
1
      Int = Intrinsic::aarch64_neon_fcvtpu; break;
11282
3
    case NEON::BI__builtin_neon_vcvth_u16_f16:
11283
3
      Int = Intrinsic::aarch64_neon_fcvtzu; break;
11284
1
    case NEON::BI__builtin_neon_vcvtah_s16_f16:
11285
1
      Int = Intrinsic::aarch64_neon_fcvtas; break;
11286
1
    case NEON::BI__builtin_neon_vcvtmh_s16_f16:
11287
1
      Int = Intrinsic::aarch64_neon_fcvtms; break;
11288
1
    case NEON::BI__builtin_neon_vcvtnh_s16_f16:
11289
1
      Int = Intrinsic::aarch64_neon_fcvtns; break;
11290
1
    case NEON::BI__builtin_neon_vcvtph_s16_f16:
11291
1
      Int = Intrinsic::aarch64_neon_fcvtps; break;
11292
3
    case NEON::BI__builtin_neon_vcvth_s16_f16:
11293
3
      Int = Intrinsic::aarch64_neon_fcvtzs; break;
11294
14
    }
11295
14
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvt");
11296
14
    return Builder.CreateTrunc(Ops[0], Int16Ty);
11297
14
  }
11298
1
  case NEON::BI__builtin_neon_vcaleh_f16:
11299
2
  case NEON::BI__builtin_neon_vcalth_f16:
11300
3
  case NEON::BI__builtin_neon_vcageh_f16:
11301
4
  case NEON::BI__builtin_neon_vcagth_f16: {
11302
4
    unsigned Int;
11303
4
    llvm::Type* InTy = Int32Ty;
11304
4
    llvm::Type* FTy  = HalfTy;
11305
4
    llvm::Type *Tys[2] = {InTy, FTy};
11306
4
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
11307
4
    switch (BuiltinID) {
11308
0
    default: llvm_unreachable("missing builtin ID in switch!");
11309
1
    case NEON::BI__builtin_neon_vcageh_f16:
11310
1
      Int = Intrinsic::aarch64_neon_facge; break;
11311
1
    case NEON::BI__builtin_neon_vcagth_f16:
11312
1
      Int = Intrinsic::aarch64_neon_facgt; break;
11313
1
    case NEON::BI__builtin_neon_vcaleh_f16:
11314
1
      Int = Intrinsic::aarch64_neon_facge; std::swap(Ops[0], Ops[1]); break;
11315
1
    case NEON::BI__builtin_neon_vcalth_f16:
11316
1
      Int = Intrinsic::aarch64_neon_facgt; std::swap(Ops[0], Ops[1]); break;
11317
4
    }
11318
4
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "facg");
11319
4
    return Builder.CreateTrunc(Ops[0], Int16Ty);
11320
4
  }
11321
1
  case NEON::BI__builtin_neon_vcvth_n_s16_f16:
11322
2
  case NEON::BI__builtin_neon_vcvth_n_u16_f16: {
11323
2
    unsigned Int;
11324
2
    llvm::Type* InTy = Int32Ty;
11325
2
    llvm::Type* FTy  = HalfTy;
11326
2
    llvm::Type *Tys[2] = {InTy, FTy};
11327
2
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
11328
2
    switch (BuiltinID) {
11329
0
    default: llvm_unreachable("missing builtin ID in switch!");
11330
1
    case NEON::BI__builtin_neon_vcvth_n_s16_f16:
11331
1
      Int = Intrinsic::aarch64_neon_vcvtfp2fxs; break;
11332
1
    case NEON::BI__builtin_neon_vcvth_n_u16_f16:
11333
1
      Int = Intrinsic::aarch64_neon_vcvtfp2fxu; break;
11334
2
    }
11335
2
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvth_n");
11336
2
    return Builder.CreateTrunc(Ops[0], Int16Ty);
11337
2
  }
11338
1
  case NEON::BI__builtin_neon_vcvth_n_f16_s16:
11339
2
  case NEON::BI__builtin_neon_vcvth_n_f16_u16: {
11340
2
    unsigned Int;
11341
2
    llvm::Type* FTy  = HalfTy;
11342
2
    llvm::Type* InTy = Int32Ty;
11343
2
    llvm::Type *Tys[2] = {FTy, InTy};
11344
2
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
11345
2
    switch (BuiltinID) {
11346
0
    default: llvm_unreachable("missing builtin ID in switch!");
11347
1
    case NEON::BI__builtin_neon_vcvth_n_f16_s16:
11348
1
      Int = Intrinsic::aarch64_neon_vcvtfxs2fp;
11349
1
      Ops[0] = Builder.CreateSExt(Ops[0], InTy, "sext");
11350
1
      break;
11351
1
    case NEON::BI__builtin_neon_vcvth_n_f16_u16:
11352
1
      Int = Intrinsic::aarch64_neon_vcvtfxu2fp;
11353
1
      Ops[0] = Builder.CreateZExt(Ops[0], InTy);
11354
1
      break;
11355
2
    }
11356
2
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvth_n");
11357
2
  }
11358
0
  case NEON::BI__builtin_neon_vpaddd_s64: {
11359
0
    auto *Ty = llvm::FixedVectorType::get(Int64Ty, 2);
11360
0
    Value *Vec = EmitScalarExpr(E->getArg(0));
11361
    // The vector is v2i64, so make sure it's bitcast to that.
11362
0
    Vec = Builder.CreateBitCast(Vec, Ty, "v2i64");
11363
0
    llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
11364
0
    llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
11365
0
    Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
11366
0
    Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
11367
    // Pairwise addition of a v2i64 into a scalar i64.
11368
0
    return Builder.CreateAdd(Op0, Op1, "vpaddd");
11369
2
  }
11370
3
  case NEON::BI__builtin_neon_vpaddd_f64: {
11371
3
    auto *Ty = llvm::FixedVectorType::get(DoubleTy, 2);
11372
3
    Value *Vec = EmitScalarExpr(E->getArg(0));
11373
    // The vector is v2f64, so make sure it's bitcast to that.
11374
3
    Vec = Builder.CreateBitCast(Vec, Ty, "v2f64");
11375
3
    llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
11376
3
    llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
11377
3
    Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
11378
3
    Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
11379
    // Pairwise addition of a v2f64 into a scalar f64.
11380
3
    return Builder.CreateFAdd(Op0, Op1, "vpaddd");
11381
2
  }
11382
3
  case NEON::BI__builtin_neon_vpadds_f32: {
11383
3
    auto *Ty = llvm::FixedVectorType::get(FloatTy, 2);
11384
3
    Value *Vec = EmitScalarExpr(E->getArg(0));
11385
    // The vector is v2f32, so make sure it's bitcast to that.
11386
3
    Vec = Builder.CreateBitCast(Vec, Ty, "v2f32");
11387
3
    llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
11388
3
    llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
11389
3
    Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
11390
3
    Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
11391
    // Pairwise addition of a v2f32 into a scalar f32.
11392
3
    return Builder.CreateFAdd(Op0, Op1, "vpaddd");
11393
2
  }
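The three vpadd cases above lower a pairwise add of a two-element vector to two extractelement instructions followed by a scalar add. As an illustrative aside (not part of the listed file), the source-level behaviour they implement, using arm_neon.h names, is simply the sum of the two lanes:

#include <arm_neon.h>

// vpaddd_f64 adds the two lanes of a float64x2_t into a single double,
// matching the lane0/lane1 extract + fadd sequence emitted above.
double pairwise_sum(float64x2_t v) {
  return vpaddd_f64(v);  // == vgetq_lane_f64(v, 0) + vgetq_lane_f64(v, 1)
}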
11394
1
  case NEON::BI__builtin_neon_vceqzd_s64:
11395
4
  case NEON::BI__builtin_neon_vceqzd_f64:
11396
7
  case NEON::BI__builtin_neon_vceqzs_f32:
11397
10
  case NEON::BI__builtin_neon_vceqzh_f16:
11398
10
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
11399
10
    return EmitAArch64CompareBuiltinExpr(
11400
10
        Ops[0], ConvertType(E->getCallReturnType(getContext())),
11401
10
        ICmpInst::FCMP_OEQ, ICmpInst::ICMP_EQ, "vceqz");
11402
1
  case NEON::BI__builtin_neon_vcgezd_s64:
11403
4
  case NEON::BI__builtin_neon_vcgezd_f64:
11404
7
  case NEON::BI__builtin_neon_vcgezs_f32:
11405
10
  case NEON::BI__builtin_neon_vcgezh_f16:
11406
10
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
11407
10
    return EmitAArch64CompareBuiltinExpr(
11408
10
        Ops[0], ConvertType(E->getCallReturnType(getContext())),
11409
10
        ICmpInst::FCMP_OGE, ICmpInst::ICMP_SGE, "vcgez");
11410
1
  case NEON::BI__builtin_neon_vclezd_s64:
11411
4
  case NEON::BI__builtin_neon_vclezd_f64:
11412
7
  case NEON::BI__builtin_neon_vclezs_f32:
11413
10
  case NEON::BI__builtin_neon_vclezh_f16:
11414
10
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
11415
10
    return EmitAArch64CompareBuiltinExpr(
11416
10
        Ops[0], ConvertType(E->getCallReturnType(getContext())),
11417
10
        ICmpInst::FCMP_OLE, ICmpInst::ICMP_SLE, "vclez");
11418
1
  case NEON::BI__builtin_neon_vcgtzd_s64:
11419
4
  case NEON::BI__builtin_neon_vcgtzd_f64:
11420
7
  case NEON::BI__builtin_neon_vcgtzs_f32:
11421
10
  case NEON::BI__builtin_neon_vcgtzh_f16:
11422
10
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
11423
10
    return EmitAArch64CompareBuiltinExpr(
11424
10
        Ops[0], ConvertType(E->getCallReturnType(getContext())),
11425
10
        ICmpInst::FCMP_OGT, ICmpInst::ICMP_SGT, "vcgtz");
11426
1
  case NEON::BI__builtin_neon_vcltzd_s64:
11427
4
  case NEON::BI__builtin_neon_vcltzd_f64:
11428
7
  case NEON::BI__builtin_neon_vcltzs_f32:
11429
10
  case NEON::BI__builtin_neon_vcltzh_f16:
11430
10
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
11431
10
    return EmitAArch64CompareBuiltinExpr(
11432
10
        Ops[0], ConvertType(E->getCallReturnType(getContext())),
11433
10
        ICmpInst::FCMP_OLT, ICmpInst::ICMP_SLT, "vcltz");
11434
11435
1
  case NEON::BI__builtin_neon_vceqzd_u64: {
11436
1
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
11437
1
    Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
11438
1
    Ops[0] =
11439
1
        Builder.CreateICmpEQ(Ops[0], llvm::Constant::getNullValue(Int64Ty));
11440
1
    return Builder.CreateSExt(Ops[0], Int64Ty, "vceqzd");
11441
7
  }
11442
3
  case NEON::BI__builtin_neon_vceqd_f64:
11443
6
  case NEON::BI__builtin_neon_vcled_f64:
11444
9
  case NEON::BI__builtin_neon_vcltd_f64:
11445
12
  case NEON::BI__builtin_neon_vcged_f64:
11446
15
  case NEON::BI__builtin_neon_vcgtd_f64: {
11447
15
    llvm::CmpInst::Predicate P;
11448
15
    switch (BuiltinID) {
11449
0
    default: llvm_unreachable("missing builtin ID in switch!");
11450
3
    case NEON::BI__builtin_neon_vceqd_f64: P = llvm::FCmpInst::FCMP_OEQ; break;
11451
3
    case NEON::BI__builtin_neon_vcled_f64: P = llvm::FCmpInst::FCMP_OLE; break;
11452
3
    case NEON::BI__builtin_neon_vcltd_f64: P = llvm::FCmpInst::FCMP_OLT; break;
11453
3
    case NEON::BI__builtin_neon_vcged_f64: P = llvm::FCmpInst::FCMP_OGE; break;
11454
3
    case NEON::BI__builtin_neon_vcgtd_f64: P = llvm::FCmpInst::FCMP_OGT; break;
11455
15
    }
11456
15
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
11457
15
    Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
11458
15
    Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
11459
15
    if (P == llvm::FCmpInst::FCMP_OEQ)
11460
3
      Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
11461
12
    else
11462
12
      Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]);
11463
15
    return Builder.CreateSExt(Ops[0], Int64Ty, "vcmpd");
11464
15
  }
11465
3
  case NEON::BI__builtin_neon_vceqs_f32:
11466
6
  case NEON::BI__builtin_neon_vcles_f32:
11467
9
  case NEON::BI__builtin_neon_vclts_f32:
11468
12
  case NEON::BI__builtin_neon_vcges_f32:
11469
15
  case NEON::BI__builtin_neon_vcgts_f32: {
11470
15
    llvm::CmpInst::Predicate P;
11471
15
    switch (BuiltinID) {
11472
0
    default: llvm_unreachable("missing builtin ID in switch!");
11473
3
    case NEON::BI__builtin_neon_vceqs_f32: P = llvm::FCmpInst::FCMP_OEQ; break;
11474
3
    case NEON::BI__builtin_neon_vcles_f32: P = llvm::FCmpInst::FCMP_OLE; break;
11475
3
    case NEON::BI__builtin_neon_vclts_f32: P = llvm::FCmpInst::FCMP_OLT; break;
11476
3
    case NEON::BI__builtin_neon_vcges_f32: P = llvm::FCmpInst::FCMP_OGE; break;
11477
3
    case NEON::BI__builtin_neon_vcgts_f32: P = llvm::FCmpInst::FCMP_OGT; break;
11478
15
    }
11479
15
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
11480
15
    Ops[0] = Builder.CreateBitCast(Ops[0], FloatTy);
11481
15
    Ops[1] = Builder.CreateBitCast(Ops[1], FloatTy);
11482
15
    if (P == llvm::FCmpInst::FCMP_OEQ)
11483
3
      Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
11484
12
    else
11485
12
      Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]);
11486
15
    return Builder.CreateSExt(Ops[0], Int32Ty, "vcmpd");
11487
15
  }
11488
3
  case NEON::BI__builtin_neon_vceqh_f16:
11489
6
  case NEON::BI__builtin_neon_vcleh_f16:
11490
9
  case NEON::BI__builtin_neon_vclth_f16:
11491
12
  case NEON::BI__builtin_neon_vcgeh_f16:
11492
15
  case NEON::BI__builtin_neon_vcgth_f16: {
11493
15
    llvm::CmpInst::Predicate P;
11494
15
    switch (BuiltinID) {
11495
0
    default: llvm_unreachable("missing builtin ID in switch!");
11496
3
    case NEON::BI__builtin_neon_vceqh_f16: P = llvm::FCmpInst::FCMP_OEQ; break;
11497
3
    case NEON::BI__builtin_neon_vcleh_f16: P = llvm::FCmpInst::FCMP_OLE; break;
11498
3
    case NEON::BI__builtin_neon_vclth_f16: P = llvm::FCmpInst::FCMP_OLT; break;
11499
3
    case NEON::BI__builtin_neon_vcgeh_f16: P = llvm::FCmpInst::FCMP_OGE; break;
11500
3
    case NEON::BI__builtin_neon_vcgth_f16: P = llvm::FCmpInst::FCMP_OGT; break;
11501
15
    }
11502
15
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
11503
15
    Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy);
11504
15
    Ops[1] = Builder.CreateBitCast(Ops[1], HalfTy);
11505
15
    if (P == llvm::FCmpInst::FCMP_OEQ)
11506
3
      Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
11507
12
    else
11508
12
      Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]);
11509
15
    return Builder.CreateSExt(Ops[0], Int16Ty, "vcmpd");
11510
15
  }
11511
1
  case NEON::BI__builtin_neon_vceqd_s64:
11512
2
  case NEON::BI__builtin_neon_vceqd_u64:
11513
3
  case NEON::BI__builtin_neon_vcgtd_s64:
11514
4
  case NEON::BI__builtin_neon_vcgtd_u64:
11515
5
  case NEON::BI__builtin_neon_vcltd_s64:
11516
6
  case NEON::BI__builtin_neon_vcltd_u64:
11517
7
  case NEON::BI__builtin_neon_vcged_u64:
11518
8
  case NEON::BI__builtin_neon_vcged_s64:
11519
9
  case NEON::BI__builtin_neon_vcled_u64:
11520
10
  case NEON::BI__builtin_neon_vcled_s64: {
11521
10
    llvm::CmpInst::Predicate P;
11522
10
    switch (BuiltinID) {
11523
0
    default: llvm_unreachable("missing builtin ID in switch!");
11524
1
    case NEON::BI__builtin_neon_vceqd_s64:
11525
2
    case NEON::BI__builtin_neon_vceqd_u64:P = llvm::ICmpInst::ICMP_EQ;break;
11526
1
    case NEON::BI__builtin_neon_vcgtd_s64:P = llvm::ICmpInst::ICMP_SGT;break;
11527
1
    case NEON::BI__builtin_neon_vcgtd_u64:P = llvm::ICmpInst::ICMP_UGT;break;
11528
1
    case NEON::BI__builtin_neon_vcltd_s64:P = llvm::ICmpInst::ICMP_SLT;break;
11529
1
    case NEON::BI__builtin_neon_vcltd_u64:P = llvm::ICmpInst::ICMP_ULT;break;
11530
1
    case NEON::BI__builtin_neon_vcged_u64:P = llvm::ICmpInst::ICMP_UGE;break;
11531
1
    case NEON::BI__builtin_neon_vcged_s64:P = llvm::ICmpInst::ICMP_SGE;break;
11532
1
    case NEON::BI__builtin_neon_vcled_u64:P = llvm::ICmpInst::ICMP_ULE;break;
11533
1
    case NEON::BI__builtin_neon_vcled_s64:P = llvm::ICmpInst::ICMP_SLE;break;
11534
10
    }
11535
10
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
11536
10
    Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
11537
10
    Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
11538
10
    Ops[0] = Builder.CreateICmp(P, Ops[0], Ops[1]);
11539
10
    return Builder.CreateSExt(Ops[0], Int64Ty, "vceqd");
11540
10
  }
11541
1
  case NEON::BI__builtin_neon_vtstd_s64:
11542
2
  case NEON::BI__builtin_neon_vtstd_u64: {
11543
2
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
11544
2
    Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
11545
2
    Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
11546
2
    Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
11547
2
    Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
11548
2
                                llvm::Constant::getNullValue(Int64Ty));
11549
2
    return Builder.CreateSExt(Ops[0], Int64Ty, "vtstd");
11550
1
  }
11551
3
  case NEON::BI__builtin_neon_vset_lane_i8:
11552
7
  case NEON::BI__builtin_neon_vset_lane_i16:
11553
9
  case NEON::BI__builtin_neon_vset_lane_i32:
11554
13
  case NEON::BI__builtin_neon_vset_lane_i64:
11555
22
  case NEON::BI__builtin_neon_vset_lane_bf16:
11556
23
  case NEON::BI__builtin_neon_vset_lane_f32:
11557
28
  case NEON::BI__builtin_neon_vsetq_lane_i8:
11558
34
  case NEON::BI__builtin_neon_vsetq_lane_i16:
11559
38
  case NEON::BI__builtin_neon_vsetq_lane_i32:
11560
45
  case NEON::BI__builtin_neon_vsetq_lane_i64:
11561
54
  case NEON::BI__builtin_neon_vsetq_lane_bf16:
11562
56
  case NEON::BI__builtin_neon_vsetq_lane_f32:
11563
56
    Ops.push_back(EmitScalarExpr(E->getArg(2)));
11564
56
    return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
11565
5
  case NEON::BI__builtin_neon_vset_lane_f64:
11566
    // The vector type needs a cast for the v1f64 variant.
11567
5
    Ops[1] =
11568
5
        Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(DoubleTy, 1));
11569
5
    Ops.push_back(EmitScalarExpr(E->getArg(2)));
11570
5
    return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
11571
1
  case NEON::BI__builtin_neon_vsetq_lane_f64:
11572
    // The vector type needs a cast for the v2f64 variant.
11573
1
    Ops[1] =
11574
1
        Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(DoubleTy, 2));
11575
1
    Ops.push_back(EmitScalarExpr(E->getArg(2)));
11576
1
    return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
11577
11578
3
  case NEON::BI__builtin_neon_vget_lane_i8:
11579
10
  case NEON::BI__builtin_neon_vdupb_lane_i8:
11580
10
    Ops[0] =
11581
10
        Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int8Ty, 8));
11582
10
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
11583
10
                                        "vget_lane");
11584
5
  case NEON::BI__builtin_neon_vgetq_lane_i8:
11585
8
  case NEON::BI__builtin_neon_vdupb_laneq_i8:
11586
8
    Ops[0] =
11587
8
        Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int8Ty, 16));
11588
8
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
11589
8
                                        "vgetq_lane");
11590
58
  case NEON::BI__builtin_neon_vget_lane_i16:
11591
65
  case NEON::BI__builtin_neon_vduph_lane_i16:
11592
65
    Ops[0] =
11593
65
        Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int16Ty, 4));
11594
65
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
11595
65
                                        "vget_lane");
11596
60
  case NEON::BI__builtin_neon_vgetq_lane_i16:
11597
63
  case NEON::BI__builtin_neon_vduph_laneq_i16:
11598
63
    Ops[0] =
11599
63
        Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int16Ty, 8));
11600
63
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
11601
63
                                        "vgetq_lane");
11602
31
  case NEON::BI__builtin_neon_vget_lane_i32:
11603
37
  case NEON::BI__builtin_neon_vdups_lane_i32:
11604
37
    Ops[0] =
11605
37
        Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int32Ty, 2));
11606
37
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
11607
37
                                        "vget_lane");
11608
3
  case NEON::BI__builtin_neon_vdups_lane_f32:
11609
3
    Ops[0] =
11610
3
        Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 2));
11611
3
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
11612
3
                                        "vdups_lane");
11613
33
  case NEON::BI__builtin_neon_vgetq_lane_i32:
11614
35
  case NEON::BI__builtin_neon_vdups_laneq_i32:
11615
35
    Ops[0] =
11616
35
        Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int32Ty, 4));
11617
35
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
11618
35
                                        "vgetq_lane");
11619
17
  case NEON::BI__builtin_neon_vget_lane_i64:
11620
23
  case NEON::BI__builtin_neon_vdupd_lane_i64:
11621
23
    Ops[0] =
11622
23
        Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int64Ty, 1));
11623
23
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
11624
23
                                        "vget_lane");
11625
3
  case NEON::BI__builtin_neon_vdupd_lane_f64:
11626
3
    Ops[0] =
11627
3
        Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 1));
11628
3
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
11629
3
                                        "vdupd_lane");
11630
22
  case NEON::BI__builtin_neon_vgetq_lane_i64:
11631
24
  case NEON::BI__builtin_neon_vdupd_laneq_i64:
11632
24
    Ops[0] =
11633
24
        Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int64Ty, 2));
11634
24
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
11635
24
                                        "vgetq_lane");
11636
3
  case NEON::BI__builtin_neon_vget_lane_f32:
11637
3
    Ops[0] =
11638
3
        Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 2));
11639
3
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
11640
3
                                        "vget_lane");
11641
9
  case NEON::BI__builtin_neon_vget_lane_f64:
11642
9
    Ops[0] =
11643
9
        Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 1));
11644
9
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
11645
9
                                        "vget_lane");
11646
4
  case NEON::BI__builtin_neon_vgetq_lane_f32:
11647
5
  case NEON::BI__builtin_neon_vdups_laneq_f32:
11648
5
    Ops[0] =
11649
5
        Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 4));
11650
5
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
11651
5
                                        "vgetq_lane");
11652
6
  case NEON::BI__builtin_neon_vgetq_lane_f64:
11653
7
  case NEON::BI__builtin_neon_vdupd_laneq_f64:
11654
7
    Ops[0] =
11655
7
        Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 2));
11656
7
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
11657
7
                                        "vgetq_lane");
11658
3
  case NEON::BI__builtin_neon_vaddh_f16:
11659
3
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
11660
3
    return Builder.CreateFAdd(Ops[0], Ops[1], "vaddh");
11661
3
  case NEON::BI__builtin_neon_vsubh_f16:
11662
3
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
11663
3
    return Builder.CreateFSub(Ops[0], Ops[1], "vsubh");
11664
3
  case NEON::BI__builtin_neon_vmulh_f16:
11665
3
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
11666
3
    return Builder.CreateFMul(Ops[0], Ops[1], "vmulh");
11667
3
  case NEON::BI__builtin_neon_vdivh_f16:
11668
3
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
11669
3
    return Builder.CreateFDiv(Ops[0], Ops[1], "vdivh");
11670
3
  case NEON::BI__builtin_neon_vfmah_f16:
11671
    // NEON intrinsic puts accumulator first, unlike the LLVM fma.
11672
3
    return emitCallMaybeConstrainedFPBuiltin(
11673
3
        *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, HalfTy,
11674
3
        {EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2)), Ops[0]});
11675
3
  case NEON::BI__builtin_neon_vfmsh_f16: {
11676
3
    Value* Neg = Builder.CreateFNeg(EmitScalarExpr(E->getArg(1)), "vsubh");
11677
11678
    // NEON intrinsic puts accumulator first, unlike the LLVM fma.
11679
3
    return emitCallMaybeConstrainedFPBuiltin(
11680
3
        *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, HalfTy,
11681
3
        {Neg, EmitScalarExpr(E->getArg(2)), Ops[0]});
11682
6
  }
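Both fused-multiply cases above turn on operand order: the NEON scalar intrinsics pass the accumulator first, while llvm.fma takes it last, and vfmsh is obtained by negating one multiplicand. A small sketch of the intended semantics (arm_neon.h names, assuming an AArch64 target with fp16 arithmetic; illustrative only, not part of the listed file):

#include <arm_neon.h>

// vfmah_f16(a, b, c) computes a + b * c, so it is emitted as fma(b, c, a);
// vfmsh_f16(a, b, c) computes a - b * c, emitted as fma(-b, c, a).
float16_t fma_accumulate(float16_t a, float16_t b, float16_t c) {
  return vfmah_f16(a, b, c);
}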
11683
1
  case NEON::BI__builtin_neon_vaddd_s64:
11684
2
  case NEON::BI__builtin_neon_vaddd_u64:
11685
2
    return Builder.CreateAdd(Ops[0], EmitScalarExpr(E->getArg(1)), "vaddd");
11686
1
  case NEON::BI__builtin_neon_vsubd_s64:
11687
2
  case NEON::BI__builtin_neon_vsubd_u64:
11688
2
    return Builder.CreateSub(Ops[0], EmitScalarExpr(E->getArg(1)), "vsubd");
11689
1
  case NEON::BI__builtin_neon_vqdmlalh_s16:
11690
2
  case NEON::BI__builtin_neon_vqdmlslh_s16: {
11691
2
    SmallVector<Value *, 2> ProductOps;
11692
2
    ProductOps.push_back(vectorWrapScalar16(Ops[1]));
11693
2
    ProductOps.push_back(vectorWrapScalar16(EmitScalarExpr(E->getArg(2))));
11694
2
    auto *VTy = llvm::FixedVectorType::get(Int32Ty, 4);
11695
2
    Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
11696
2
                          ProductOps, "vqdmlXl");
11697
2
    Constant *CI = ConstantInt::get(SizeTy, 0);
11698
2
    Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
11699
11700
2
    unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlalh_s16
11701
2
                                        ? Intrinsic::aarch64_neon_sqadd
11702
2
                                        : Intrinsic::aarch64_neon_sqsub;
11703
2
    return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int32Ty), Ops, "vqdmlXl");
11704
1
  }
11705
1
  case NEON::BI__builtin_neon_vqshlud_n_s64: {
11706
1
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
11707
1
    Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
11708
1
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqshlu, Int64Ty),
11709
1
                        Ops, "vqshlu_n");
11710
1
  }
11711
1
  case NEON::BI__builtin_neon_vqshld_n_u64:
11712
2
  case NEON::BI__builtin_neon_vqshld_n_s64: {
11713
2
    unsigned Int = BuiltinID == NEON::BI__builtin_neon_vqshld_n_u64
11714
2
                                   ? Intrinsic::aarch64_neon_uqshl
11715
2
                                   : Intrinsic::aarch64_neon_sqshl;
11716
2
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
11717
2
    Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
11718
2
    return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vqshl_n");
11719
1
  }
11720
1
  case NEON::BI__builtin_neon_vrshrd_n_u64:
11721
2
  case NEON::BI__builtin_neon_vrshrd_n_s64: {
11722
2
    unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrshrd_n_u64
11723
2
                                   ? Intrinsic::aarch64_neon_urshl
11724
2
                                   : Intrinsic::aarch64_neon_srshl;
11725
2
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
11726
2
    int SV = cast<ConstantInt>(Ops[1])->getSExtValue();
11727
2
    Ops[1] = ConstantInt::get(Int64Ty, -SV);
11728
2
    return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vrshr_n");
11729
1
  }
11730
1
  case NEON::BI__builtin_neon_vrsrad_n_u64:
11731
2
  case NEON::BI__builtin_neon_vrsrad_n_s64: {
11732
2
    unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrsrad_n_u64
11733
2
                                   ? Intrinsic::aarch64_neon_urshl
11734
2
                                   : Intrinsic::aarch64_neon_srshl;
11735
2
    Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
11736
2
    Ops.push_back(Builder.CreateNeg(EmitScalarExpr(E->getArg(2))));
11737
2
    Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Int64Ty),
11738
2
                                {Ops[1], Builder.CreateSExt(Ops[2], Int64Ty)});
11739
2
    return Builder.CreateAdd(Ops[0], Builder.CreateBitCast(Ops[1], Int64Ty));
11740
1
  }
11741
1
  case NEON::BI__builtin_neon_vshld_n_s64:
11742
2
  case NEON::BI__builtin_neon_vshld_n_u64: {
11743
2
    llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
11744
2
    return Builder.CreateShl(
11745
2
        Ops[0], ConstantInt::get(Int64Ty, Amt->getZExtValue()), "shld_n");
11746
1
  }
11747
1
  case NEON::BI__builtin_neon_vshrd_n_s64: {
11748
1
    llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
11749
1
    return Builder.CreateAShr(
11750
1
        Ops[0], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
11751
1
                                                   Amt->getZExtValue())),
11752
1
        "shrd_n");
11753
1
  }
11754
2
  case NEON::BI__builtin_neon_vshrd_n_u64: {
11755
2
    llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
11756
2
    uint64_t ShiftAmt = Amt->getZExtValue();
11757
    // Right-shifting an unsigned value by its size yields 0.
11758
2
    if (ShiftAmt == 64)
11759
2
      return ConstantInt::get(Int64Ty, 0);
11760
0
    return Builder.CreateLShr(Ops[0], ConstantInt::get(Int64Ty, ShiftAmt),
11761
0
                              "shrd_n");
11762
2
  }
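The early return above exists because the NEON intrinsic defines a logical right shift by the full 64-bit width (it yields 0), whereas an LLVM lshr by 64 would be a poison value, so the constant is produced directly. A brief illustration (arm_neon.h name; illustrative only, not part of the listed file):

#include <arm_neon.h>

// vshrd_n_u64(x, 64) is 0 for every x; folding it to a constant avoids
// emitting "lshr i64 %x, 64", which would be poison in LLVM IR.
uint64_t drop_all_bits(uint64_t x) {
  return vshrd_n_u64(x, 64);  // always 0
}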
11763
1
  case NEON::BI__builtin_neon_vsrad_n_s64: {
11764
1
    llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
11765
1
    Ops[1] = Builder.CreateAShr(
11766
1
        Ops[1], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
11767
1
                                                   Amt->getZExtValue())),
11768
1
        "shrd_n");
11769
1
    return Builder.CreateAdd(Ops[0], Ops[1]);
11770
2
  }
11771
2
  case NEON::BI__builtin_neon_vsrad_n_u64: {
11772
2
    llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
11773
2
    uint64_t ShiftAmt = Amt->getZExtValue();
11774
    // Right-shifting an unsigned value by its size yields 0.
11775
    // As Op + 0 = Op, return Ops[0] directly.
11776
2
    if (ShiftAmt == 64)
11777
1
      return Ops[0];
11778
1
    Ops[1] = Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, ShiftAmt),
11779
1
                                "shrd_n");
11780
1
    return Builder.CreateAdd(Ops[0], Ops[1]);
11781
2
  }
11782
1
  case NEON::BI__builtin_neon_vqdmlalh_lane_s16:
11783
2
  case NEON::BI__builtin_neon_vqdmlalh_laneq_s16:
11784
3
  case NEON::BI__builtin_neon_vqdmlslh_lane_s16:
11785
4
  case NEON::BI__builtin_neon_vqdmlslh_laneq_s16: {
11786
4
    Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
11787
4
                                          "lane");
11788
4
    SmallVector<Value *, 2> ProductOps;
11789
4
    ProductOps.push_back(vectorWrapScalar16(Ops[1]));
11790
4
    ProductOps.push_back(vectorWrapScalar16(Ops[2]));
11791
4
    auto *VTy = llvm::FixedVectorType::get(Int32Ty, 4);
11792
4
    Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
11793
4
                          ProductOps, "vqdmlXl");
11794
4
    Constant *CI = ConstantInt::get(SizeTy, 0);
11795
4
    Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
11796
4
    Ops.pop_back();
11797
11798
4
    unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlalh_lane_s16 ||
11799
4
                       BuiltinID == NEON::BI__builtin_neon_vqdmlalh_laneq_s16)
11800
4
                          ? Intrinsic::aarch64_neon_sqadd
11801
4
                          : Intrinsic::aarch64_neon_sqsub;
11802
4
    return EmitNeonCall(CGM.getIntrinsic(AccInt, Int32Ty), Ops, "vqdmlXl");
11803
3
  }
11804
1
  case NEON::BI__builtin_neon_vqdmlals_s32:
11805
2
  case NEON::BI__builtin_neon_vqdmlsls_s32: {
11806
2
    SmallVector<Value *, 2> ProductOps;
11807
2
    ProductOps.push_back(Ops[1]);
11808
2
    ProductOps.push_back(EmitScalarExpr(E->getArg(2)));
11809
2
    Ops[1] =
11810
2
        EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
11811
2
                     ProductOps, "vqdmlXl");
11812
11813
2
    unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlals_s32
11814
2
                                        ? Intrinsic::aarch64_neon_sqadd
11815
2
                                        : Intrinsic::aarch64_neon_sqsub;
11816
2
    return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int64Ty), Ops, "vqdmlXl");
11817
1
  }
11818
1
  case NEON::BI__builtin_neon_vqdmlals_lane_s32:
11819
2
  case NEON::BI__builtin_neon_vqdmlals_laneq_s32:
11820
3
  case NEON::BI__builtin_neon_vqdmlsls_lane_s32:
11821
4
  case NEON::BI__builtin_neon_vqdmlsls_laneq_s32: {
11822
4
    Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
11823
4
                                          "lane");
11824
4
    SmallVector<Value *, 2> ProductOps;
11825
4
    ProductOps.push_back(Ops[1]);
11826
4
    ProductOps.push_back(Ops[2]);
11827
4
    Ops[1] =
11828
4
        EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
11829
4
                     ProductOps, "vqdmlXl");
11830
4
    Ops.pop_back();
11831
11832
4
    unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlals_lane_s32 ||
11833
4
                       BuiltinID == NEON::BI__builtin_neon_vqdmlals_laneq_s32)
11834
4
                          ? Intrinsic::aarch64_neon_sqadd
11835
4
                          : Intrinsic::aarch64_neon_sqsub;
11836
4
    return EmitNeonCall(CGM.getIntrinsic(AccInt, Int64Ty), Ops, "vqdmlXl");
11837
3
  }
11838
25
  case NEON::BI__builtin_neon_vget_lane_bf16:
11839
26
  case NEON::BI__builtin_neon_vduph_lane_bf16:
11840
27
  case NEON::BI__builtin_neon_vduph_lane_f16: {
11841
27
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
11842
27
                                        "vget_lane");
11843
26
  }
11844
25
  case NEON::BI__builtin_neon_vgetq_lane_bf16:
11845
26
  case NEON::BI__builtin_neon_vduph_laneq_bf16:
11846
27
  case NEON::BI__builtin_neon_vduph_laneq_f16: {
11847
27
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
11848
27
                                        "vgetq_lane");
11849
26
  }
11850
11851
8
  case clang::AArch64::BI_InterlockedAdd: {
11852
8
    Value *Arg0 = EmitScalarExpr(E->getArg(0));
11853
8
    Value *Arg1 = EmitScalarExpr(E->getArg(1));
11854
8
    AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
11855
8
      AtomicRMWInst::Add, Arg0, Arg1,
11856
8
      llvm::AtomicOrdering::SequentiallyConsistent);
11857
8
    return Builder.CreateAdd(RMWI, Arg1);
11858
26
  }
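The extra CreateAdd above is needed because atomicrmw add yields the value held before the addition, while _InterlockedAdd is specified to return the value after it. A minimal sketch of the contract being matched (MSVC-style intrinsic, assumed available via <intrin.h> when targeting ARM64; illustrative only, not part of the listed file):

#include <intrin.h>

// *p is incremented atomically and the *new* value is returned; the
// emitted IR is: old = atomicrmw add ptr, v seq_cst; result = old + v.
long add_and_fetch(volatile long *p, long v) {
  return _InterlockedAdd(p, v);
}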
11859
3.49k
  }
11860
11861
2.85k
  llvm::FixedVectorType *VTy = GetNeonType(this, Type);
11862
2.85k
  llvm::Type *Ty = VTy;
11863
2.85k
  if (!Ty)
11864
0
    return nullptr;
11865
11866
  // Not all intrinsics handled by the common case work for AArch64 yet, so only
11867
  // defer to common code if it's been added to our special map.
11868
2.85k
  Builtin = findARMVectorIntrinsicInMap(AArch64SIMDIntrinsicMap, BuiltinID,
11869
2.85k
                                        AArch64SIMDIntrinsicsProvenSorted);
11870
11871
2.85k
  if (Builtin)
11872
1.37k
    return EmitCommonNeonBuiltinExpr(
11873
1.37k
        Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
11874
1.37k
        Builtin->NameHint, Builtin->TypeModifier, E, Ops,
11875
1.37k
        /*never use addresses*/ Address::invalid(), Address::invalid(), Arch);
11876
11877
1.47k
  if (Value *V = EmitAArch64TblBuiltinExpr(*this, BuiltinID, E, Ops, Arch))
11878
72
    return V;
11879
11880
1.40k
  unsigned Int;
11881
1.40k
  switch (BuiltinID) {
11882
0
  default: return nullptr;
11883
14
  case NEON::BI__builtin_neon_vbsl_v:
11884
27
  case NEON::BI__builtin_neon_vbslq_v: {
11885
27
    llvm::Type *BitTy = llvm::VectorType::getInteger(VTy);
11886
27
    Ops[0] = Builder.CreateBitCast(Ops[0], BitTy, "vbsl");
11887
27
    Ops[1] = Builder.CreateBitCast(Ops[1], BitTy, "vbsl");
11888
27
    Ops[2] = Builder.CreateBitCast(Ops[2], BitTy, "vbsl");
11889
11890
27
    Ops[1] = Builder.CreateAnd(Ops[0], Ops[1], "vbsl");
11891
27
    Ops[2] = Builder.CreateAnd(Builder.CreateNot(Ops[0]), Ops[2], "vbsl");
11892
27
    Ops[0] = Builder.CreateOr(Ops[1], Ops[2], "vbsl");
11893
27
    return Builder.CreateBitCast(Ops[0], Ty);
11894
14
  }
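The vbsl lowering above is a plain integer bit-select, (mask & a) | (~mask & b), applied after bitcasting all three operands to an integer vector type. A scalar illustration of the same formula (illustrative only, not part of the listed file):

#include <stdint.h>

// For each bit position, take the bit from a where the mask bit is set
// and from b where it is clear -- the and/and-not/or sequence emitted above.
uint32_t bit_select(uint32_t mask, uint32_t a, uint32_t b) {
  return (mask & a) | (~mask & b);
}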
11895
24
  case NEON::BI__builtin_neon_vfma_lane_v:
11896
40
  case NEON::BI__builtin_neon_vfmaq_lane_v: { // Only used for FP types
11897
    // The ARM builtins (and instructions) have the addend as the first
11898
    // operand, but the 'fma' intrinsics have it last. Swap it around here.
11899
40
    Value *Addend = Ops[0];
11900
40
    Value *Multiplicand = Ops[1];
11901
40
    Value *LaneSource = Ops[2];
11902
40
    Ops[0] = Multiplicand;
11903
40
    Ops[1] = LaneSource;
11904
40
    Ops[2] = Addend;
11905
11906
    // Now adjust things to handle the lane access.
11907
40
    auto *SourceTy = BuiltinID == NEON::BI__builtin_neon_vfmaq_lane_v
11908
40
                         ? llvm::FixedVectorType::get(VTy->getElementType(),
11909
16
                                                      VTy->getNumElements() / 2)
11910
40
                         : VTy;
11911
40
    llvm::Constant *cst = cast<Constant>(Ops[3]);
11912
40
    Value *SV = llvm::ConstantVector::getSplat(VTy->getElementCount(), cst);
11913
40
    Ops[1] = Builder.CreateBitCast(Ops[1], SourceTy);
11914
40
    Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV, "lane");
11915
11916
40
    Ops.pop_back();
11917
40
    Int = Builder.getIsFPConstrained() ? Intrinsic::experimental_constrained_fma
11918
40
                                       : Intrinsic::fma;
11919
40
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fmla");
11920
24
  }
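This case first rotates the operands (the ARM builtin passes the addend first, llvm.fma wants it last) and then splats the chosen lane of the lane source before calling fma, so each result lane is a[i] + b[i] * v[lane]. A short source-level illustration (arm_neon.h name; illustrative only, not part of the listed file):

#include <arm_neon.h>

// vfma_lane_f32(a, b, v, 1) computes a[i] + b[i] * v[1] per lane; the
// splat of v[1] corresponds to the ConstantVector::getSplat shuffle above.
float32x2_t fma_by_lane(float32x2_t a, float32x2_t b, float32x2_t v) {
  return vfma_lane_f32(a, b, v, 1);
}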
11921
24
  case NEON::BI__builtin_neon_vfma_laneq_v: {
11922
24
    auto *VTy = cast<llvm::FixedVectorType>(Ty);
11923
    // v1f64 fma should be mapped to Neon scalar f64 fma
11924
24
    if (VTy && VTy->getElementType() == DoubleTy) {
11925
10
      Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
11926
10
      Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
11927
10
      llvm::FixedVectorType *VTy =
11928
10
          GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, true));
11929
10
      Ops[2] = Builder.CreateBitCast(Ops[2], VTy);
11930
10
      Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
11931
10
      Value *Result;
11932
10
      Result = emitCallMaybeConstrainedFPBuiltin(
11933
10
          *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma,
11934
10
          DoubleTy, {Ops[1], Ops[2], Ops[0]});
11935
10
      return Builder.CreateBitCast(Result, Ty);
11936
10
    }
11937
14
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
11938
14
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
11939
11940
14
    auto *STy = llvm::FixedVectorType::get(VTy->getElementType(),
11941
14
                                           VTy->getNumElements() * 2);
11942
14
    Ops[2] = Builder.CreateBitCast(Ops[2], STy);
11943
14
    Value *SV = llvm::ConstantVector::getSplat(VTy->getElementCount(),
11944
14
                                               cast<ConstantInt>(Ops[3]));
11945
14
    Ops[2] = Builder.CreateShuffleVector(Ops[2], Ops[2], SV, "lane");
11946
11947
14
    return emitCallMaybeConstrainedFPBuiltin(
11948
14
        *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
11949
14
        {Ops[2], Ops[1], Ops[0]});
11950
24
  }
11951
18
  case NEON::BI__builtin_neon_vfmaq_laneq_v: {
11952
18
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
11953
18
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
11954
11955
18
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
11956
18
    Ops[2] = EmitNeonSplat(Ops[2], cast<ConstantInt>(Ops[3]));
11957
18
    return emitCallMaybeConstrainedFPBuiltin(
11958
18
        *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
11959
18
        {Ops[2], Ops[1], Ops[0]});
11960
24
  }
11961
10
  case NEON::BI__builtin_neon_vfmah_lane_f16:
11962
20
  case NEON::BI__builtin_neon_vfmas_lane_f32:
11963
30
  case NEON::BI__builtin_neon_vfmah_laneq_f16:
11964
32
  case NEON::BI__builtin_neon_vfmas_laneq_f32:
11965
38
  case NEON::BI__builtin_neon_vfmad_lane_f64:
11966
44
  case NEON::BI__builtin_neon_vfmad_laneq_f64: {
11967
44
    Ops.push_back(EmitScalarExpr(E->getArg(3)));
11968
44
    llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
11969
44
    Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
11970
44
    return emitCallMaybeConstrainedFPBuiltin(
11971
44
        *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
11972
44
        {Ops[1], Ops[2], Ops[0]});
11973
38
  }
11974
11
  case NEON::BI__builtin_neon_vmull_v:
11975
    // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
11976
11
    Int = usgn ? Intrinsic::aarch64_neon_umull : Intrinsic::aarch64_neon_smull;
11977
11
    if (Type.isPoly()) Int = Intrinsic::aarch64_neon_pmull;
11978
11
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
11979
9
  case NEON::BI__builtin_neon_vmax_v:
11980
18
  case NEON::BI__builtin_neon_vmaxq_v:
11981
    // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
11982
18
    Int = usgn ? Intrinsic::aarch64_neon_umax : Intrinsic::aarch64_neon_smax;
11983
18
    if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmax;
11984
18
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmax");
11985
1
  case NEON::BI__builtin_neon_vmaxh_f16: {
11986
1
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
11987
1
    Int = Intrinsic::aarch64_neon_fmax;
11988
1
    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmax");
11989
9
  }
11990
9
  case NEON::BI__builtin_neon_vmin_v:
11991
18
  case NEON::BI__builtin_neon_vminq_v:
11992
    // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
11993
18
    Int = usgn ? Intrinsic::aarch64_neon_umin : Intrinsic::aarch64_neon_smin;
11994
18
    if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmin;
11995
18
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmin");
11996
1
  case NEON::BI__builtin_neon_vminh_f16: {
11997
1
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
11998
1
    Int = Intrinsic::aarch64_neon_fmin;
11999
1
    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmin");
12000
9
  }
12001
9
  case NEON::BI__builtin_neon_vabd_v:
12002
18
  case NEON::BI__builtin_neon_vabdq_v:
12003
    // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
12004
18
    Int = usgn ? Intrinsic::aarch64_neon_uabd : Intrinsic::aarch64_neon_sabd;
12005
18
    if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fabd;
12006
18
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vabd");
12007
6
  case NEON::BI__builtin_neon_vpadal_v:
12008
12
  case NEON::BI__builtin_neon_vpadalq_v: {
12009
12
    unsigned ArgElts = VTy->getNumElements();
12010
12
    llvm::IntegerType *EltTy = cast<IntegerType>(VTy->getElementType());
12011
12
    unsigned BitWidth = EltTy->getBitWidth();
12012
12
    auto *ArgTy = llvm::FixedVectorType::get(
12013
12
        llvm::IntegerType::get(getLLVMContext(), BitWidth / 2), 2 * ArgElts);
12014
12
    llvm::Type* Tys[2] = { VTy, ArgTy };
12015
12
    Int = usgn ? Intrinsic::aarch64_neon_uaddlp : Intrinsic::aarch64_neon_saddlp;
12016
12
    SmallVector<llvm::Value*, 1> TmpOps;
12017
12
    TmpOps.push_back(Ops[1]);
12018
12
    Function *F = CGM.getIntrinsic(Int, Tys);
12019
12
    llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vpadal");
12020
12
    llvm::Value *addend = Builder.CreateBitCast(Ops[0], tmp->getType());
12021
12
    return Builder.CreateAdd(tmp, addend);
12022
6
  }
12023
8
  case NEON::BI__builtin_neon_vpmin_v:
12024
17
  case NEON::BI__builtin_neon_vpminq_v:
12025
    // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
12026
17
    Int = usgn ? Intrinsic::aarch64_neon_uminp : Intrinsic::aarch64_neon_sminp;
12027
17
    if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fminp;
12028
17
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin");
12029
8
  case NEON::BI__builtin_neon_vpmax_v:
12030
17
  case NEON::BI__builtin_neon_vpmaxq_v:
12031
    // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
12032
17
    Int = usgn ? Intrinsic::aarch64_neon_umaxp : Intrinsic::aarch64_neon_smaxp;
12033
17
    if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmaxp;
12034
17
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax");
12035
3
  case NEON::BI__builtin_neon_vminnm_v:
12036
6
  case NEON::BI__builtin_neon_vminnmq_v:
12037
6
    Int = Intrinsic::aarch64_neon_fminnm;
12038
6
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vminnm");
12039
1
  case NEON::BI__builtin_neon_vminnmh_f16:
12040
1
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
12041
1
    Int = Intrinsic::aarch64_neon_fminnm;
12042
1
    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vminnm");
12043
3
  case NEON::BI__builtin_neon_vmaxnm_v:
12044
6
  case NEON::BI__builtin_neon_vmaxnmq_v:
12045
6
    Int = Intrinsic::aarch64_neon_fmaxnm;
12046
6
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmaxnm");
12047
1
  case NEON::BI__builtin_neon_vmaxnmh_f16:
12048
1
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
12049
1
    Int = Intrinsic::aarch64_neon_fmaxnm;
12050
1
    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmaxnm");
12051
1
  case NEON::BI__builtin_neon_vrecpss_f32: {
12052
1
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
12053
1
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, FloatTy),
12054
1
                        Ops, "vrecps");
12055
3
  }
12056
1
  case NEON::BI__builtin_neon_vrecpsd_f64:
12057
1
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
12058
1
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, DoubleTy),
12059
1
                        Ops, "vrecps");
12060
1
  case NEON::BI__builtin_neon_vrecpsh_f16:
12061
1
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
12062
1
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, HalfTy),
12063
1
                        Ops, "vrecps");
12064
6
  case NEON::BI__builtin_neon_vqshrun_n_v:
12065
6
    Int = Intrinsic::aarch64_neon_sqshrun;
12066
6
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrun_n");
12067
6
  case NEON::BI__builtin_neon_vqrshrun_n_v:
12068
6
    Int = Intrinsic::aarch64_neon_sqrshrun;
12069
6
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrun_n");
12070
12
  case NEON::BI__builtin_neon_vqshrn_n_v:
12071
12
    Int = usgn ? Intrinsic::aarch64_neon_uqshrn : Intrinsic::aarch64_neon_sqshrn;
12072
12
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n");
12073
12
  case NEON::BI__builtin_neon_vrshrn_n_v:
12074
12
    Int = Intrinsic::aarch64_neon_rshrn;
12075
12
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshrn_n");
12076
13
  case NEON::BI__builtin_neon_vqrshrn_n_v:
12077
13
    Int = usgn ? Intrinsic::aarch64_neon_uqrshrn : Intrinsic::aarch64_neon_sqrshrn;
12078
13
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n");
12079
3
  case NEON::BI__builtin_neon_vrndah_f16: {
12080
3
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
12081
3
    Int = Builder.getIsFPConstrained()
12082
3
              ? Intrinsic::experimental_constrained_round
12083
3
              : Intrinsic::round;
12084
3
    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrnda");
12085
3
  }
12086
5
  case NEON::BI__builtin_neon_vrnda_v:
12087
17
  case NEON::BI__builtin_neon_vrndaq_v: {
12088
17
    Int = Builder.getIsFPConstrained()
12089
17
              ? Intrinsic::experimental_constrained_round
12090
17
              : Intrinsic::round;
12091
17
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnda");
12092
5
  }
12093
3
  case NEON::BI__builtin_neon_vrndih_f16: {
12094
3
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
12095
3
    Int = Builder.getIsFPConstrained()
12096
3
              ? Intrinsic::experimental_constrained_nearbyint
12097
3
              : Intrinsic::nearbyint;
12098
3
    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndi");
12099
5
  }
12100
3
  case NEON::BI__builtin_neon_vrndmh_f16: {
12101
3
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
12102
3
    Int = Builder.getIsFPConstrained()
12103
3
              ? Intrinsic::experimental_constrained_floor
12104
3
              : Intrinsic::floor;
12105
3
    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndm");
12106
5
  }
12107
5
  case NEON::BI__builtin_neon_vrndm_v:
12108
13
  case NEON::BI__builtin_neon_vrndmq_v: {
12109
13
    Int = Builder.getIsFPConstrained()
12110
13
              ? Intrinsic::experimental_constrained_floor
12111
13
              : Intrinsic::floor;
12112
13
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndm");
12113
5
  }
12114
1
  case NEON::BI__builtin_neon_vrndnh_f16: {
12115
1
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
12116
1
    Int = Builder.getIsFPConstrained()
12117
1
              ? Intrinsic::experimental_constrained_roundeven
12118
1
              : Intrinsic::roundeven;
12119
1
    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndn");
12120
5
  }
12121
3
  case NEON::BI__builtin_neon_vrndn_v:
12122
7
  case NEON::BI__builtin_neon_vrndnq_v: {
12123
7
    Int = Builder.getIsFPConstrained()
12124
7
              ? Intrinsic::experimental_constrained_roundeven
12125
7
              : Intrinsic::roundeven;
12126
7
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndn");
12127
3
  }
12128
1
  case NEON::BI__builtin_neon_vrndns_f32: {
12129
1
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
12130
1
    Int = Builder.getIsFPConstrained()
12131
1
              ? Intrinsic::experimental_constrained_roundeven
12132
1
              : Intrinsic::roundeven;
12133
1
    return EmitNeonCall(CGM.getIntrinsic(Int, FloatTy), Ops, "vrndn");
12134
3
  }
12135
3
  case NEON::BI__builtin_neon_vrndph_f16: {
12136
3
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
12137
3
    Int = Builder.getIsFPConstrained()
12138
3
              ? Intrinsic::experimental_constrained_ceil
12139
3
              : Intrinsic::ceil;
12140
3
    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndp");
12141
3
  }
12142
5
  case NEON::BI__builtin_neon_vrndp_v:
12143
17
  case NEON::BI__builtin_neon_vrndpq_v: {
12144
17
    Int = Builder.getIsFPConstrained()
12145
17
              ? Intrinsic::experimental_constrained_ceil
12146
17
              : Intrinsic::ceil;
12147
17
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndp");
12148
5
  }
12149
3
  case NEON::BI__builtin_neon_vrndxh_f16: {
12150
3
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
12151
3
    Int = Builder.getIsFPConstrained()
12152
3
              ? Intrinsic::experimental_constrained_rint
12153
3
              : Intrinsic::rint;
12154
3
    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndx");
12155
5
  }
12156
5
  case NEON::BI__builtin_neon_vrndx_v:
12157
13
  case NEON::BI__builtin_neon_vrndxq_v: {
12158
13
    Int = Builder.getIsFPConstrained()
12159
13
              ? Intrinsic::experimental_constrained_rint
12160
13
              : Intrinsic::rint;
12161
13
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndx");
12162
5
  }
12163
3
  case NEON::BI__builtin_neon_vrndh_f16: {
12164
3
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
12165
3
    Int = Builder.getIsFPConstrained()
12166
3
              ? Intrinsic::experimental_constrained_trunc
12167
3
              : Intrinsic::trunc;
12168
3
    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndz");
12169
5
  }
12170
0
  case NEON::BI__builtin_neon_vrnd32x_f32:
12171
0
  case NEON::BI__builtin_neon_vrnd32xq_f32:
12172
0
  case NEON::BI__builtin_neon_vrnd32x_f64:
12173
0
  case NEON::BI__builtin_neon_vrnd32xq_f64: {
12174
0
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
12175
0
    Int = Intrinsic::aarch64_neon_frint32x;
12176
0
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd32x");
12177
0
  }
12178
0
  case NEON::BI__builtin_neon_vrnd32z_f32:
12179
0
  case NEON::BI__builtin_neon_vrnd32zq_f32:
12180
0
  case NEON::BI__builtin_neon_vrnd32z_f64:
12181
0
  case NEON::BI__builtin_neon_vrnd32zq_f64: {
12182
0
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
12183
0
    Int = Intrinsic::aarch64_neon_frint32z;
12184
0
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd32z");
12185
0
  }
12186
0
  case NEON::BI__builtin_neon_vrnd64x_f32:
12187
0
  case NEON::BI__builtin_neon_vrnd64xq_f32:
12188
0
  case NEON::BI__builtin_neon_vrnd64x_f64:
12189
0
  case NEON::BI__builtin_neon_vrnd64xq_f64: {
12190
0
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
12191
0
    Int = Intrinsic::aarch64_neon_frint64x;
12192
0
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd64x");
12193
0
  }
12194
0
  case NEON::BI__builtin_neon_vrnd64z_f32:
12195
0
  case NEON::BI__builtin_neon_vrnd64zq_f32:
12196
0
  case NEON::BI__builtin_neon_vrnd64z_f64:
12197
0
  case NEON::BI__builtin_neon_vrnd64zq_f64: {
12198
0
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
12199
0
    Int = Intrinsic::aarch64_neon_frint64z;
12200
0
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd64z");
12201
0
  }
12202
5
  case NEON::BI__builtin_neon_vrnd_v:
12203
13
  case NEON::BI__builtin_neon_vrndq_v: {
12204
13
    Int = Builder.getIsFPConstrained()
12205
13
              ? 
Intrinsic::experimental_constrained_trunc3
12206
13
              : 
Intrinsic::trunc10
;
12207
13
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndz");
12208
5
  }
12209
6
  case NEON::BI__builtin_neon_vcvt_f64_v:
12210
8
  case NEON::BI__builtin_neon_vcvtq_f64_v:
12211
8
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
12212
8
    Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad));
12213
8
    return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
12214
8
                : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
12215
1
  case NEON::BI__builtin_neon_vcvt_f64_f32: {
12216
1
    assert(Type.getEltType() == NeonTypeFlags::Float64 && quad &&
12217
1
           "unexpected vcvt_f64_f32 builtin");
12218
1
    NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float32, false, false);
12219
1
    Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
12220
12221
1
    return Builder.CreateFPExt(Ops[0], Ty, "vcvt");
12222
1
  }
12223
1
  case NEON::BI__builtin_neon_vcvt_f32_f64: {
12224
1
    assert(Type.getEltType() == NeonTypeFlags::Float32 &&
12225
1
           "unexpected vcvt_f32_f64 builtin");
12226
1
    NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float64, false, true);
12227
1
    Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
12228
12229
1
    return Builder.CreateFPTrunc(Ops[0], Ty, "vcvt");
12230
1
  }
12231
1
  case NEON::BI__builtin_neon_vcvt_s32_v:
12232
2
  case NEON::BI__builtin_neon_vcvt_u32_v:
12233
5
  case NEON::BI__builtin_neon_vcvt_s64_v:
12234
8
  case NEON::BI__builtin_neon_vcvt_u64_v:
12235
9
  case NEON::BI__builtin_neon_vcvt_s16_f16:
12236
10
  case NEON::BI__builtin_neon_vcvt_u16_f16:
12237
11
  case NEON::BI__builtin_neon_vcvtq_s32_v:
12238
12
  case NEON::BI__builtin_neon_vcvtq_u32_v:
12239
13
  case NEON::BI__builtin_neon_vcvtq_s64_v:
12240
14
  case NEON::BI__builtin_neon_vcvtq_u64_v:
12241
15
  case NEON::BI__builtin_neon_vcvtq_s16_f16:
12242
16
  case NEON::BI__builtin_neon_vcvtq_u16_f16: {
12243
16
    Int =
12244
16
        usgn ? Intrinsic::aarch64_neon_fcvtzu : Intrinsic::aarch64_neon_fcvtzs;
12245
16
    llvm::Type *Tys[2] = {Ty, GetFloatNeonType(this, Type)};
12246
16
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtz");
12247
15
  }
12248
1
  case NEON::BI__builtin_neon_vcvta_s16_f16:
12249
2
  case NEON::BI__builtin_neon_vcvta_u16_f16:
12250
3
  case NEON::BI__builtin_neon_vcvta_s32_v:
12251
4
  case NEON::BI__builtin_neon_vcvtaq_s16_f16:
12252
5
  case NEON::BI__builtin_neon_vcvtaq_s32_v:
12253
6
  case NEON::BI__builtin_neon_vcvta_u32_v:
12254
6
  case NEON::BI__builtin_neon_vcvtaq_u16_f16:
12255
7
  case NEON::BI__builtin_neon_vcvtaq_u32_v:
12256
8
  case NEON::BI__builtin_neon_vcvta_s64_v:
12257
9
  case NEON::BI__builtin_neon_vcvtaq_s64_v:
12258
10
  case NEON::BI__builtin_neon_vcvta_u64_v:
12259
11
  case NEON::BI__builtin_neon_vcvtaq_u64_v: {
12260
11
    Int = usgn ? Intrinsic::aarch64_neon_fcvtau : Intrinsic::aarch64_neon_fcvtas;
12261
11
    llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
12262
11
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvta");
12263
10
  }
12264
1
  case NEON::BI__builtin_neon_vcvtm_s16_f16:
12265
2
  case NEON::BI__builtin_neon_vcvtm_s32_v:
12266
3
  case NEON::BI__builtin_neon_vcvtmq_s16_f16:
12267
4
  case NEON::BI__builtin_neon_vcvtmq_s32_v:
12268
5
  case NEON::BI__builtin_neon_vcvtm_u16_f16:
12269
6
  case NEON::BI__builtin_neon_vcvtm_u32_v:
12270
7
  case NEON::BI__builtin_neon_vcvtmq_u16_f16:
12271
8
  case NEON::BI__builtin_neon_vcvtmq_u32_v:
12272
9
  case NEON::BI__builtin_neon_vcvtm_s64_v:
12273
10
  case NEON::BI__builtin_neon_vcvtmq_s64_v:
12274
11
  case NEON::BI__builtin_neon_vcvtm_u64_v:
12275
12
  case NEON::BI__builtin_neon_vcvtmq_u64_v: {
12276
12
    Int = usgn ? Intrinsic::aarch64_neon_fcvtmu : Intrinsic::aarch64_neon_fcvtms;
12277
12
    llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
12278
12
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtm");
12279
11
  }
12280
1
  case NEON::BI__builtin_neon_vcvtn_s16_f16:
12281
2
  case NEON::BI__builtin_neon_vcvtn_s32_v:
12282
3
  case NEON::BI__builtin_neon_vcvtnq_s16_f16:
12283
4
  case NEON::BI__builtin_neon_vcvtnq_s32_v:
12284
5
  case NEON::BI__builtin_neon_vcvtn_u16_f16:
12285
6
  case NEON::BI__builtin_neon_vcvtn_u32_v:
12286
7
  case NEON::BI__builtin_neon_vcvtnq_u16_f16:
12287
8
  case NEON::BI__builtin_neon_vcvtnq_u32_v:
12288
9
  case NEON::BI__builtin_neon_vcvtn_s64_v:
12289
10
  case NEON::BI__builtin_neon_vcvtnq_s64_v:
12290
11
  case NEON::BI__builtin_neon_vcvtn_u64_v:
12291
12
  case NEON::BI__builtin_neon_vcvtnq_u64_v: {
12292
12
    Int = usgn ? Intrinsic::aarch64_neon_fcvtnu : Intrinsic::aarch64_neon_fcvtns;
12293
12
    llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
12294
12
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtn");
12295
11
  }
12296
1
  case NEON::BI__builtin_neon_vcvtp_s16_f16:
12297
2
  case NEON::BI__builtin_neon_vcvtp_s32_v:
12298
3
  case NEON::BI__builtin_neon_vcvtpq_s16_f16:
12299
4
  case NEON::BI__builtin_neon_vcvtpq_s32_v:
12300
5
  case NEON::BI__builtin_neon_vcvtp_u16_f16:
12301
6
  case NEON::BI__builtin_neon_vcvtp_u32_v:
12302
7
  case NEON::BI__builtin_neon_vcvtpq_u16_f16:
12303
8
  case NEON::BI__builtin_neon_vcvtpq_u32_v:
12304
9
  case NEON::BI__builtin_neon_vcvtp_s64_v:
12305
10
  case NEON::BI__builtin_neon_vcvtpq_s64_v:
12306
11
  case NEON::BI__builtin_neon_vcvtp_u64_v:
12307
12
  case NEON::BI__builtin_neon_vcvtpq_u64_v: {
12308
12
    Int = usgn ? Intrinsic::aarch64_neon_fcvtpu : Intrinsic::aarch64_neon_fcvtps;
12309
12
    llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
12310
12
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtp");
12311
11
  }
12312
4
  case NEON::BI__builtin_neon_vmulx_v:
12313
9
  case NEON::BI__builtin_neon_vmulxq_v: {
12314
9
    Int = Intrinsic::aarch64_neon_fmulx;
12315
9
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmulx");
12316
4
  }
12317
1
  case NEON::BI__builtin_neon_vmulxh_lane_f16:
12318
2
  case NEON::BI__builtin_neon_vmulxh_laneq_f16: {
12319
    // vmulx_lane should be mapped to Neon scalar mulx after
12320
    // extracting the scalar element
12321
2
    Ops.push_back(EmitScalarExpr(E->getArg(2)));
12322
2
    Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
12323
2
    Ops.pop_back();
12324
2
    Int = Intrinsic::aarch64_neon_fmulx;
12325
2
    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmulx");
12326
1
  }
12327
1
  case NEON::BI__builtin_neon_vmul_lane_v:
12328
3
  case NEON::BI__builtin_neon_vmul_laneq_v: {
12329
    // v1f64 vmul_lane should be mapped to Neon scalar mul lane
12330
3
    bool Quad = false;
12331
3
    if (BuiltinID == NEON::BI__builtin_neon_vmul_laneq_v)
12332
2
      Quad = true;
12333
3
    Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
12334
3
    llvm::FixedVectorType *VTy =
12335
3
        GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, Quad));
12336
3
    Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
12337
3
    Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
12338
3
    Value *Result = Builder.CreateFMul(Ops[0], Ops[1]);
12339
3
    return Builder.CreateBitCast(Result, Ty);
12340
1
  }
12341
1
  case NEON::BI__builtin_neon_vnegd_s64:
12342
1
    return Builder.CreateNeg(EmitScalarExpr(E->getArg(0)), "vnegd");
12343
1
  case NEON::BI__builtin_neon_vnegh_f16:
12344
1
    return Builder.CreateFNeg(EmitScalarExpr(E->getArg(0)), "vnegh");
12345
2
  case NEON::BI__builtin_neon_vpmaxnm_v:
12346
5
  case NEON::BI__builtin_neon_vpmaxnmq_v: {
12347
5
    Int = Intrinsic::aarch64_neon_fmaxnmp;
12348
5
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmaxnm");
12349
2
  }
12350
2
  case NEON::BI__builtin_neon_vpminnm_v:
12351
5
  case NEON::BI__builtin_neon_vpminnmq_v: {
12352
5
    Int = Intrinsic::aarch64_neon_fminnmp;
12353
5
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpminnm");
12354
2
  }
12355
3
  case NEON::BI__builtin_neon_vsqrth_f16: {
12356
3
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
12357
3
    Int = Builder.getIsFPConstrained()
12358
3
              ? Intrinsic::experimental_constrained_sqrt
12359
3
              : Intrinsic::sqrt;
12360
3
    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vsqrt");
12361
2
  }
12362
9
  case NEON::BI__builtin_neon_vsqrt_v:
12363
24
  case NEON::BI__builtin_neon_vsqrtq_v: {
12364
24
    Int = Builder.getIsFPConstrained()
12365
24
              ? Intrinsic::experimental_constrained_sqrt
12366
24
              : Intrinsic::sqrt;
12367
24
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
12368
24
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqrt");
12369
9
  }
12370
3
  case NEON::BI__builtin_neon_vrbit_v:
12371
6
  case NEON::BI__builtin_neon_vrbitq_v: {
12372
6
    Int = Intrinsic::bitreverse;
12373
6
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrbit");
12374
3
  }
12375
1
  case NEON::BI__builtin_neon_vaddv_u8:
12376
    // FIXME: These are handled by the AArch64 scalar code.
12377
1
    usgn = true;
12378
1
    [[fallthrough]];
12379
2
  case NEON::BI__builtin_neon_vaddv_s8: {
12380
2
    Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
12381
2
    Ty = Int32Ty;
12382
2
    VTy = llvm::FixedVectorType::get(Int8Ty, 8);
12383
2
    llvm::Type *Tys[2] = { Ty, VTy };
12384
2
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
12385
2
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
12386
2
    return Builder.CreateTrunc(Ops[0], Int8Ty);
12387
1
  }
12388
1
  case NEON::BI__builtin_neon_vaddv_u16:
12389
1
    usgn = true;
12390
1
    [[fallthrough]];
12391
2
  case NEON::BI__builtin_neon_vaddv_s16: {
12392
2
    Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
12393
2
    Ty = Int32Ty;
12394
2
    VTy = llvm::FixedVectorType::get(Int16Ty, 4);
12395
2
    llvm::Type *Tys[2] = { Ty, VTy };
12396
2
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
12397
2
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
12398
2
    return Builder.CreateTrunc(Ops[0], Int16Ty);
12399
1
  }
12400
1
  case NEON::BI__builtin_neon_vaddvq_u8:
12401
1
    usgn = true;
12402
1
    [[fallthrough]];
12403
2
  case NEON::BI__builtin_neon_vaddvq_s8: {
12404
2
    Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
12405
2
    Ty = Int32Ty;
12406
2
    VTy = llvm::FixedVectorType::get(Int8Ty, 16);
12407
2
    llvm::Type *Tys[2] = { Ty, VTy };
12408
2
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
12409
2
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
12410
2
    return Builder.CreateTrunc(Ops[0], Int8Ty);
12411
1
  }
12412
1
  case NEON::BI__builtin_neon_vaddvq_u16:
12413
1
    usgn = true;
12414
1
    [[fallthrough]];
12415
2
  case NEON::BI__builtin_neon_vaddvq_s16: {
12416
2
    Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
12417
2
    Ty = Int32Ty;
12418
2
    VTy = llvm::FixedVectorType::get(Int16Ty, 8);
12419
2
    llvm::Type *Tys[2] = { Ty, VTy };
12420
2
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
12421
2
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
12422
2
    return Builder.CreateTrunc(Ops[0], Int16Ty);
12423
1
  }
12424
1
  case NEON::BI__builtin_neon_vmaxv_u8: {
12425
1
    Int = Intrinsic::aarch64_neon_umaxv;
12426
1
    Ty = Int32Ty;
12427
1
    VTy = llvm::FixedVectorType::get(Int8Ty, 8);
12428
1
    llvm::Type *Tys[2] = { Ty, VTy };
12429
1
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
12430
1
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
12431
1
    return Builder.CreateTrunc(Ops[0], Int8Ty);
12432
1
  }
12433
1
  case NEON::BI__builtin_neon_vmaxv_u16: {
12434
1
    Int = Intrinsic::aarch64_neon_umaxv;
12435
1
    Ty = Int32Ty;
12436
1
    VTy = llvm::FixedVectorType::get(Int16Ty, 4);
12437
1
    llvm::Type *Tys[2] = { Ty, VTy };
12438
1
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
12439
1
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
12440
1
    return Builder.CreateTrunc(Ops[0], Int16Ty);
12441
1
  }
12442
1
  case NEON::BI__builtin_neon_vmaxvq_u8: {
12443
1
    Int = Intrinsic::aarch64_neon_umaxv;
12444
1
    Ty = Int32Ty;
12445
1
    VTy = llvm::FixedVectorType::get(Int8Ty, 16);
12446
1
    llvm::Type *Tys[2] = { Ty, VTy };
12447
1
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
12448
1
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
12449
1
    return Builder.CreateTrunc(Ops[0], Int8Ty);
12450
1
  }
12451
1
  case NEON::BI__builtin_neon_vmaxvq_u16: {
12452
1
    Int = Intrinsic::aarch64_neon_umaxv;
12453
1
    Ty = Int32Ty;
12454
1
    VTy = llvm::FixedVectorType::get(Int16Ty, 8);
12455
1
    llvm::Type *Tys[2] = { Ty, VTy };
12456
1
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
12457
1
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
12458
1
    return Builder.CreateTrunc(Ops[0], Int16Ty);
12459
1
  }
12460
1
  case NEON::BI__builtin_neon_vmaxv_s8: {
12461
1
    Int = Intrinsic::aarch64_neon_smaxv;
12462
1
    Ty = Int32Ty;
12463
1
    VTy = llvm::FixedVectorType::get(Int8Ty, 8);
12464
1
    llvm::Type *Tys[2] = { Ty, VTy };
12465
1
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
12466
1
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
12467
1
    return Builder.CreateTrunc(Ops[0], Int8Ty);
12468
1
  }
12469
1
  case NEON::BI__builtin_neon_vmaxv_s16: {
12470
1
    Int = Intrinsic::aarch64_neon_smaxv;
12471
1
    Ty = Int32Ty;
12472
1
    VTy = llvm::FixedVectorType::get(Int16Ty, 4);
12473
1
    llvm::Type *Tys[2] = { Ty, VTy };
12474
1
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
12475
1
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
12476
1
    return Builder.CreateTrunc(Ops[0], Int16Ty);
12477
1
  }
12478
1
  case NEON::BI__builtin_neon_vmaxvq_s8: {
12479
1
    Int = Intrinsic::aarch64_neon_smaxv;
12480
1
    Ty = Int32Ty;
12481
1
    VTy = llvm::FixedVectorType::get(Int8Ty, 16);
12482
1
    llvm::Type *Tys[2] = { Ty, VTy };
12483
1
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
12484
1
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
12485
1
    return Builder.CreateTrunc(Ops[0], Int8Ty);
12486
1
  }
12487
1
  case NEON::BI__builtin_neon_vmaxvq_s16: {
12488
1
    Int = Intrinsic::aarch64_neon_smaxv;
12489
1
    Ty = Int32Ty;
12490
1
    VTy = llvm::FixedVectorType::get(Int16Ty, 8);
12491
1
    llvm::Type *Tys[2] = { Ty, VTy };
12492
1
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
12493
1
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
12494
1
    return Builder.CreateTrunc(Ops[0], Int16Ty);
12495
1
  }
12496
1
  case NEON::BI__builtin_neon_vmaxv_f16: {
12497
1
    Int = Intrinsic::aarch64_neon_fmaxv;
12498
1
    Ty = HalfTy;
12499
1
    VTy = llvm::FixedVectorType::get(HalfTy, 4);
12500
1
    llvm::Type *Tys[2] = { Ty, VTy };
12501
1
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
12502
1
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
12503
1
    return Builder.CreateTrunc(Ops[0], HalfTy);
12504
1
  }
12505
1
  case NEON::BI__builtin_neon_vmaxvq_f16: {
12506
1
    Int = Intrinsic::aarch64_neon_fmaxv;
12507
1
    Ty = HalfTy;
12508
1
    VTy = llvm::FixedVectorType::get(HalfTy, 8);
12509
1
    llvm::Type *Tys[2] = { Ty, VTy };
12510
1
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
12511
1
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
12512
1
    return Builder.CreateTrunc(Ops[0], HalfTy);
12513
1
  }
12514
1
  case NEON::BI__builtin_neon_vminv_u8: {
12515
1
    Int = Intrinsic::aarch64_neon_uminv;
12516
1
    Ty = Int32Ty;
12517
1
    VTy = llvm::FixedVectorType::get(Int8Ty, 8);
12518
1
    llvm::Type *Tys[2] = { Ty, VTy };
12519
1
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
12520
1
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
12521
1
    return Builder.CreateTrunc(Ops[0], Int8Ty);
12522
1
  }
12523
1
  case NEON::BI__builtin_neon_vminv_u16: {
12524
1
    Int = Intrinsic::aarch64_neon_uminv;
12525
1
    Ty = Int32Ty;
12526
1
    VTy = llvm::FixedVectorType::get(Int16Ty, 4);
12527
1
    llvm::Type *Tys[2] = { Ty, VTy };
12528
1
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
12529
1
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
12530
1
    return Builder.CreateTrunc(Ops[0], Int16Ty);
12531
1
  }
12532
1
  case NEON::BI__builtin_neon_vminvq_u8: {
12533
1
    Int = Intrinsic::aarch64_neon_uminv;
12534
1
    Ty = Int32Ty;
12535
1
    VTy = llvm::FixedVectorType::get(Int8Ty, 16);
12536
1
    llvm::Type *Tys[2] = { Ty, VTy };
12537
1
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
12538
1
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
12539
1
    return Builder.CreateTrunc(Ops[0], Int8Ty);
12540
1
  }
12541
1
  case NEON::BI__builtin_neon_vminvq_u16: {
12542
1
    Int = Intrinsic::aarch64_neon_uminv;
12543
1
    Ty = Int32Ty;
12544
1
    VTy = llvm::FixedVectorType::get(Int16Ty, 8);
12545
1
    llvm::Type *Tys[2] = { Ty, VTy };
12546
1
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
12547
1
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
12548
1
    return Builder.CreateTrunc(Ops[0], Int16Ty);
12549
1
  }
12550
1
  case NEON::BI__builtin_neon_vminv_s8: {
12551
1
    Int = Intrinsic::aarch64_neon_sminv;
12552
1
    Ty = Int32Ty;
12553
1
    VTy = llvm::FixedVectorType::get(Int8Ty, 8);
12554
1
    llvm::Type *Tys[2] = { Ty, VTy };
12555
1
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
12556
1
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
12557
1
    return Builder.CreateTrunc(Ops[0], Int8Ty);
12558
1
  }
12559
1
  case NEON::BI__builtin_neon_vminv_s16: {
12560
1
    Int = Intrinsic::aarch64_neon_sminv;
12561
1
    Ty = Int32Ty;
12562
1
    VTy = llvm::FixedVectorType::get(Int16Ty, 4);
12563
1
    llvm::Type *Tys[2] = { Ty, VTy };
12564
1
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
12565
1
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
12566
1
    return Builder.CreateTrunc(Ops[0], Int16Ty);
12567
1
  }
12568
1
  case NEON::BI__builtin_neon_vminvq_s8: {
12569
1
    Int = Intrinsic::aarch64_neon_sminv;
12570
1
    Ty = Int32Ty;
12571
1
    VTy = llvm::FixedVectorType::get(Int8Ty, 16);
12572
1
    llvm::Type *Tys[2] = { Ty, VTy };
12573
1
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
12574
1
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
12575
1
    return Builder.CreateTrunc(Ops[0], Int8Ty);
12576
1
  }
12577
1
  case NEON::BI__builtin_neon_vminvq_s16: {
12578
1
    Int = Intrinsic::aarch64_neon_sminv;
12579
1
    Ty = Int32Ty;
12580
1
    VTy = llvm::FixedVectorType::get(Int16Ty, 8);
12581
1
    llvm::Type *Tys[2] = { Ty, VTy };
12582
1
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
12583
1
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
12584
1
    return Builder.CreateTrunc(Ops[0], Int16Ty);
12585
1
  }
12586
1
  case NEON::BI__builtin_neon_vminv_f16: {
12587
1
    Int = Intrinsic::aarch64_neon_fminv;
12588
1
    Ty = HalfTy;
12589
1
    VTy = llvm::FixedVectorType::get(HalfTy, 4);
12590
1
    llvm::Type *Tys[2] = { Ty, VTy };
12591
1
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
12592
1
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
12593
1
    return Builder.CreateTrunc(Ops[0], HalfTy);
12594
1
  }
12595
1
  case NEON::BI__builtin_neon_vminvq_f16: {
12596
1
    Int = Intrinsic::aarch64_neon_fminv;
12597
1
    Ty = HalfTy;
12598
1
    VTy = llvm::FixedVectorType::get(HalfTy, 8);
12599
1
    llvm::Type *Tys[2] = { Ty, VTy };
12600
1
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
12601
1
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
12602
1
    return Builder.CreateTrunc(Ops[0], HalfTy);
12603
1
  }
12604
1
  case NEON::BI__builtin_neon_vmaxnmv_f16: {
12605
1
    Int = Intrinsic::aarch64_neon_fmaxnmv;
12606
1
    Ty = HalfTy;
12607
1
    VTy = llvm::FixedVectorType::get(HalfTy, 4);
12608
1
    llvm::Type *Tys[2] = { Ty, VTy };
12609
1
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
12610
1
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv");
12611
1
    return Builder.CreateTrunc(Ops[0], HalfTy);
12612
1
  }
12613
1
  case NEON::BI__builtin_neon_vmaxnmvq_f16: {
12614
1
    Int = Intrinsic::aarch64_neon_fmaxnmv;
12615
1
    Ty = HalfTy;
12616
1
    VTy = llvm::FixedVectorType::get(HalfTy, 8);
12617
1
    llvm::Type *Tys[2] = { Ty, VTy };
12618
1
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
12619
1
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv");
12620
1
    return Builder.CreateTrunc(Ops[0], HalfTy);
12621
1
  }
12622
1
  case NEON::BI__builtin_neon_vminnmv_f16: {
12623
1
    Int = Intrinsic::aarch64_neon_fminnmv;
12624
1
    Ty = HalfTy;
12625
1
    VTy = llvm::FixedVectorType::get(HalfTy, 4);
12626
1
    llvm::Type *Tys[2] = { Ty, VTy };
12627
1
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
12628
1
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv");
12629
1
    return Builder.CreateTrunc(Ops[0], HalfTy);
12630
1
  }
12631
1
  case NEON::BI__builtin_neon_vminnmvq_f16: {
12632
1
    Int = Intrinsic::aarch64_neon_fminnmv;
12633
1
    Ty = HalfTy;
12634
1
    VTy = llvm::FixedVectorType::get(HalfTy, 8);
12635
1
    llvm::Type *Tys[2] = { Ty, VTy };
12636
1
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
12637
1
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv");
12638
1
    return Builder.CreateTrunc(Ops[0], HalfTy);
12639
1
  }
12640
1
  case NEON::BI__builtin_neon_vmul_n_f64: {
12641
1
    Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
12642
1
    Value *RHS = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), DoubleTy);
12643
1
    return Builder.CreateFMul(Ops[0], RHS);
12644
1
  }
12645
1
  case NEON::BI__builtin_neon_vaddlv_u8: {
12646
1
    Int = Intrinsic::aarch64_neon_uaddlv;
12647
1
    Ty = Int32Ty;
12648
1
    VTy = llvm::FixedVectorType::get(Int8Ty, 8);
12649
1
    llvm::Type *Tys[2] = { Ty, VTy };
12650
1
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
12651
1
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
12652
1
    return Builder.CreateTrunc(Ops[0], Int16Ty);
12653
1
  }
12654
1
  case NEON::BI__builtin_neon_vaddlv_u16: {
12655
1
    Int = Intrinsic::aarch64_neon_uaddlv;
12656
1
    Ty = Int32Ty;
12657
1
    VTy = llvm::FixedVectorType::get(Int16Ty, 4);
12658
1
    llvm::Type *Tys[2] = { Ty, VTy };
12659
1
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
12660
1
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
12661
1
  }
12662
1
  case NEON::BI__builtin_neon_vaddlvq_u8: {
12663
1
    Int = Intrinsic::aarch64_neon_uaddlv;
12664
1
    Ty = Int32Ty;
12665
1
    VTy = llvm::FixedVectorType::get(Int8Ty, 16);
12666
1
    llvm::Type *Tys[2] = { Ty, VTy };
12667
1
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
12668
1
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
12669
1
    return Builder.CreateTrunc(Ops[0], Int16Ty);
12670
1
  }
12671
1
  case NEON::BI__builtin_neon_vaddlvq_u16: {
12672
1
    Int = Intrinsic::aarch64_neon_uaddlv;
12673
1
    Ty = Int32Ty;
12674
1
    VTy = llvm::FixedVectorType::get(Int16Ty, 8);
12675
1
    llvm::Type *Tys[2] = { Ty, VTy };
12676
1
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
12677
1
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
12678
1
  }
12679
1
  case NEON::BI__builtin_neon_vaddlv_s8: {
12680
1
    Int = Intrinsic::aarch64_neon_saddlv;
12681
1
    Ty = Int32Ty;
12682
1
    VTy = llvm::FixedVectorType::get(Int8Ty, 8);
12683
1
    llvm::Type *Tys[2] = { Ty, VTy };
12684
1
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
12685
1
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
12686
1
    return Builder.CreateTrunc(Ops[0], Int16Ty);
12687
1
  }
12688
1
  case NEON::BI__builtin_neon_vaddlv_s16: {
12689
1
    Int = Intrinsic::aarch64_neon_saddlv;
12690
1
    Ty = Int32Ty;
12691
1
    VTy = llvm::FixedVectorType::get(Int16Ty, 4);
12692
1
    llvm::Type *Tys[2] = { Ty, VTy };
12693
1
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
12694
1
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
12695
1
  }
12696
1
  case NEON::BI__builtin_neon_vaddlvq_s8: {
12697
1
    Int = Intrinsic::aarch64_neon_saddlv;
12698
1
    Ty = Int32Ty;
12699
1
    VTy = llvm::FixedVectorType::get(Int8Ty, 16);
12700
1
    llvm::Type *Tys[2] = { Ty, VTy };
12701
1
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
12702
1
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
12703
1
    return Builder.CreateTrunc(Ops[0], Int16Ty);
12704
1
  }
12705
1
  case NEON::BI__builtin_neon_vaddlvq_s16: {
12706
1
    Int = Intrinsic::aarch64_neon_saddlv;
12707
1
    Ty = Int32Ty;
12708
1
    VTy = llvm::FixedVectorType::get(Int16Ty, 8);
12709
1
    llvm::Type *Tys[2] = { Ty, VTy };
12710
1
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
12711
1
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
12712
1
  }
12713
11
  case NEON::BI__builtin_neon_vsri_n_v:
12714
22
  case NEON::BI__builtin_neon_vsriq_n_v: {
12715
22
    Int = Intrinsic::aarch64_neon_vsri;
12716
22
    llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
12717
22
    return EmitNeonCall(Intrin, Ops, "vsri_n");
12718
11
  }
12719
11
  case NEON::BI__builtin_neon_vsli_n_v:
12720
22
  case NEON::BI__builtin_neon_vsliq_n_v: {
12721
22
    Int = Intrinsic::aarch64_neon_vsli;
12722
22
    llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
12723
22
    return EmitNeonCall(Intrin, Ops, "vsli_n");
12724
11
  }
12725
11
  case NEON::BI__builtin_neon_vsra_n_v:
12726
19
  case NEON::BI__builtin_neon_vsraq_n_v:
12727
19
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
12728
19
    Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
12729
19
    return Builder.CreateAdd(Ops[0], Ops[1]);
12730
8
  case NEON::BI__builtin_neon_vrsra_n_v:
12731
16
  case NEON::BI__builtin_neon_vrsraq_n_v: {
12732
16
    Int = usgn ? Intrinsic::aarch64_neon_urshl : Intrinsic::aarch64_neon_srshl;
12733
16
    SmallVector<llvm::Value*,2> TmpOps;
12734
16
    TmpOps.push_back(Ops[1]);
12735
16
    TmpOps.push_back(Ops[2]);
12736
16
    Function* F = CGM.getIntrinsic(Int, Ty);
12737
16
    llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vrshr_n", 1, true);
12738
16
    Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
12739
16
    return Builder.CreateAdd(Ops[0], tmp);
12740
8
  }
12741
32
  case NEON::BI__builtin_neon_vld1_v:
12742
47
  case NEON::BI__builtin_neon_vld1q_v: {
12743
47
    return Builder.CreateAlignedLoad(VTy, Ops[0], PtrOp0.getAlignment());
12744
32
  }
12745
15
  case NEON::BI__builtin_neon_vst1_v:
12746
30
  case NEON::BI__builtin_neon_vst1q_v:
12747
30
    Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
12748
30
    return Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment());
12749
15
  case NEON::BI__builtin_neon_vld1_lane_v:
12750
30
  case NEON::BI__builtin_neon_vld1q_lane_v: {
12751
30
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
12752
30
    Ops[0] = Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0],
12753
30
                                       PtrOp0.getAlignment());
12754
30
    return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vld1_lane");
12755
15
  }
12756
4
  case NEON::BI__builtin_neon_vldap1_lane_s64:
12757
8
  case NEON::BI__builtin_neon_vldap1q_lane_s64: {
12758
8
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
12759
8
    llvm::LoadInst *LI = Builder.CreateAlignedLoad(
12760
8
        VTy->getElementType(), Ops[0], PtrOp0.getAlignment());
12761
8
    LI->setAtomic(llvm::AtomicOrdering::Acquire);
12762
8
    Ops[0] = LI;
12763
8
    return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vldap1_lane");
12764
4
  }
12765
15
  case NEON::BI__builtin_neon_vld1_dup_v:
12766
30
  case NEON::BI__builtin_neon_vld1q_dup_v: {
12767
30
    Value *V = PoisonValue::get(Ty);
12768
30
    Ops[0] = Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0],
12769
30
                                       PtrOp0.getAlignment());
12770
30
    llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
12771
30
    Ops[0] = Builder.CreateInsertElement(V, Ops[0], CI);
12772
30
    return EmitNeonSplat(Ops[0], CI);
12773
15
  }
12774
15
  case NEON::BI__builtin_neon_vst1_lane_v:
12775
30
  case NEON::BI__builtin_neon_vst1q_lane_v:
12776
30
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
12777
30
    Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
12778
30
    return Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment());
12779
4
  case NEON::BI__builtin_neon_vstl1_lane_s64:
12780
8
  case NEON::BI__builtin_neon_vstl1q_lane_s64: {
12781
8
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
12782
8
    Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
12783
8
    llvm::StoreInst *SI =
12784
8
        Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment());
12785
8
    SI->setAtomic(llvm::AtomicOrdering::Release);
12786
8
    return SI;
12787
4
  }
12788
15
  case NEON::BI__builtin_neon_vld2_v:
12789
30
  case NEON::BI__builtin_neon_vld2q_v: {
12790
30
    llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
12791
30
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2, Tys);
12792
30
    Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
12793
30
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
12794
15
  }
12795
15
  case NEON::BI__builtin_neon_vld3_v:
12796
30
  case NEON::BI__builtin_neon_vld3q_v: {
12797
30
    llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
12798
30
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3, Tys);
12799
30
    Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
12800
30
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
12801
15
  }
12802
15
  case NEON::BI__builtin_neon_vld4_v:
12803
30
  case NEON::BI__builtin_neon_vld4q_v: {
12804
30
    llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
12805
30
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4, Tys);
12806
30
    Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
12807
30
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
12808
15
  }
12809
15
  case NEON::BI__builtin_neon_vld2_dup_v:
12810
30
  case NEON::BI__builtin_neon_vld2q_dup_v: {
12811
30
    llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
12812
30
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2r, Tys);
12813
30
    Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
12814
30
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
12815
15
  }
12816
15
  case NEON::BI__builtin_neon_vld3_dup_v:
12817
30
  case NEON::BI__builtin_neon_vld3q_dup_v: {
12818
30
    llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
12819
30
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3r, Tys);
12820
30
    Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
12821
30
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
12822
15
  }
12823
15
  case NEON::BI__builtin_neon_vld4_dup_v:
12824
30
  case NEON::BI__builtin_neon_vld4q_dup_v: {
12825
30
    llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
12826
30
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4r, Tys);
12827
30
    Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
12828
30
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
12829
15
  }
12830
15
  case NEON::BI__builtin_neon_vld2_lane_v:
12831
30
  case NEON::BI__builtin_neon_vld2q_lane_v: {
12832
30
    llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
12833
30
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2lane, Tys);
12834
30
    std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
12835
30
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
12836
30
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
12837
30
    Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
12838
30
    Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld2_lane");
12839
30
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
12840
15
  }
12841
15
  case NEON::BI__builtin_neon_vld3_lane_v:
12842
30
  case NEON::BI__builtin_neon_vld3q_lane_v: {
12843
30
    llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
12844
30
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3lane, Tys);
12845
30
    std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
12846
30
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
12847
30
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
12848
30
    Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
12849
30
    Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
12850
30
    Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld3_lane");
12851
30
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
12852
15
  }
12853
15
  case NEON::BI__builtin_neon_vld4_lane_v:
12854
30
  case NEON::BI__builtin_neon_vld4q_lane_v: {
12855
30
    llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
12856
30
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4lane, Tys);
12857
30
    std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
12858
30
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
12859
30
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
12860
30
    Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
12861
30
    Ops[4] = Builder.CreateBitCast(Ops[4], Ty);
12862
30
    Ops[5] = Builder.CreateZExt(Ops[5], Int64Ty);
12863
30
    Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld4_lane");
12864
30
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
12865
15
  }
12866
15
  case NEON::BI__builtin_neon_vst2_v:
12867
30
  case NEON::BI__builtin_neon_vst2q_v: {
12868
30
    std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
12869
30
    llvm::Type *Tys[2] = { VTy, Ops[2]->getType() };
12870
30
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2, Tys),
12871
30
                        Ops, "");
12872
15
  }
12873
15
  case NEON::BI__builtin_neon_vst2_lane_v:
12874
30
  case NEON::BI__builtin_neon_vst2q_lane_v: {
12875
30
    std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
12876
30
    Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
12877
30
    llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
12878
30
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2lane, Tys),
12879
30
                        Ops, "");
12880
15
  }
12881
15
  case NEON::BI__builtin_neon_vst3_v:
12882
30
  case NEON::BI__builtin_neon_vst3q_v: {
12883
30
    std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
12884
30
    llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
12885
30
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3, Tys),
12886
30
                        Ops, "");
12887
15
  }
12888
15
  case NEON::BI__builtin_neon_vst3_lane_v:
12889
30
  case NEON::BI__builtin_neon_vst3q_lane_v: {
12890
30
    std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
12891
30
    Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
12892
30
    llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
12893
30
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3lane, Tys),
12894
30
                        Ops, "");
12895
15
  }
12896
15
  case NEON::BI__builtin_neon_vst4_v:
12897
30
  case NEON::BI__builtin_neon_vst4q_v: {
12898
30
    std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
12899
30
    llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
12900
30
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4, Tys),
12901
30
                        Ops, "");
12902
15
  }
12903
15
  case NEON::BI__builtin_neon_vst4_lane_v:
12904
30
  case NEON::BI__builtin_neon_vst4q_lane_v: {
12905
30
    std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
12906
30
    Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
12907
30
    llvm::Type *Tys[2] = { VTy, Ops[5]->getType() };
12908
30
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4lane, Tys),
12909
30
                        Ops, "");
12910
15
  }
12911
10
  case NEON::BI__builtin_neon_vtrn_v:
12912
20
  case NEON::BI__builtin_neon_vtrnq_v: {
12913
20
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
12914
20
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
12915
20
    Value *SV = nullptr;
12916
12917
60
    for (unsigned vi = 0; vi != 2; ++vi) {
12918
40
      SmallVector<int, 16> Indices;
12919
178
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
12920
138
        Indices.push_back(i+vi);
12921
138
        Indices.push_back(i+e+vi);
12922
138
      }
12923
40
      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
12924
40
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
12925
40
      SV = Builder.CreateDefaultAlignedStore(SV, Addr);
12926
40
    }
12927
20
    return SV;
12928
10
  }
12929
10
  case NEON::BI__builtin_neon_vuzp_v:
12930
20
  case NEON::BI__builtin_neon_vuzpq_v: {
12931
20
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
12932
20
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
12933
20
    Value *SV = nullptr;
12934
12935
60
    for (unsigned vi = 0; vi != 2; ++vi) {
12936
40
      SmallVector<int, 16> Indices;
12937
316
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
12938
276
        Indices.push_back(2*i+vi);
12939
12940
40
      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
12941
40
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
12942
40
      SV = Builder.CreateDefaultAlignedStore(SV, Addr);
12943
40
    }
12944
20
    return SV;
12945
10
  }
12946
10
  case NEON::BI__builtin_neon_vzip_v:
12947
22
  case NEON::BI__builtin_neon_vzipq_v: {
12948
22
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
12949
22
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
12950
22
    Value *SV = nullptr;
12951
12952
66
    for (unsigned vi = 0; vi != 2; ++vi) {
12953
44
      SmallVector<int, 16> Indices;
12954
214
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
12955
170
        Indices.push_back((i + vi*e) >> 1);
12956
170
        Indices.push_back(((i + vi*e) >> 1)+e);
12957
170
      }
12958
44
      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
12959
44
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
12960
44
      SV = Builder.CreateDefaultAlignedStore(SV, Addr);
12961
44
    }
12962
22
    return SV;
12963
10
  }
12964
0
  case NEON::BI__builtin_neon_vqtbl1q_v: {
12965
0
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl1, Ty),
12966
0
                        Ops, "vtbl1");
12967
10
  }
12968
0
  case NEON::BI__builtin_neon_vqtbl2q_v: {
12969
0
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl2, Ty),
12970
0
                        Ops, "vtbl2");
12971
10
  }
12972
0
  case NEON::BI__builtin_neon_vqtbl3q_v: {
12973
0
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl3, Ty),
12974
0
                        Ops, "vtbl3");
12975
10
  }
12976
0
  case NEON::BI__builtin_neon_vqtbl4q_v: {
12977
0
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl4, Ty),
12978
0
                        Ops, "vtbl4");
12979
10
  }
12980
0
  case NEON::BI__builtin_neon_vqtbx1q_v: {
12981
0
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx1, Ty),
12982
0
                        Ops, "vtbx1");
12983
10
  }
12984
0
  case NEON::BI__builtin_neon_vqtbx2q_v: {
12985
0
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx2, Ty),
12986
0
                        Ops, "vtbx2");
12987
10
  }
12988
0
  case NEON::BI__builtin_neon_vqtbx3q_v: {
12989
0
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx3, Ty),
12990
0
                        Ops, "vtbx3");
12991
10
  }
12992
0
  case NEON::BI__builtin_neon_vqtbx4q_v: {
12993
0
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx4, Ty),
12994
0
                        Ops, "vtbx4");
12995
10
  }
12996
4
  case NEON::BI__builtin_neon_vsqadd_v:
12997
8
  case NEON::BI__builtin_neon_vsqaddq_v: {
12998
8
    Int = Intrinsic::aarch64_neon_usqadd;
12999
8
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqadd");
13000
4
  }
13001
7
  case NEON::BI__builtin_neon_vuqadd_v:
13002
15
  case NEON::BI__builtin_neon_vuqaddq_v: {
13003
15
    Int = Intrinsic::aarch64_neon_suqadd;
13004
15
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd");
13005
7
  }
13006
1.40k
  }
13007
1.40k
}
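
For reference, a minimal sketch (not part of the coverage listing) of source that reaches the across-vector reduction cases above. It assumes an AArch64 target and the arm_neon.h intrinsics vld1_u8, vaddv_u8 and vmaxv_u8, e.g. built with clang --target=aarch64-linux-gnu -O2.

// Illustrative only: these intrinsic calls are expected to be lowered
// through the vld1/vaddv/vmaxv cases of the switch above.
#include <arm_neon.h>
#include <cstdio>

int main() {
  uint8_t data[8] = {1, 2, 3, 4, 5, 6, 7, 8};
  uint8x8_t v = vld1_u8(data);   // load 8 lanes
  uint8_t sum = vaddv_u8(v);     // across-vector add reduction
  uint8_t max = vmaxv_u8(v);     // across-vector max reduction
  std::printf("sum=%u max=%u\n", sum, max);  // expect sum=36 max=8
  return 0;
}
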
13008
13009
Value *CodeGenFunction::EmitBPFBuiltinExpr(unsigned BuiltinID,
13010
0
                                           const CallExpr *E) {
13011
0
  assert((BuiltinID == BPF::BI__builtin_preserve_field_info ||
13012
0
          BuiltinID == BPF::BI__builtin_btf_type_id ||
13013
0
          BuiltinID == BPF::BI__builtin_preserve_type_info ||
13014
0
          BuiltinID == BPF::BI__builtin_preserve_enum_value) &&
13015
0
         "unexpected BPF builtin");
13016
13017
  // A sequence number, injected into IR builtin functions, to
13018
  // prevent CSE, since the only difference between the functions
13019
  // may be just the debuginfo metadata.
13020
0
  static uint32_t BuiltinSeqNum;
13021
13022
0
  switch (BuiltinID) {
13023
0
  default:
13024
0
    llvm_unreachable("Unexpected BPF builtin");
13025
0
  case BPF::BI__builtin_preserve_field_info: {
13026
0
    const Expr *Arg = E->getArg(0);
13027
0
    bool IsBitField = Arg->IgnoreParens()->getObjectKind() == OK_BitField;
13028
13029
0
    if (!getDebugInfo()) {
13030
0
      CGM.Error(E->getExprLoc(),
13031
0
                "using __builtin_preserve_field_info() without -g");
13032
0
      return IsBitField ? EmitLValue(Arg).getBitFieldPointer()
13033
0
                        : EmitLValue(Arg).getPointer(*this);
13034
0
    }
13035
13036
    // Enable underlying preserve_*_access_index() generation.
13037
0
    bool OldIsInPreservedAIRegion = IsInPreservedAIRegion;
13038
0
    IsInPreservedAIRegion = true;
13039
0
    Value *FieldAddr = IsBitField ? EmitLValue(Arg).getBitFieldPointer()
13040
0
                                  : EmitLValue(Arg).getPointer(*this);
13041
0
    IsInPreservedAIRegion = OldIsInPreservedAIRegion;
13042
13043
0
    ConstantInt *C = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
13044
0
    Value *InfoKind = ConstantInt::get(Int64Ty, C->getSExtValue());
13045
13046
    // Build the IR for the preserve_field_info intrinsic.
13047
0
    llvm::Function *FnGetFieldInfo = llvm::Intrinsic::getDeclaration(
13048
0
        &CGM.getModule(), llvm::Intrinsic::bpf_preserve_field_info,
13049
0
        {FieldAddr->getType()});
13050
0
    return Builder.CreateCall(FnGetFieldInfo, {FieldAddr, InfoKind});
13051
0
  }
13052
0
  case BPF::BI__builtin_btf_type_id:
13053
0
  case BPF::BI__builtin_preserve_type_info: {
13054
0
    if (!getDebugInfo()) {
13055
0
      CGM.Error(E->getExprLoc(), "using builtin function without -g");
13056
0
      return nullptr;
13057
0
    }
13058
13059
0
    const Expr *Arg0 = E->getArg(0);
13060
0
    llvm::DIType *DbgInfo = getDebugInfo()->getOrCreateStandaloneType(
13061
0
        Arg0->getType(), Arg0->getExprLoc());
13062
13063
0
    ConstantInt *Flag = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
13064
0
    Value *FlagValue = ConstantInt::get(Int64Ty, Flag->getSExtValue());
13065
0
    Value *SeqNumVal = ConstantInt::get(Int32Ty, BuiltinSeqNum++);
13066
13067
0
    llvm::Function *FnDecl;
13068
0
    if (BuiltinID == BPF::BI__builtin_btf_type_id)
13069
0
      FnDecl = llvm::Intrinsic::getDeclaration(
13070
0
          &CGM.getModule(), llvm::Intrinsic::bpf_btf_type_id, {});
13071
0
    else
13072
0
      FnDecl = llvm::Intrinsic::getDeclaration(
13073
0
          &CGM.getModule(), llvm::Intrinsic::bpf_preserve_type_info, {});
13074
0
    CallInst *Fn = Builder.CreateCall(FnDecl, {SeqNumVal, FlagValue});
13075
0
    Fn->setMetadata(LLVMContext::MD_preserve_access_index, DbgInfo);
13076
0
    return Fn;
13077
0
  }
13078
0
  case BPF::BI__builtin_preserve_enum_value: {
13079
0
    if (!getDebugInfo()) {
13080
0
      CGM.Error(E->getExprLoc(), "using builtin function without -g");
13081
0
      return nullptr;
13082
0
    }
13083
13084
0
    const Expr *Arg0 = E->getArg(0);
13085
0
    llvm::DIType *DbgInfo = getDebugInfo()->getOrCreateStandaloneType(
13086
0
        Arg0->getType(), Arg0->getExprLoc());
13087
13088
    // Find enumerator
13089
0
    const auto *UO = cast<UnaryOperator>(Arg0->IgnoreParens());
13090
0
    const auto *CE = cast<CStyleCastExpr>(UO->getSubExpr());
13091
0
    const auto *DR = cast<DeclRefExpr>(CE->getSubExpr());
13092
0
    const auto *Enumerator = cast<EnumConstantDecl>(DR->getDecl());
13093
13094
0
    auto &InitVal = Enumerator->getInitVal();
13095
0
    std::string InitValStr;
13096
0
    if (InitVal.isNegative() || InitVal > uint64_t(INT64_MAX))
13097
0
      InitValStr = std::to_string(InitVal.getSExtValue());
13098
0
    else
13099
0
      InitValStr = std::to_string(InitVal.getZExtValue());
13100
0
    std::string EnumStr = Enumerator->getNameAsString() + ":" + InitValStr;
13101
0
    Value *EnumStrVal = Builder.CreateGlobalStringPtr(EnumStr);
13102
13103
0
    ConstantInt *Flag = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
13104
0
    Value *FlagValue = ConstantInt::get(Int64Ty, Flag->getSExtValue());
13105
0
    Value *SeqNumVal = ConstantInt::get(Int32Ty, BuiltinSeqNum++);
13106
13107
0
    llvm::Function *IntrinsicFn = llvm::Intrinsic::getDeclaration(
13108
0
        &CGM.getModule(), llvm::Intrinsic::bpf_preserve_enum_value, {});
13109
0
    CallInst *Fn =
13110
0
        Builder.CreateCall(IntrinsicFn, {SeqNumVal, EnumStrVal, FlagValue});
13111
0
    Fn->setMetadata(LLVMContext::MD_preserve_access_index, DbgInfo);
13112
0
    return Fn;
13113
0
  }
13114
0
  }
13115
0
}
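
For reference, a minimal sketch (not part of the coverage listing) of the call shape that reaches the BPF cases above. It assumes a BPF target with debug info enabled (-g); the second argument is a CO-RE info-kind constant whose exact numbering is defined by the BPF tooling headers, so the value 0 (byte offset) here is an assumption for illustration.

// Illustrative only: a direct use of __builtin_preserve_field_info.
struct task { int pid; };

unsigned long pid_offset(struct task *t) {
  // Ask for the relocatable byte offset of the accessed field.
  return __builtin_preserve_field_info(t->pid, 0);
}
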
13116
13117
llvm::Value *CodeGenFunction::
13118
94
BuildVector(ArrayRef<llvm::Value*> Ops) {
13119
94
  assert((Ops.size() & (Ops.size() - 1)) == 0 &&
13120
94
         "Not a power-of-two sized vector!");
13121
94
  bool AllConstants = true;
13122
216
  for (unsigned i = 0, e = Ops.size(); i != e && AllConstants; ++i)
13123
122
    AllConstants &= isa<Constant>(Ops[i]);
13124
13125
  // If this is a constant vector, create a ConstantVector.
13126
94
  if (AllConstants) {
13127
4
    SmallVector<llvm::Constant*, 16> CstOps;
13128
34
    for (unsigned i = 0, e = Ops.size(); i != e; ++i)
13129
30
      CstOps.push_back(cast<Constant>(Ops[i]));
13130
4
    return llvm::ConstantVector::get(CstOps);
13131
4
  }
13132
13133
  // Otherwise, insertelement the values to build the vector.
13134
90
  Value *Result = llvm::PoisonValue::get(
13135
90
      llvm::FixedVectorType::get(Ops[0]->getType(), Ops.size()));
13136
13137
686
  for (unsigned i = 0, e = Ops.size(); i != e; ++i)
13138
596
    Result = Builder.CreateInsertElement(Result, Ops[i], Builder.getInt64(i));
13139
13140
90
  return Result;
13141
94
}
13142
13143
// Convert the mask from an integer type to a vector of i1.
13144
static Value *getMaskVecValue(CodeGenFunction &CGF, Value *Mask,
13145
4.74k
                              unsigned NumElts) {
13146
13147
4.74k
  auto *MaskTy = llvm::FixedVectorType::get(
13148
4.74k
      CGF.Builder.getInt1Ty(),
13149
4.74k
      cast<IntegerType>(Mask->getType())->getBitWidth());
13150
4.74k
  Value *MaskVec = CGF.Builder.CreateBitCast(Mask, MaskTy);
13151
13152
  // If we have less than 8 elements, then the starting mask was an i8 and
13153
  // we need to extract down to the right number of elements.
13154
4.74k
  if (NumElts < 8) {
13155
1.06k
    int Indices[4];
13156
4.63k
    for (unsigned i = 0; i != NumElts; ++i)
13157
3.57k
      Indices[i] = i;
13158
1.06k
    MaskVec = CGF.Builder.CreateShuffleVector(
13159
1.06k
        MaskVec, MaskVec, ArrayRef(Indices, NumElts), "extract");
13160
1.06k
  }
13161
4.74k
  return MaskVec;
13162
4.74k
}
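
For reference, a minimal sketch (not part of the coverage listing) of the bit-to-lane mapping that getMaskVecValue's bitcast establishes: bit i of the integer mask controls lane i of the <N x i1> vector. Written out here as plain scalar C++, purely to illustrate the mapping.

// Illustrative only: expand an 8-bit mask into one bool per lane.
#include <array>
#include <cstdint>

std::array<bool, 8> expandMask(uint8_t mask) {
  std::array<bool, 8> lanes{};
  for (unsigned i = 0; i != 8; ++i)
    lanes[i] = (mask >> i) & 1;   // bit i -> lane i
  return lanes;
}
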
13163
13164
static Value *EmitX86MaskedStore(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
13165
45
                                 Align Alignment) {
13166
45
  Value *Ptr = Ops[0];
13167
13168
45
  Value *MaskVec = getMaskVecValue(
13169
45
      CGF, Ops[2],
13170
45
      cast<llvm::FixedVectorType>(Ops[1]->getType())->getNumElements());
13171
13172
45
  return CGF.Builder.CreateMaskedStore(Ops[1], Ptr, Alignment, MaskVec);
13173
45
}
13174
13175
static Value *EmitX86MaskedLoad(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
13176
94
                                Align Alignment) {
13177
94
  llvm::Type *Ty = Ops[1]->getType();
13178
94
  Value *Ptr = Ops[0];
13179
13180
94
  Value *MaskVec = getMaskVecValue(
13181
94
      CGF, Ops[2], cast<llvm::FixedVectorType>(Ty)->getNumElements());
13182
13183
94
  return CGF.Builder.CreateMaskedLoad(Ty, Ptr, Alignment, MaskVec, Ops[1]);
13184
94
}
13185
13186
static Value *EmitX86ExpandLoad(CodeGenFunction &CGF,
13187
44
                                ArrayRef<Value *> Ops) {
13188
44
  auto *ResultTy = cast<llvm::VectorType>(Ops[1]->getType());
13189
44
  Value *Ptr = Ops[0];
13190
13191
44
  Value *MaskVec = getMaskVecValue(
13192
44
      CGF, Ops[2], cast<FixedVectorType>(ResultTy)->getNumElements());
13193
13194
44
  llvm::Function *F = CGF.CGM.getIntrinsic(Intrinsic::masked_expandload,
13195
44
                                           ResultTy);
13196
44
  return CGF.Builder.CreateCall(F, { Ptr, MaskVec, Ops[1] });
13197
44
}
13198
13199
static Value *EmitX86CompressExpand(CodeGenFunction &CGF,
13200
                                    ArrayRef<Value *> Ops,
13201
88
                                    bool IsCompress) {
13202
88
  auto *ResultTy = cast<llvm::FixedVectorType>(Ops[1]->getType());
13203
13204
88
  Value *MaskVec = getMaskVecValue(CGF, Ops[2], ResultTy->getNumElements());
13205
13206
88
  Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
13207
88
                                 : Intrinsic::x86_avx512_mask_expand;
13208
88
  llvm::Function *F = CGF.CGM.getIntrinsic(IID, ResultTy);
13209
88
  return CGF.Builder.CreateCall(F, { Ops[0], Ops[1], MaskVec });
13210
88
}
13211
13212
static Value *EmitX86CompressStore(CodeGenFunction &CGF,
13213
22
                                   ArrayRef<Value *> Ops) {
13214
22
  auto *ResultTy = cast<llvm::FixedVectorType>(Ops[1]->getType());
13215
22
  Value *Ptr = Ops[0];
13216
13217
22
  Value *MaskVec = getMaskVecValue(CGF, Ops[2], ResultTy->getNumElements());
13218
13219
22
  llvm::Function *F = CGF.CGM.getIntrinsic(Intrinsic::masked_compressstore,
13220
22
                                           ResultTy);
13221
22
  return CGF.Builder.CreateCall(F, { Ops[1], Ptr, MaskVec });
13222
22
}
13223
13224
static Value *EmitX86MaskLogic(CodeGenFunction &CGF, Instruction::BinaryOps Opc,
13225
                              ArrayRef<Value *> Ops,
13226
68
                              bool InvertLHS = false) {
13227
68
  unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
13228
68
  Value *LHS = getMaskVecValue(CGF, Ops[0], NumElts);
13229
68
  Value *RHS = getMaskVecValue(CGF, Ops[1], NumElts);
13230
13231
68
  if (InvertLHS)
13232
14
    LHS = CGF.Builder.CreateNot(LHS);
13233
13234
68
  return CGF.Builder.CreateBitCast(CGF.Builder.CreateBinOp(Opc, LHS, RHS),
13235
68
                                   Ops[0]->getType());
13236
68
}
13237
13238
static Value *EmitX86FunnelShift(CodeGenFunction &CGF, Value *Op0, Value *Op1,
13239
168
                                 Value *Amt, bool IsRight) {
13240
168
  llvm::Type *Ty = Op0->getType();
13241
13242
  // Amount may be scalar immediate, in which case create a splat vector.
13243
  // Funnel shifts amounts are treated as modulo and types are all power-of-2 so
13244
  // we only care about the lowest log2 bits anyway.
13245
168
  if (Amt->getType() != Ty) {
13246
118
    unsigned NumElts = cast<llvm::FixedVectorType>(Ty)->getNumElements();
13247
118
    Amt = CGF.Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
13248
118
    Amt = CGF.Builder.CreateVectorSplat(NumElts, Amt);
13249
118
  }
13250
13251
168
  unsigned IID = IsRight ? Intrinsic::fshr : Intrinsic::fshl;
13252
168
  Function *F = CGF.CGM.getIntrinsic(IID, Ty);
13253
168
  return CGF.Builder.CreateCall(F, {Op0, Op1, Amt});
13254
168
}
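
For reference, a minimal sketch (not part of the coverage listing) of the llvm.fshl semantics that EmitX86FunnelShift relies on: concatenate the two operands (first operand in the high half), shift left by the amount taken modulo the lane width, and keep the top lane-width bits. The scalar model below is for 32-bit lanes only.

// Illustrative only: scalar model of llvm.fshl.i32.
#include <cstdint>

uint32_t fshl32(uint32_t hi, uint32_t lo, uint32_t n) {
  uint64_t concat = (uint64_t(hi) << 32) | lo;  // hi:lo as a 64-bit value
  n %= 32;                                      // shift amount is modulo 32
  return uint32_t((concat << n) >> 32);         // top 32 bits of the shift
}
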
13255
13256
static Value *EmitX86vpcom(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
13257
288
                           bool IsSigned) {
13258
288
  Value *Op0 = Ops[0];
13259
288
  Value *Op1 = Ops[1];
13260
288
  llvm::Type *Ty = Op0->getType();
13261
288
  uint64_t Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
13262
13263
288
  CmpInst::Predicate Pred;
13264
288
  switch (Imm) {
13265
64
  case 0x0:
13266
64
    Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
13267
64
    break;
13268
32
  case 0x1:
13269
32
    Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
13270
32
    break;
13271
32
  case 0x2:
13272
32
    Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
13273
32
    break;
13274
32
  case 0x3:
13275
32
    Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
13276
32
    break;
13277
32
  case 0x4:
13278
32
    Pred = ICmpInst::ICMP_EQ;
13279
32
    break;
13280
32
  case 0x5:
13281
32
    Pred = ICmpInst::ICMP_NE;
13282
32
    break;
13283
32
  case 0x6:
13284
32
    return llvm::Constant::getNullValue(Ty); // FALSE
13285
32
  case 0x7:
13286
32
    return llvm::Constant::getAllOnesValue(Ty); // TRUE
13287
0
  default:
13288
0
    llvm_unreachable("Unexpected XOP vpcom/vpcomu predicate");
13289
288
  }
13290
13291
224
  Value *Cmp = CGF.Builder.CreateICmp(Pred, Op0, Op1);
13292
224
  Value *Res = CGF.Builder.CreateSExt(Cmp, Ty);
13293
224
  return Res;
13294
288
}
13295
13296
static Value *EmitX86Select(CodeGenFunction &CGF,
13297
2.92k
                            Value *Mask, Value *Op0, Value *Op1) {
13298
13299
  // If the mask is all ones just return first argument.
13300
2.92k
  if (const auto *C = dyn_cast<Constant>(Mask))
13301
143
    if (C->isAllOnesValue())
13302
143
      return Op0;
13303
13304
2.78k
  Mask = getMaskVecValue(
13305
2.78k
      CGF, Mask, cast<llvm::FixedVectorType>(Op0->getType())->getNumElements());
13306
13307
2.78k
  return CGF.Builder.CreateSelect(Mask, Op0, Op1);
13308
2.92k
}
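
For reference, a minimal sketch (not part of the coverage listing) of a user-level intrinsic whose lowering ends in EmitX86Select: the unmasked operation is emitted first, then a per-lane select blends it with the pass-through operand. It assumes an AVX-512F target and the _mm512_mask_add_ps intrinsic from immintrin.h (built with -mavx512f).

// Illustrative only: lanes with the mask bit set get a+b, the rest keep src.
#include <immintrin.h>

__m512 blend_add(__m512 src, __mmask16 k, __m512 a, __m512 b) {
  return _mm512_mask_add_ps(src, k, a, b);
}
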
13309
13310
static Value *EmitX86ScalarSelect(CodeGenFunction &CGF,
13311
229
                                  Value *Mask, Value *Op0, Value *Op1) {
13312
  // If the mask is all ones just return first argument.
13313
229
  if (const auto *C = dyn_cast<Constant>(Mask))
13314
25
    if (C->isAllOnesValue())
13315
25
      return Op0;
13316
13317
204
  auto *MaskTy = llvm::FixedVectorType::get(
13318
204
      CGF.Builder.getInt1Ty(), Mask->getType()->getIntegerBitWidth());
13319
204
  Mask = CGF.Builder.CreateBitCast(Mask, MaskTy);
13320
204
  Mask = CGF.Builder.CreateExtractElement(Mask, (uint64_t)0);
13321
204
  return CGF.Builder.CreateSelect(Mask, Op0, Op1);
13322
229
}
13323
13324
static Value *EmitX86MaskedCompareResult(CodeGenFunction &CGF, Value *Cmp,
13325
2.24k
                                         unsigned NumElts, Value *MaskIn) {
13326
2.24k
  if (MaskIn) {
13327
1.75k
    const auto *C = dyn_cast<Constant>(MaskIn);
13328
1.75k
    if (!C || !C->isAllOnesValue())
13329
806
      Cmp = CGF.Builder.CreateAnd(Cmp, getMaskVecValue(CGF, MaskIn, NumElts));
13330
1.75k
  }
13331
13332
2.24k
  if (NumElts < 8) {
13333
501
    int Indices[8];
13334
2.17k
    for (unsigned i = 0; i != NumElts; ++i)
13335
1.67k
      Indices[i] = i;
13336
2.83k
    for (unsigned i = NumElts; i != 8; ++i)
13337
2.33k
      Indices[i] = i % NumElts + NumElts;
13338
501
    Cmp = CGF.Builder.CreateShuffleVector(
13339
501
        Cmp, llvm::Constant::getNullValue(Cmp->getType()), Indices);
13340
501
  }
13341
13342
2.24k
  return CGF.Builder.CreateBitCast(Cmp,
13343
2.24k
                                   IntegerType::get(CGF.getLLVMContext(),
13344
2.24k
                                                    std::max(NumElts, 8U)));
13345
2.24k
}
13346
13347
static Value *EmitX86MaskedCompare(CodeGenFunction &CGF, unsigned CC,
13348
969
                                   bool Signed, ArrayRef<Value *> Ops) {
13349
969
  assert((Ops.size() == 2 || Ops.size() == 4) &&
13350
969
         "Unexpected number of arguments");
13351
969
  unsigned NumElts =
13352
969
      cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
13353
969
  Value *Cmp;
13354
13355
969
  if (CC == 3) {
13356
0
    Cmp = Constant::getNullValue(
13357
0
        llvm::FixedVectorType::get(CGF.Builder.getInt1Ty(), NumElts));
13358
969
  } else if (CC == 7) {
13359
0
    Cmp = Constant::getAllOnesValue(
13360
0
        llvm::FixedVectorType::get(CGF.Builder.getInt1Ty(), NumElts));
13361
969
  } else {
13362
969
    ICmpInst::Predicate Pred;
13363
969
    switch (CC) {
13364
0
    default: llvm_unreachable("Unknown condition code");
13365
199
    case 0: Pred = ICmpInst::ICMP_EQ;  break;
13366
100
    case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
13367
81
    case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
13368
429
    case 4: Pred = ICmpInst::ICMP_NE;  break;
13369
80
    case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
13370
80
    case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
13371
969
    }
13372
969
    Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]);
13373
969
  }
13374
13375
969
  Value *MaskIn = nullptr;
13376
969
  if (Ops.size() == 4)
13377
951
    MaskIn = Ops[3];
13378
13379
969
  return EmitX86MaskedCompareResult(CGF, Cmp, NumElts, MaskIn);
13380
969
}
13381
13382
18
static Value *EmitX86ConvertToMask(CodeGenFunction &CGF, Value *In) {
13383
18
  Value *Zero = Constant::getNullValue(In->getType());
13384
18
  return EmitX86MaskedCompare(CGF, 1, true, { In, Zero });
13385
18
}
13386
13387
static Value *EmitX86ConvertIntToFp(CodeGenFunction &CGF, const CallExpr *E,
13388
138
                                    ArrayRef<Value *> Ops, bool IsSigned) {
13389
138
  unsigned Rnd = cast<llvm::ConstantInt>(Ops[3])->getZExtValue();
13390
138
  llvm::Type *Ty = Ops[1]->getType();
13391
13392
138
  Value *Res;
13393
138
  if (Rnd != 4) {
13394
90
    Intrinsic::ID IID = IsSigned ? Intrinsic::x86_avx512_sitofp_round
13395
90
                                 : Intrinsic::x86_avx512_uitofp_round;
13396
90
    Function *F = CGF.CGM.getIntrinsic(IID, { Ty, Ops[0]->getType() });
13397
90
    Res = CGF.Builder.CreateCall(F, { Ops[0], Ops[3] });
13398
90
  } else {
13399
48
    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
13400
48
    Res = IsSigned ? CGF.Builder.CreateSIToFP(Ops[0], Ty)
13401
48
                   : CGF.Builder.CreateUIToFP(Ops[0], Ty);
13402
48
  }
13403
13404
138
  return EmitX86Select(CGF, Ops[2], Res, Ops[1]);
13405
138
}
13406
13407
// Lowers X86 FMA intrinsics to IR.
13408
static Value *EmitX86FMAExpr(CodeGenFunction &CGF, const CallExpr *E,
13409
                             ArrayRef<Value *> Ops, unsigned BuiltinID,
13410
433
                             bool IsAddSub) {
13411
13412
433
  bool Subtract = false;
13413
433
  Intrinsic::ID IID = Intrinsic::not_intrinsic;
13414
433
  switch (BuiltinID) {
13415
193
  default: break;
13416
193
  case clang::X86::BI__builtin_ia32_vfmsubph512_mask3:
13417
4
    Subtract = true;
13418
4
    [[fallthrough]];
13419
20
  case clang::X86::BI__builtin_ia32_vfmaddph512_mask:
13420
28
  case clang::X86::BI__builtin_ia32_vfmaddph512_maskz:
13421
32
  case clang::X86::BI__builtin_ia32_vfmaddph512_mask3:
13422
32
    IID = llvm::Intrinsic::x86_avx512fp16_vfmadd_ph_512;
13423
32
    break;
13424
2
  case clang::X86::BI__builtin_ia32_vfmsubaddph512_mask3:
13425
2
    Subtract = true;
13426
2
    [[fallthrough]];
13427
10
  case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask:
13428
14
  case clang::X86::BI__builtin_ia32_vfmaddsubph512_maskz:
13429
16
  case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask3:
13430
16
    IID = llvm::Intrinsic::x86_avx512fp16_vfmaddsub_ph_512;
13431
16
    break;
13432
8
  case clang::X86::BI__builtin_ia32_vfmsubps512_mask3:
13433
8
    Subtract = true;
13434
8
    [[fallthrough]];
13435
40
  case clang::X86::BI__builtin_ia32_vfmaddps512_mask:
13436
56
  case clang::X86::BI__builtin_ia32_vfmaddps512_maskz:
13437
64
  case clang::X86::BI__builtin_ia32_vfmaddps512_mask3:
13438
64
    IID = llvm::Intrinsic::x86_avx512_vfmadd_ps_512; break;
13439
8
  case clang::X86::BI__builtin_ia32_vfmsubpd512_mask3:
13440
8
    Subtract = true;
13441
8
    [[fallthrough]];
13442
40
  case clang::X86::BI__builtin_ia32_vfmaddpd512_mask:
13443
56
  case clang::X86::BI__builtin_ia32_vfmaddpd512_maskz:
13444
64
  case clang::X86::BI__builtin_ia32_vfmaddpd512_mask3:
13445
64
    IID = llvm::Intrinsic::x86_avx512_vfmadd_pd_512; break;
13446
4
  case clang::X86::BI__builtin_ia32_vfmsubaddps512_mask3:
13447
4
    Subtract = true;
13448
4
    [[fallthrough]];
13449
20
  case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask:
13450
28
  case clang::X86::BI__builtin_ia32_vfmaddsubps512_maskz:
13451
32
  case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask3:
13452
32
    IID = llvm::Intrinsic::x86_avx512_vfmaddsub_ps_512;
13453
32
    break;
13454
4
  case clang::X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
13455
4
    Subtract = true;
13456
4
    [[fallthrough]];
13457
20
  case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask:
13458
28
  case clang::X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
13459
32
  case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
13460
32
    IID = llvm::Intrinsic::x86_avx512_vfmaddsub_pd_512;
13461
32
    break;
13462
433
  }
13463
13464
433
  Value *A = Ops[0];
13465
433
  Value *B = Ops[1];
13466
433
  Value *C = Ops[2];
13467
13468
433
  if (Subtract)
13469
30
    C = CGF.Builder.CreateFNeg(C);
13470
13471
433
  Value *Res;
13472
13473
  // Only handle in case of _MM_FROUND_CUR_DIRECTION/4 (no rounding).
13474
433
  if (IID != Intrinsic::not_intrinsic &&
13475
433
      (cast<llvm::ConstantInt>(Ops.back())->getZExtValue() != (uint64_t)4 ||
13476
240
       IsAddSub)) {
13477
160
    Function *Intr = CGF.CGM.getIntrinsic(IID);
13478
160
    Res = CGF.Builder.CreateCall(Intr, {A, B, C, Ops.back() });
13479
273
  } else {
13480
273
    llvm::Type *Ty = A->getType();
13481
273
    Function *FMA;
13482
273
    if (CGF.Builder.getIsFPConstrained()) {
13483
32
      CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
13484
32
      FMA = CGF.CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, Ty);
13485
32
      Res = CGF.Builder.CreateConstrainedFPCall(FMA, {A, B, C});
13486
241
    } else {
13487
241
      FMA = CGF.CGM.getIntrinsic(Intrinsic::fma, Ty);
13488
241
      Res = CGF.Builder.CreateCall(FMA, {A, B, C});
13489
241
    }
13490
273
  }
13491
13492
  // Handle any required masking.
13493
433
  Value *MaskFalseVal = nullptr;
13494
433
  switch (BuiltinID) {
13495
16
  case clang::X86::BI__builtin_ia32_vfmaddph512_mask:
13496
48
  case clang::X86::BI__builtin_ia32_vfmaddps512_mask:
13497
80
  case clang::X86::BI__builtin_ia32_vfmaddpd512_mask:
13498
88
  case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask:
13499
104
  case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask:
13500
120
  case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask:
13501
120
    MaskFalseVal = Ops[0];
13502
120
    break;
13503
8
  case clang::X86::BI__builtin_ia32_vfmaddph512_maskz:
13504
24
  case clang::X86::BI__builtin_ia32_vfmaddps512_maskz:
13505
40
  case clang::X86::BI__builtin_ia32_vfmaddpd512_maskz:
13506
44
  case clang::X86::BI__builtin_ia32_vfmaddsubph512_maskz:
13507
52
  case clang::X86::BI__builtin_ia32_vfmaddsubps512_maskz:
13508
60
  case clang::X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
13509
60
    MaskFalseVal = Constant::getNullValue(Ops[0]->getType());
13510
60
    break;
13511
4
  case clang::X86::BI__builtin_ia32_vfmsubph512_mask3:
13512
8
  case clang::X86::BI__builtin_ia32_vfmaddph512_mask3:
13513
16
  case clang::X86::BI__builtin_ia32_vfmsubps512_mask3:
13514
24
  case clang::X86::BI__builtin_ia32_vfmaddps512_mask3:
13515
32
  case clang::X86::BI__builtin_ia32_vfmsubpd512_mask3:
13516
40
  case clang::X86::BI__builtin_ia32_vfmaddpd512_mask3:
13517
42
  case clang::X86::BI__builtin_ia32_vfmsubaddph512_mask3:
13518
44
  case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask3:
13519
48
  case clang::X86::BI__builtin_ia32_vfmsubaddps512_mask3:
13520
52
  case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask3:
13521
56
  case clang::X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
13522
60
  case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
13523
60
    MaskFalseVal = Ops[2];
13524
60
    break;
13525
433
  }
13526
13527
433
  if (MaskFalseVal)
13528
240
    return EmitX86Select(CGF, Ops[3], Res, MaskFalseVal);
13529
13530
193
  return Res;
13531
433
}
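A per-lane sketch of the semantics the masked 512-bit FMA lowering above produces (illustrative helper, not part of CGBuiltin.cpp; a lane whose mask bit is set takes fma(a, b, c), with c negated for the *sub* builtins, and every other lane keeps the "false" value: the source operand for _mask, zero for _maskz, the accumulator for _mask3):

#include <cmath>
#include <cstdint>

void maskedFMA512Lanes(const float *A, const float *B, const float *C,
                       const float *MaskFalse, float *Out, uint32_t Mask,
                       unsigned NumElts, bool Subtract) {
  for (unsigned I = 0; I != NumElts; ++I) {
    float Acc = Subtract ? -C[I] : C[I];
    Out[I] = ((Mask >> I) & 1) ? std::fma(A[I], B[I], Acc) : MaskFalse[I];
  }
}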
13532
13533
static Value *EmitScalarFMAExpr(CodeGenFunction &CGF, const CallExpr *E,
13534
                                MutableArrayRef<Value *> Ops, Value *Upper,
13535
                                bool ZeroMask = false, unsigned PTIdx = 0,
13536
200
                                bool NegAcc = false) {
13537
200
  unsigned Rnd = 4;
13538
200
  if (Ops.size() > 4)
13539
144
    Rnd = cast<llvm::ConstantInt>(Ops[4])->getZExtValue();
13540
13541
200
  if (NegAcc)
13542
20
    Ops[2] = CGF.Builder.CreateFNeg(Ops[2]);
13543
13544
200
  Ops[0] = CGF.Builder.CreateExtractElement(Ops[0], (uint64_t)0);
13545
200
  Ops[1] = CGF.Builder.CreateExtractElement(Ops[1], (uint64_t)0);
13546
200
  Ops[2] = CGF.Builder.CreateExtractElement(Ops[2], (uint64_t)0);
13547
200
  Value *Res;
13548
200
  if (Rnd != 4) {
13549
80
    Intrinsic::ID IID;
13550
13551
80
    switch (Ops[0]->getType()->getPrimitiveSizeInBits()) {
13552
16
    case 16:
13553
16
      IID = Intrinsic::x86_avx512fp16_vfmadd_f16;
13554
16
      break;
13555
32
    case 32:
13556
32
      IID = Intrinsic::x86_avx512_vfmadd_f32;
13557
32
      break;
13558
32
    case 64:
13559
32
      IID = Intrinsic::x86_avx512_vfmadd_f64;
13560
32
      break;
13561
0
    default:
13562
0
      llvm_unreachable("Unexpected size");
13563
80
    }
13564
80
    Res = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IID),
13565
80
                                 {Ops[0], Ops[1], Ops[2], Ops[4]});
13566
120
  } else if (CGF.Builder.getIsFPConstrained()) {
13567
16
    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
13568
16
    Function *FMA = CGF.CGM.getIntrinsic(
13569
16
        Intrinsic::experimental_constrained_fma, Ops[0]->getType());
13570
16
    Res = CGF.Builder.CreateConstrainedFPCall(FMA, Ops.slice(0, 3));
13571
104
  } else {
13572
104
    Function *FMA = CGF.CGM.getIntrinsic(Intrinsic::fma, Ops[0]->getType());
13573
104
    Res = CGF.Builder.CreateCall(FMA, Ops.slice(0, 3));
13574
104
  }
13575
  // If we have more than 3 arguments, we need to do masking.
13576
200
  if (Ops.size() > 3) {
13577
144
    Value *PassThru = ZeroMask ? Constant::getNullValue(Res->getType())
13578
144
                               : Ops[PTIdx];
13579
13580
    // If we negated the accumulator and it's the PassThru value, we need to
13581
    // bypass the negate. Conveniently Upper should be the same thing in this
13582
    // case.
13583
144
    if (NegAcc && PTIdx == 2)
13584
20
      PassThru = CGF.Builder.CreateExtractElement(Upper, (uint64_t)0);
13585
13586
144
    Res = EmitX86ScalarSelect(CGF, Ops[3], Res, PassThru);
13587
144
  }
13588
200
  return CGF.Builder.CreateInsertElement(Upper, Res, (uint64_t)0);
13589
200
}
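A rough element-0 model of the scalar *_mask FMA forms handled above (an illustrative helper, not from the source): fma is computed on lane 0 only, selected against the pass-through when the low mask bit is clear, and the remaining lanes come from the Upper operand unchanged.

#include <cmath>

float scalarMaskedFMALane0(float A0, float B0, float C0, float PassThru0,
                           unsigned Mask8, bool NegAcc) {
  float R = std::fma(A0, B0, NegAcc ? -C0 : C0);
  return (Mask8 & 1) ? R : PassThru0;
}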
13590
13591
static Value *EmitX86Muldq(CodeGenFunction &CGF, bool IsSigned,
13592
27
                           ArrayRef<Value *> Ops) {
13593
27
  llvm::Type *Ty = Ops[0]->getType();
13594
  // Arguments have a vXi32 type so cast to vXi64.
13595
27
  Ty = llvm::FixedVectorType::get(CGF.Int64Ty,
13596
27
                                  Ty->getPrimitiveSizeInBits() / 64);
13597
27
  Value *LHS = CGF.Builder.CreateBitCast(Ops[0], Ty);
13598
27
  Value *RHS = CGF.Builder.CreateBitCast(Ops[1], Ty);
13599
13600
27
  if (IsSigned) {
13601
    // Shift left then arithmetic shift right.
13602
12
    Constant *ShiftAmt = ConstantInt::get(Ty, 32);
13603
12
    LHS = CGF.Builder.CreateShl(LHS, ShiftAmt);
13604
12
    LHS = CGF.Builder.CreateAShr(LHS, ShiftAmt);
13605
12
    RHS = CGF.Builder.CreateShl(RHS, ShiftAmt);
13606
12
    RHS = CGF.Builder.CreateAShr(RHS, ShiftAmt);
13607
15
  } else {
13608
    // Clear the upper bits.
13609
15
    Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
13610
15
    LHS = CGF.Builder.CreateAnd(LHS, Mask);
13611
15
    RHS = CGF.Builder.CreateAnd(RHS, Mask);
13612
15
  }
13613
13614
27
  return CGF.Builder.CreateMul(LHS, RHS);
13615
27
}
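A one-lane model of what EmitX86Muldq emits (a sketch; the helper name is made up): pmuldq multiplies the sign-extended low 32 bits of each 64-bit lane, pmuludq the zero-extended low 32 bits.

#include <cstdint>

uint64_t muldqLane(uint64_t LHS, uint64_t RHS, bool IsSigned) {
  if (IsSigned)
    return (uint64_t)((int64_t)(int32_t)(uint32_t)LHS *
                      (int64_t)(int32_t)(uint32_t)RHS); // pmuldq
  return (uint64_t)(uint32_t)LHS * (uint64_t)(uint32_t)RHS; // pmuludq
}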
13616
13617
// Emit a masked pternlog intrinsic. This only exists because the header has to
13618
// use a macro and we aren't able to pass the input argument to a pternlog
13619
// builtin and a select builtin without evaluating it twice.
13620
static Value *EmitX86Ternlog(CodeGenFunction &CGF, bool ZeroMask,
13621
24
                             ArrayRef<Value *> Ops) {
13622
24
  llvm::Type *Ty = Ops[0]->getType();
13623
13624
24
  unsigned VecWidth = Ty->getPrimitiveSizeInBits();
13625
24
  unsigned EltWidth = Ty->getScalarSizeInBits();
13626
24
  Intrinsic::ID IID;
13627
24
  if (VecWidth == 128 && EltWidth == 32)
13628
3
    IID = Intrinsic::x86_avx512_pternlog_d_128;
13629
21
  else if (VecWidth == 256 && EltWidth == 32)
13630
3
    IID = Intrinsic::x86_avx512_pternlog_d_256;
13631
18
  else if (VecWidth == 512 && EltWidth == 32)
13632
6
    IID = Intrinsic::x86_avx512_pternlog_d_512;
13633
12
  else if (VecWidth == 128 && EltWidth == 64)
13634
3
    IID = Intrinsic::x86_avx512_pternlog_q_128;
13635
9
  else if (VecWidth == 256 && EltWidth == 64)
13636
3
    IID = Intrinsic::x86_avx512_pternlog_q_256;
13637
6
  else if (VecWidth == 512 && EltWidth == 64)
13638
6
    IID = Intrinsic::x86_avx512_pternlog_q_512;
13639
0
  else
13640
0
    llvm_unreachable("Unexpected intrinsic");
13641
13642
24
  Value *Ternlog = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IID),
13643
24
                                          Ops.drop_back());
13644
24
  Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty) : Ops[0];
13645
24
  return EmitX86Select(CGF, Ops[4], Ternlog, PassThru);
13646
24
}
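The pternlog intrinsic called above evaluates an arbitrary three-input boolean function: each result bit is looked up in the 8-bit immediate, indexed by the corresponding bits of the three sources. A bitwise scalar sketch of that semantics (illustrative, not the intrinsic's implementation):

#include <cstdint>

uint64_t ternlogWord(uint64_t A, uint64_t B, uint64_t C, uint8_t Imm) {
  uint64_t R = 0;
  for (unsigned Bit = 0; Bit != 64; ++Bit) {
    // Form the 3-bit index {a, b, c} for this position and read that bit of Imm.
    unsigned Idx = (((A >> Bit) & 1) << 2) | (((B >> Bit) & 1) << 1) |
                   ((C >> Bit) & 1);
    R |= (uint64_t)((Imm >> Idx) & 1) << Bit;
  }
  return R;
}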
13647
13648
static Value *EmitX86SExtMask(CodeGenFunction &CGF, Value *Op,
13649
18
                              llvm::Type *DstTy) {
13650
18
  unsigned NumberOfElements =
13651
18
      cast<llvm::FixedVectorType>(DstTy)->getNumElements();
13652
18
  Value *Mask = getMaskVecValue(CGF, Op, NumberOfElements);
13653
18
  return CGF.Builder.CreateSExt(Mask, DstTy, "vpmovm2");
13654
18
}
13655
13656
65
Value *CodeGenFunction::EmitX86CpuIs(const CallExpr *E) {
13657
65
  const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts();
13658
65
  StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString();
13659
65
  return EmitX86CpuIs(CPUStr);
13660
65
}
13661
13662
// Convert F16 halves to floats.
13663
static Value *EmitX86CvtF16ToFloatExpr(CodeGenFunction &CGF,
13664
                                       ArrayRef<Value *> Ops,
13665
74
                                       llvm::Type *DstTy) {
13666
74
  assert((Ops.size() == 1 || Ops.size() == 3 || Ops.size() == 4) &&
13667
74
         "Unknown cvtph2ps intrinsic");
13668
13669
  // If the SAE intrinsic doesn't use default rounding then we can't upgrade.
13670
74
  if (Ops.size() == 4 && cast<llvm::ConstantInt>(Ops[3])->getZExtValue() != 4) {
13671
24
    Function *F =
13672
24
        CGF.CGM.getIntrinsic(Intrinsic::x86_avx512_mask_vcvtph2ps_512);
13673
24
    return CGF.Builder.CreateCall(F, {Ops[0], Ops[1], Ops[2], Ops[3]});
13674
24
  }
13675
13676
50
  unsigned NumDstElts = cast<llvm::FixedVectorType>(DstTy)->getNumElements();
13677
50
  Value *Src = Ops[0];
13678
13679
  // Extract the subvector.
13680
50
  if (NumDstElts !=
13681
50
      cast<llvm::FixedVectorType>(Src->getType())->getNumElements()) {
13682
14
    assert(NumDstElts == 4 && "Unexpected vector size");
13683
14
    Src = CGF.Builder.CreateShuffleVector(Src, ArrayRef<int>{0, 1, 2, 3});
13684
14
  }
13685
13686
  // Bitcast from vXi16 to vXf16.
13687
50
  auto *HalfTy = llvm::FixedVectorType::get(
13688
50
      llvm::Type::getHalfTy(CGF.getLLVMContext()), NumDstElts);
13689
50
  Src = CGF.Builder.CreateBitCast(Src, HalfTy);
13690
13691
  // Perform the fp-extension.
13692
50
  Value *Res = CGF.Builder.CreateFPExt(Src, DstTy, "cvtph2ps");
13693
13694
50
  if (Ops.size() >= 3)
13695
44
    Res = EmitX86Select(CGF, Ops[2], Res, Ops[1]);
13696
50
  return Res;
13697
50
}
13698
13699
201
Value *CodeGenFunction::EmitX86CpuIs(StringRef CPUStr) {
13700
13701
201
  llvm::Type *Int32Ty = Builder.getInt32Ty();
13702
13703
  // Matching the struct layout from the compiler-rt/libgcc structure that is
13704
  // filled in:
13705
  // unsigned int __cpu_vendor;
13706
  // unsigned int __cpu_type;
13707
  // unsigned int __cpu_subtype;
13708
  // unsigned int __cpu_features[1];
13709
201
  llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty,
13710
201
                                          llvm::ArrayType::get(Int32Ty, 1));
13711
13712
  // Grab the global __cpu_model.
13713
201
  llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model");
13714
201
  cast<llvm::GlobalValue>(CpuModel)->setDSOLocal(true);
13715
13716
  // Calculate the index needed to access the correct field based on the
13717
  // range. Also adjust the expected value.
13718
201
  unsigned Index;
13719
201
  unsigned Value;
13720
201
  std::tie(Index, Value) = StringSwitch<std::pair<unsigned, unsigned>>(CPUStr)
13721
201
#define X86_VENDOR(ENUM, STRING)                                               \
13722
402
  .Case(STRING, {0u, static_cast<unsigned>(llvm::X86::ENUM)})
13723
201
#define X86_CPU_TYPE_ALIAS(ENUM, ALIAS)                                        \
13724
804
  .Case(ALIAS, {1u, static_cast<unsigned>(llvm::X86::ENUM)})
13725
201
#define X86_CPU_TYPE(ENUM, STR)                                                \
13726
3.81k
  .Case(STR, {1u, static_cast<unsigned>(llvm::X86::ENUM)})
13727
201
#define X86_CPU_SUBTYPE_ALIAS(ENUM, ALIAS)                                     \
13728
1.00k
  .Case(ALIAS, {2u, static_cast<unsigned>(llvm::X86::ENUM)})
13729
201
#define X86_CPU_SUBTYPE(ENUM, STR)                                             \
13730
6.83k
  .Case(STR, {2u, static_cast<unsigned>(llvm::X86::ENUM)})
13731
201
#include "llvm/TargetParser/X86TargetParser.def"
13732
201
                               .Default({0, 0});
13733
201
  assert(Value != 0 && "Invalid CPUStr passed to CpuIs");
13734
13735
  // Grab the appropriate field from __cpu_model.
13736
201
  llvm::Value *Idxs[] = {ConstantInt::get(Int32Ty, 0),
13737
201
                         ConstantInt::get(Int32Ty, Index)};
13738
201
  llvm::Value *CpuValue = Builder.CreateGEP(STy, CpuModel, Idxs);
13739
201
  CpuValue = Builder.CreateAlignedLoad(Int32Ty, CpuValue,
13740
201
                                       CharUnits::fromQuantity(4));
13741
13742
  // Check the value of the field against the requested value.
13743
201
  return Builder.CreateICmpEQ(CpuValue,
13744
201
                                  llvm::ConstantInt::get(Int32Ty, Value));
13745
201
}
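A sketch of the check EmitX86CpuIs emits against the compiler-rt/libgcc record described in the comments above (the struct and function names here are illustrative, mirroring the comment rather than quoting the runtime header):

struct CpuModel {
  unsigned int Vendor;   // __cpu_vendor
  unsigned int Type;     // __cpu_type
  unsigned int Subtype;  // __cpu_subtype
  unsigned int Features[1];
};

// __builtin_cpu_is("...") loads the field selected by Index (0 = vendor,
// 1 = type, 2 = subtype) and compares it with the Value decoded from the string.
bool cpuIs(const CpuModel &Model, unsigned Index, unsigned Value) {
  unsigned Field = Index == 0 ? Model.Vendor
                 : Index == 1 ? Model.Type
                              : Model.Subtype;
  return Field == Value;
}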
13746
13747
46
Value *CodeGenFunction::EmitX86CpuSupports(const CallExpr *E) {
13748
46
  const Expr *FeatureExpr = E->getArg(0)->IgnoreParenCasts();
13749
46
  StringRef FeatureStr = cast<StringLiteral>(FeatureExpr)->getString();
13750
46
  return EmitX86CpuSupports(FeatureStr);
13751
46
}
13752
13753
199
Value *CodeGenFunction::EmitX86CpuSupports(ArrayRef<StringRef> FeatureStrs) {
13754
199
  return EmitX86CpuSupports(llvm::X86::getCpuSupportsMask(FeatureStrs));
13755
199
}
13756
13757
llvm::Value *
13758
199
CodeGenFunction::EmitX86CpuSupports(std::array<uint32_t, 4> FeatureMask) {
13759
199
  Value *Result = Builder.getTrue();
13760
199
  if (FeatureMask[0] != 0) {
13761
    // Matching the struct layout from the compiler-rt/libgcc structure that is
13762
    // filled in:
13763
    // unsigned int __cpu_vendor;
13764
    // unsigned int __cpu_type;
13765
    // unsigned int __cpu_subtype;
13766
    // unsigned int __cpu_features[1];
13767
180
    llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty,
13768
180
                                            llvm::ArrayType::get(Int32Ty, 1));
13769
13770
    // Grab the global __cpu_model.
13771
180
    llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model");
13772
180
    cast<llvm::GlobalValue>(CpuModel)->setDSOLocal(true);
13773
13774
    // Grab the first (0th) element from the field __cpu_features off of the
13775
    // global in the struct STy.
13776
180
    Value *Idxs[] = {Builder.getInt32(0), Builder.getInt32(3),
13777
180
                     Builder.getInt32(0)};
13778
180
    Value *CpuFeatures = Builder.CreateGEP(STy, CpuModel, Idxs);
13779
180
    Value *Features = Builder.CreateAlignedLoad(Int32Ty, CpuFeatures,
13780
180
                                                CharUnits::fromQuantity(4));
13781
13782
    // Check the value of the bit corresponding to the feature requested.
13783
180
    Value *Mask = Builder.getInt32(FeatureMask[0]);
13784
180
    Value *Bitset = Builder.CreateAnd(Features, Mask);
13785
180
    Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask);
13786
180
    Result = Builder.CreateAnd(Result, Cmp);
13787
180
  }
13788
13789
199
  llvm::Type *ATy = llvm::ArrayType::get(Int32Ty, 3);
13790
199
  llvm::Constant *CpuFeatures2 =
13791
199
      CGM.CreateRuntimeVariable(ATy, "__cpu_features2");
13792
199
  cast<llvm::GlobalValue>(CpuFeatures2)->setDSOLocal(true);
13793
796
  for (int i = 1; i != 4; ++i) {
13794
597
    const uint32_t M = FeatureMask[i];
13795
597
    if (!M)
13796
578
      continue;
13797
19
    Value *Idxs[] = {Builder.getInt32(0), Builder.getInt32(i - 1)};
13798
19
    Value *Features = Builder.CreateAlignedLoad(
13799
19
        Int32Ty, Builder.CreateGEP(ATy, CpuFeatures2, Idxs),
13800
19
        CharUnits::fromQuantity(4));
13801
    // Check the value of the bit corresponding to the feature requested.
13802
19
    Value *Mask = Builder.getInt32(M);
13803
19
    Value *Bitset = Builder.CreateAnd(Features, Mask);
13804
19
    Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask);
13805
19
    Result = Builder.CreateAnd(Result, Cmp);
13806
19
  }
13807
13808
199
  return Result;
13809
199
}
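EmitX86CpuSupports above ANDs each requested feature word against the loaded runtime word and requires every requested bit to be present. A scalar sketch of that test (illustrative; word 0 lives in __cpu_model.__cpu_features[0], words 1..3 in __cpu_features2, as the code above describes):

#include <array>
#include <cstdint>

bool cpuSupports(const std::array<uint32_t, 4> &FeatureWords,
                 const std::array<uint32_t, 4> &FeatureMask) {
  bool Result = true;
  for (unsigned I = 0; I != 4; ++I) {
    if (FeatureMask[I] == 0)
      continue; // nothing requested in this word
    Result = Result && ((FeatureWords[I] & FeatureMask[I]) == FeatureMask[I]);
  }
  return Result;
}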
13810
13811
18
Value *CodeGenFunction::EmitAArch64CpuInit() {
13812
18
  llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false);
13813
18
  llvm::FunctionCallee Func =
13814
18
      CGM.CreateRuntimeFunction(FTy, "__init_cpu_features_resolver");
13815
18
  cast<llvm::GlobalValue>(Func.getCallee())->setDSOLocal(true);
13816
18
  cast<llvm::GlobalValue>(Func.getCallee())
13817
18
      ->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass);
13818
18
  return Builder.CreateCall(Func);
13819
18
}
13820
13821
130
Value *CodeGenFunction::EmitX86CpuInit() {
13822
130
  llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy,
13823
130
                                                    /*Variadic*/ false);
13824
130
  llvm::FunctionCallee Func =
13825
130
      CGM.CreateRuntimeFunction(FTy, "__cpu_indicator_init");
13826
130
  cast<llvm::GlobalValue>(Func.getCallee())->setDSOLocal(true);
13827
130
  cast<llvm::GlobalValue>(Func.getCallee())
13828
130
      ->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass);
13829
130
  return Builder.CreateCall(Func);
13830
130
}
13831
13832
llvm::Value *
13833
47
CodeGenFunction::EmitAArch64CpuSupports(ArrayRef<StringRef> FeaturesStrs) {
13834
47
  uint64_t FeaturesMask = llvm::AArch64::getCpuSupportsMask(FeaturesStrs);
13835
47
  Value *Result = Builder.getTrue();
13836
47
  if (FeaturesMask != 0) {
13837
    // Get features from structure in runtime library
13838
    // struct {
13839
    //   unsigned long long features;
13840
    // } __aarch64_cpu_features;
13841
47
    llvm::Type *STy = llvm::StructType::get(Int64Ty);
13842
47
    llvm::Constant *AArch64CPUFeatures =
13843
47
        CGM.CreateRuntimeVariable(STy, "__aarch64_cpu_features");
13844
47
    cast<llvm::GlobalValue>(AArch64CPUFeatures)->setDSOLocal(true);
13845
47
    llvm::Value *CpuFeatures = Builder.CreateGEP(
13846
47
        STy, AArch64CPUFeatures,
13847
47
        {ConstantInt::get(Int32Ty, 0), ConstantInt::get(Int32Ty, 0)});
13848
47
    Value *Features = Builder.CreateAlignedLoad(Int64Ty, CpuFeatures,
13849
47
                                                CharUnits::fromQuantity(8));
13850
47
    Value *Mask = Builder.getInt64(FeaturesMask);
13851
47
    Value *Bitset = Builder.CreateAnd(Features, Mask);
13852
47
    Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask);
13853
47
    Result = Builder.CreateAnd(Result, Cmp);
13854
47
  }
13855
47
  return Result;
13856
47
}
13857
13858
Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
13859
9.30k
                                           const CallExpr *E) {
13860
9.30k
  if (BuiltinID == X86::BI__builtin_cpu_is)
13861
65
    return EmitX86CpuIs(E);
13862
9.24k
  if (BuiltinID == X86::BI__builtin_cpu_supports)
13863
46
    return EmitX86CpuSupports(E);
13864
9.19k
  if (BuiltinID == X86::BI__builtin_cpu_init)
13865
1
    return EmitX86CpuInit();
13866
13867
  // Handle MSVC intrinsics before argument evaluation to prevent double
13868
  // evaluation.
13869
9.19k
  if (std::optional<MSVCIntrin> MsvcIntId = translateX86ToMsvcIntrin(BuiltinID))
13870
39
    return EmitMSVCBuiltinExpr(*MsvcIntId, E);
13871
13872
9.15k
  SmallVector<Value*, 4> Ops;
13873
9.15k
  bool IsMaskFCmp = false;
13874
9.15k
  bool IsConjFMA = false;
13875
13876
  // Find out if any arguments are required to be integer constant expressions.
13877
9.15k
  unsigned ICEArguments = 0;
13878
9.15k
  ASTContext::GetBuiltinTypeError Error;
13879
9.15k
  getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
13880
9.15k
  assert(Error == ASTContext::GE_None && "Should not codegen an error");
13881
13882
38.8k
  for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
13883
29.6k
    Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
13884
29.6k
  }
13885
13886
  // These exist so that the builtin that takes an immediate can be bounds
13887
  // checked by clang to avoid passing bad immediates to the backend. Since
13888
  // AVX has a larger immediate than SSE we would need separate builtins to
13889
  // do the different bounds checking. Rather than create a clang specific
13890
  // SSE only builtin, this implements eight separate builtins to match gcc
13891
  // implementation.
13892
9.15k
  auto getCmpIntrinsicCall = [this, &Ops](Intrinsic::ID ID, unsigned Imm) {
13893
100
    Ops.push_back(llvm::ConstantInt::get(Int8Ty, Imm));
13894
100
    llvm::Function *F = CGM.getIntrinsic(ID);
13895
100
    return Builder.CreateCall(F, Ops);
13896
100
  };
13897
13898
  // For the vector forms of FP comparisons, translate the builtins directly to
13899
  // IR.
13900
  // TODO: The builtins could be removed if the SSE header files used vector
13901
  // extension comparisons directly (vector ordered/unordered may need
13902
  // additional support via __builtin_isnan()).
13903
9.15k
  auto getVectorFCmpIR = [this, &Ops, E](CmpInst::Predicate Pred,
13904
9.15k
                                         bool IsSignaling) {
13905
1.04k
    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
13906
1.04k
    Value *Cmp;
13907
1.04k
    if (IsSignaling)
13908
528
      Cmp = Builder.CreateFCmpS(Pred, Ops[0], Ops[1]);
13909
513
    else
13910
513
      Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]);
13911
1.04k
    llvm::VectorType *FPVecTy = cast<llvm::VectorType>(Ops[0]->getType());
13912
1.04k
    llvm::VectorType *IntVecTy = llvm::VectorType::getInteger(FPVecTy);
13913
1.04k
    Value *Sext = Builder.CreateSExt(Cmp, IntVecTy);
13914
1.04k
    return Builder.CreateBitCast(Sext, FPVecTy);
13915
1.04k
  };
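getVectorFCmpIR above sign-extends the i1 compare result so a true lane becomes all ones and a false lane becomes zero, then bitcasts back to the FP vector type. A per-lane model of one such compare (the helper name is made up):

#include <cstdint>

uint32_t fcmpLTLaneBits(float A, float B) {
  return (A < B) ? 0xFFFFFFFFu : 0u; // all-ones on true, zero on false
}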
13916
13917
9.15k
  switch (BuiltinID) {
13918
0
  default: return nullptr;
13919
0
  case X86::BI_mm_prefetch: {
13920
0
    Value *Address = Ops[0];
13921
0
    ConstantInt *C = cast<ConstantInt>(Ops[1]);
13922
0
    Value *RW = ConstantInt::get(Int32Ty, (C->getZExtValue() >> 2) & 0x1);
13923
0
    Value *Locality = ConstantInt::get(Int32Ty, C->getZExtValue() & 0x3);
13924
0
    Value *Data = ConstantInt::get(Int32Ty, 1);
13925
0
    Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
13926
0
    return Builder.CreateCall(F, {Address, RW, Locality, Data});
13927
0
  }
13928
6
  case X86::BI_mm_clflush: {
13929
6
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_clflush),
13930
6
                              Ops[0]);
13931
0
  }
13932
6
  case X86::BI_mm_lfence: {
13933
6
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_lfence));
13934
0
  }
13935
6
  case X86::BI_mm_mfence: {
13936
6
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_mfence));
13937
0
  }
13938
3
  case X86::BI_mm_sfence: {
13939
3
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_sfence));
13940
0
  }
13941
8
  case X86::BI_mm_pause: {
13942
8
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_pause));
13943
0
  }
13944
3
  case X86::BI__rdtsc: {
13945
3
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_rdtsc));
13946
0
  }
13947
3
  case X86::BI__builtin_ia32_rdtscp: {
13948
3
    Value *Call = Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_rdtscp));
13949
3
    Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 1),
13950
3
                                      Ops[0]);
13951
3
    return Builder.CreateExtractValue(Call, 0);
13952
0
  }
13953
1
  case X86::BI__builtin_ia32_lzcnt_u16:
13954
3
  case X86::BI__builtin_ia32_lzcnt_u32:
13955
5
  case X86::BI__builtin_ia32_lzcnt_u64: {
13956
5
    Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType());
13957
5
    return Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});
13958
3
  }
13959
2
  case X86::BI__builtin_ia32_tzcnt_u16:
13960
6
  case X86::BI__builtin_ia32_tzcnt_u32:
13961
10
  case X86::BI__builtin_ia32_tzcnt_u64: {
13962
10
    Function *F = CGM.getIntrinsic(Intrinsic::cttz, Ops[0]->getType());
13963
10
    return Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});
13964
6
  }
13965
48
  case X86::BI__builtin_ia32_undef128:
13966
100
  case X86::BI__builtin_ia32_undef256:
13967
118
  case X86::BI__builtin_ia32_undef512:
13968
    // The x86 definition of "undef" is not the same as the LLVM definition
13969
    // (PR32176). We leave optimizing away an unnecessary zero constant to the
13970
    // IR optimizer and backend.
13971
    // TODO: If we had a "freeze" IR instruction to generate a fixed undef
13972
    // value, we should use that here instead of a zero.
13973
118
    return llvm::Constant::getNullValue(ConvertType(E->getType()));
13974
2
  case X86::BI__builtin_ia32_vec_init_v8qi:
13975
4
  case X86::BI__builtin_ia32_vec_init_v4hi:
13976
10
  case X86::BI__builtin_ia32_vec_init_v2si:
13977
10
    return Builder.CreateBitCast(BuildVector(Ops),
13978
10
                                 llvm::Type::getX86_MMXTy(getLLVMContext()));
13979
5
  case X86::BI__builtin_ia32_vec_ext_v2si:
13980
10
  case X86::BI__builtin_ia32_vec_ext_v16qi:
13981
16
  case X86::BI__builtin_ia32_vec_ext_v8hi:
13982
20
  case X86::BI__builtin_ia32_vec_ext_v4si:
13983
24
  case X86::BI__builtin_ia32_vec_ext_v4sf:
13984
28
  case X86::BI__builtin_ia32_vec_ext_v2di:
13985
33
  case X86::BI__builtin_ia32_vec_ext_v32qi:
13986
38
  case X86::BI__builtin_ia32_vec_ext_v16hi:
13987
46
  case X86::BI__builtin_ia32_vec_ext_v8si:
13988
49
  case X86::BI__builtin_ia32_vec_ext_v4di: {
13989
49
    unsigned NumElts =
13990
49
        cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
13991
49
    uint64_t Index = cast<ConstantInt>(Ops[1])->getZExtValue();
13992
49
    Index &= NumElts - 1;
13993
    // These builtins exist so we can ensure the index is an ICE and in range.
13994
    // Otherwise we could just do this in the header file.
13995
49
    return Builder.CreateExtractElement(Ops[0], Index);
13996
46
  }
13997
4
  case X86::BI__builtin_ia32_vec_set_v16qi:
13998
9
  case X86::BI__builtin_ia32_vec_set_v8hi:
13999
13
  case X86::BI__builtin_ia32_vec_set_v4si:
14000
15
  case X86::BI__builtin_ia32_vec_set_v2di:
14001
20
  case X86::BI__builtin_ia32_vec_set_v32qi:
14002
25
  case X86::BI__builtin_ia32_vec_set_v16hi:
14003
30
  case X86::BI__builtin_ia32_vec_set_v8si:
14004
33
  case X86::BI__builtin_ia32_vec_set_v4di: {
14005
33
    unsigned NumElts =
14006
33
        cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
14007
33
    unsigned Index = cast<ConstantInt>(Ops[2])->getZExtValue();
14008
33
    Index &= NumElts - 1;
14009
    // These builtins exist so we can ensure the index is an ICE and in range.
14010
    // Otherwise we could just do this in the header file.
14011
33
    return Builder.CreateInsertElement(Ops[0], Ops[1], Index);
14012
30
  }
14013
11
  case X86::BI_mm_setcsr:
14014
13
  case X86::BI__builtin_ia32_ldmxcsr: {
14015
13
    Address Tmp = CreateMemTemp(E->getArg(0)->getType());
14016
13
    Builder.CreateStore(Ops[0], Tmp);
14017
13
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_ldmxcsr),
14018
13
                              Tmp.getPointer());
14019
11
  }
14020
19
  case X86::BI_mm_getcsr:
14021
21
  case X86::BI__builtin_ia32_stmxcsr: {
14022
21
    Address Tmp = CreateMemTemp(E->getType());
14023
21
    Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_stmxcsr),
14024
21
                       Tmp.getPointer());
14025
21
    return Builder.CreateLoad(Tmp, "stmxcsr");
14026
19
  }
14027
10
  case X86::BI__builtin_ia32_xsave:
14028
16
  case X86::BI__builtin_ia32_xsave64:
14029
26
  case X86::BI__builtin_ia32_xrstor:
14030
32
  case X86::BI__builtin_ia32_xrstor64:
14031
42
  case X86::BI__builtin_ia32_xsaveopt:
14032
48
  case X86::BI__builtin_ia32_xsaveopt64:
14033
58
  case X86::BI__builtin_ia32_xrstors:
14034
64
  case X86::BI__builtin_ia32_xrstors64:
14035
74
  case X86::BI__builtin_ia32_xsavec:
14036
80
  case X86::BI__builtin_ia32_xsavec64:
14037
90
  case X86::BI__builtin_ia32_xsaves:
14038
96
  case X86::BI__builtin_ia32_xsaves64:
14039
102
  case X86::BI__builtin_ia32_xsetbv:
14040
102
  case X86::BI_xsetbv: {
14041
102
    Intrinsic::ID ID;
14042
102
#define INTRINSIC_X86_XSAVE_ID(NAME) \
14043
102
    case X86::BI__builtin_ia32_##NAME: \
14044
102
      ID = Intrinsic::x86_##NAME; \
14045
102
      break
14046
102
    switch (BuiltinID) {
14047
0
    default: llvm_unreachable("Unsupported intrinsic!");
14048
10
    INTRINSIC_X86_XSAVE_ID(xsave);
14049
6
    INTRINSIC_X86_XSAVE_ID(xsave64);
14050
10
    INTRINSIC_X86_XSAVE_ID(xrstor);
14051
6
    INTRINSIC_X86_XSAVE_ID(xrstor64);
14052
10
    INTRINSIC_X86_XSAVE_ID(xsaveopt);
14053
6
    INTRINSIC_X86_XSAVE_ID(xsaveopt64);
14054
10
    INTRINSIC_X86_XSAVE_ID(xrstors);
14055
6
    INTRINSIC_X86_XSAVE_ID(xrstors64);
14056
10
    INTRINSIC_X86_XSAVE_ID(xsavec);
14057
6
    INTRINSIC_X86_XSAVE_ID(xsavec64);
14058
10
    INTRINSIC_X86_XSAVE_ID(xsaves);
14059
6
    INTRINSIC_X86_XSAVE_ID(xsaves64);
14060
6
    INTRINSIC_X86_XSAVE_ID(xsetbv);
14061
0
    case X86::BI_xsetbv:
14062
0
      ID = Intrinsic::x86_xsetbv;
14063
0
      break;
14064
102
    }
14065
102
#undef INTRINSIC_X86_XSAVE_ID
14066
102
    Value *Mhi = Builder.CreateTrunc(
14067
102
      Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, 32)), Int32Ty);
14068
102
    Value *Mlo = Builder.CreateTrunc(Ops[1], Int32Ty);
14069
102
    Ops[1] = Mhi;
14070
102
    Ops.push_back(Mlo);
14071
102
    return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
14072
102
  }
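The XSAVE-family lowering above splits the 64-bit state mask in Ops[1] into the two 32-bit halves (EDX:EAX order) the intrinsics expect. A trivial sketch of that split (illustrative):

#include <cstdint>

void splitXsaveMask(uint64_t Mask, uint32_t &Hi, uint32_t &Lo) {
  Hi = static_cast<uint32_t>(Mask >> 32);
  Lo = static_cast<uint32_t>(Mask);
}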
14073
6
  case X86::BI__builtin_ia32_xgetbv:
14074
8
  case X86::BI_xgetbv:
14075
8
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_xgetbv), Ops);
14076
1
  case X86::BI__builtin_ia32_storedqudi128_mask:
14077
2
  case X86::BI__builtin_ia32_storedqusi128_mask:
14078
4
  case X86::BI__builtin_ia32_storedquhi128_mask:
14079
6
  case X86::BI__builtin_ia32_storedquqi128_mask:
14080
7
  case X86::BI__builtin_ia32_storeupd128_mask:
14081
8
  case X86::BI__builtin_ia32_storeups128_mask:
14082
9
  case X86::BI__builtin_ia32_storedqudi256_mask:
14083
10
  case X86::BI__builtin_ia32_storedqusi256_mask:
14084
12
  case X86::BI__builtin_ia32_storedquhi256_mask:
14085
14
  case X86::BI__builtin_ia32_storedquqi256_mask:
14086
15
  case X86::BI__builtin_ia32_storeupd256_mask:
14087
16
  case X86::BI__builtin_ia32_storeups256_mask:
14088
18
  case X86::BI__builtin_ia32_storedqudi512_mask:
14089
20
  case X86::BI__builtin_ia32_storedqusi512_mask:
14090
22
  case X86::BI__builtin_ia32_storedquhi512_mask:
14091
24
  case X86::BI__builtin_ia32_storedquqi512_mask:
14092
24
  case X86::BI__builtin_ia32_storeupd512_mask:
14093
24
  case X86::BI__builtin_ia32_storeups512_mask:
14094
24
    return EmitX86MaskedStore(*this, Ops, Align(1));
14095
14096
1
  case X86::BI__builtin_ia32_storesh128_mask:
14097
3
  case X86::BI__builtin_ia32_storess128_mask:
14098
5
  case X86::BI__builtin_ia32_storesd128_mask:
14099
5
    return EmitX86MaskedStore(*this, Ops, Align(1));
14100
14101
1
  case X86::BI__builtin_ia32_vpopcntb_128:
14102
2
  case X86::BI__builtin_ia32_vpopcntd_128:
14103
3
  case X86::BI__builtin_ia32_vpopcntq_128:
14104
4
  case X86::BI__builtin_ia32_vpopcntw_128:
14105
5
  case X86::BI__builtin_ia32_vpopcntb_256:
14106
6
  case X86::BI__builtin_ia32_vpopcntd_256:
14107
7
  case X86::BI__builtin_ia32_vpopcntq_256:
14108
8
  case X86::BI__builtin_ia32_vpopcntw_256:
14109
9
  case X86::BI__builtin_ia32_vpopcntb_512:
14110
10
  case X86::BI__builtin_ia32_vpopcntd_512:
14111
11
  case X86::BI__builtin_ia32_vpopcntq_512:
14112
12
  case X86::BI__builtin_ia32_vpopcntw_512: {
14113
12
    llvm::Type *ResultType = ConvertType(E->getType());
14114
12
    llvm::Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
14115
12
    return Builder.CreateCall(F, Ops);
14116
11
  }
14117
2
  case X86::BI__builtin_ia32_cvtmask2b128:
14118
4
  case X86::BI__builtin_ia32_cvtmask2b256:
14119
6
  case X86::BI__builtin_ia32_cvtmask2b512:
14120
8
  case X86::BI__builtin_ia32_cvtmask2w128:
14121
10
  case X86::BI__builtin_ia32_cvtmask2w256:
14122
12
  case X86::BI__builtin_ia32_cvtmask2w512:
14123
13
  case X86::BI__builtin_ia32_cvtmask2d128:
14124
14
  case X86::BI__builtin_ia32_cvtmask2d256:
14125
15
  case X86::BI__builtin_ia32_cvtmask2d512:
14126
16
  case X86::BI__builtin_ia32_cvtmask2q128:
14127
17
  case X86::BI__builtin_ia32_cvtmask2q256:
14128
18
  case X86::BI__builtin_ia32_cvtmask2q512:
14129
18
    return EmitX86SExtMask(*this, Ops[0], ConvertType(E->getType()));
14130
14131
2
  case X86::BI__builtin_ia32_cvtb2mask128:
14132
4
  case X86::BI__builtin_ia32_cvtb2mask256:
14133
6
  case X86::BI__builtin_ia32_cvtb2mask512:
14134
8
  case X86::BI__builtin_ia32_cvtw2mask128:
14135
10
  case X86::BI__builtin_ia32_cvtw2mask256:
14136
12
  case X86::BI__builtin_ia32_cvtw2mask512:
14137
13
  case X86::BI__builtin_ia32_cvtd2mask128:
14138
14
  case X86::BI__builtin_ia32_cvtd2mask256:
14139
15
  case X86::BI__builtin_ia32_cvtd2mask512:
14140
16
  case X86::BI__builtin_ia32_cvtq2mask128:
14141
17
  case X86::BI__builtin_ia32_cvtq2mask256:
14142
18
  case X86::BI__builtin_ia32_cvtq2mask512:
14143
18
    return EmitX86ConvertToMask(*this, Ops[0]);
14144
14145
6
  case X86::BI__builtin_ia32_cvtdq2ps512_mask:
14146
36
  case X86::BI__builtin_ia32_cvtqq2ps512_mask:
14147
51
  case X86::BI__builtin_ia32_cvtqq2pd512_mask:
14148
57
  case X86::BI__builtin_ia32_vcvtw2ph512_mask:
14149
63
  case X86::BI__builtin_ia32_vcvtdq2ph512_mask:
14150
69
  case X86::BI__builtin_ia32_vcvtqq2ph512_mask:
14151
69
    return EmitX86ConvertIntToFp(*this, E, Ops, /*IsSigned*/ true);
14152
6
  case X86::BI__builtin_ia32_cvtudq2ps512_mask:
14153
36
  case X86::BI__builtin_ia32_cvtuqq2ps512_mask:
14154
51
  case X86::BI__builtin_ia32_cvtuqq2pd512_mask:
14155
57
  case X86::BI__builtin_ia32_vcvtuw2ph512_mask:
14156
63
  case X86::BI__builtin_ia32_vcvtudq2ph512_mask:
14157
69
  case X86::BI__builtin_ia32_vcvtuqq2ph512_mask:
14158
69
    return EmitX86ConvertIntToFp(*this, E, Ops, /*IsSigned*/ false);
14159
14160
20
  case X86::BI__builtin_ia32_vfmaddss3:
14161
40
  case X86::BI__builtin_ia32_vfmaddsd3:
14162
56
  case X86::BI__builtin_ia32_vfmaddsh3_mask:
14163
80
  case X86::BI__builtin_ia32_vfmaddss3_mask:
14164
104
  case X86::BI__builtin_ia32_vfmaddsd3_mask:
14165
104
    return EmitScalarFMAExpr(*this, E, Ops, Ops[0]);
14166
8
  case X86::BI__builtin_ia32_vfmaddss:
14167
16
  case X86::BI__builtin_ia32_vfmaddsd:
14168
16
    return EmitScalarFMAExpr(*this, E, Ops,
14169
16
                             Constant::getNullValue(Ops[0]->getType()));
14170
8
  case X86::BI__builtin_ia32_vfmaddsh3_maskz:
14171
24
  case X86::BI__builtin_ia32_vfmaddss3_maskz:
14172
40
  case X86::BI__builtin_ia32_vfmaddsd3_maskz:
14173
40
    return EmitScalarFMAExpr(*this, E, Ops, Ops[0], /*ZeroMask*/ true);
14174
4
  case X86::BI__builtin_ia32_vfmaddsh3_mask3:
14175
12
  case X86::BI__builtin_ia32_vfmaddss3_mask3:
14176
20
  case X86::BI__builtin_ia32_vfmaddsd3_mask3:
14177
20
    return EmitScalarFMAExpr(*this, E, Ops, Ops[2], /*ZeroMask*/ false, 2);
14178
4
  case X86::BI__builtin_ia32_vfmsubsh3_mask3:
14179
12
  case X86::BI__builtin_ia32_vfmsubss3_mask3:
14180
20
  case X86::BI__builtin_ia32_vfmsubsd3_mask3:
14181
20
    return EmitScalarFMAExpr(*this, E, Ops, Ops[2], /*ZeroMask*/ false, 2,
14182
20
                             /*NegAcc*/ true);
14183
14
  case X86::BI__builtin_ia32_vfmaddph:
14184
57
  case X86::BI__builtin_ia32_vfmaddps:
14185
97
  case X86::BI__builtin_ia32_vfmaddpd:
14186
113
  case X86::BI__builtin_ia32_vfmaddph256:
14187
153
  case X86::BI__builtin_ia32_vfmaddps256:
14188
193
  case X86::BI__builtin_ia32_vfmaddpd256:
14189
209
  case X86::BI__builtin_ia32_vfmaddph512_mask:
14190
217
  case X86::BI__builtin_ia32_vfmaddph512_maskz:
14191
221
  case X86::BI__builtin_ia32_vfmaddph512_mask3:
14192
253
  case X86::BI__builtin_ia32_vfmaddps512_mask:
14193
269
  case X86::BI__builtin_ia32_vfmaddps512_maskz:
14194
277
  case X86::BI__builtin_ia32_vfmaddps512_mask3:
14195
285
  case X86::BI__builtin_ia32_vfmsubps512_mask3:
14196
317
  case X86::BI__builtin_ia32_vfmaddpd512_mask:
14197
333
  case X86::BI__builtin_ia32_vfmaddpd512_maskz:
14198
341
  case X86::BI__builtin_ia32_vfmaddpd512_mask3:
14199
349
  case X86::BI__builtin_ia32_vfmsubpd512_mask3:
14200
353
  case X86::BI__builtin_ia32_vfmsubph512_mask3:
14201
353
    return EmitX86FMAExpr(*this, E, Ops, BuiltinID, /*IsAddSub*/ false);
14202
8
  case X86::BI__builtin_ia32_vfmaddsubph512_mask:
14203
12
  case X86::BI__builtin_ia32_vfmaddsubph512_maskz:
14204
14
  case X86::BI__builtin_ia32_vfmaddsubph512_mask3:
14205
16
  case X86::BI__builtin_ia32_vfmsubaddph512_mask3:
14206
32
  case X86::BI__builtin_ia32_vfmaddsubps512_mask:
14207
40
  case X86::BI__builtin_ia32_vfmaddsubps512_maskz:
14208
44
  case X86::BI__builtin_ia32_vfmaddsubps512_mask3:
14209
48
  case X86::BI__builtin_ia32_vfmsubaddps512_mask3:
14210
64
  case X86::BI__builtin_ia32_vfmaddsubpd512_mask:
14211
72
  case X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
14212
76
  case X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
14213
80
  case X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
14214
80
    return EmitX86FMAExpr(*this, E, Ops, BuiltinID, /*IsAddSub*/ true);
14215
14216
1
  case X86::BI__builtin_ia32_movdqa32store128_mask:
14217
2
  case X86::BI__builtin_ia32_movdqa64store128_mask:
14218
3
  case X86::BI__builtin_ia32_storeaps128_mask:
14219
4
  case X86::BI__builtin_ia32_storeapd128_mask:
14220
5
  case X86::BI__builtin_ia32_movdqa32store256_mask:
14221
6
  case X86::BI__builtin_ia32_movdqa64store256_mask:
14222
7
  case X86::BI__builtin_ia32_storeaps256_mask:
14223
8
  case X86::BI__builtin_ia32_storeapd256_mask:
14224
10
  case X86::BI__builtin_ia32_movdqa32store512_mask:
14225
12
  case X86::BI__builtin_ia32_movdqa64store512_mask:
14226
14
  case X86::BI__builtin_ia32_storeaps512_mask:
14227
16
  case X86::BI__builtin_ia32_storeapd512_mask:
14228
16
    return EmitX86MaskedStore(
14229
16
        *this, Ops,
14230
16
        getContext().getTypeAlignInChars(E->getArg(1)->getType()).getAsAlign());
14231
14232
2
  case X86::BI__builtin_ia32_loadups128_mask:
14233
4
  case X86::BI__builtin_ia32_loadups256_mask:
14234
6
  case X86::BI__builtin_ia32_loadups512_mask:
14235
8
  case X86::BI__builtin_ia32_loadupd128_mask:
14236
10
  case X86::BI__builtin_ia32_loadupd256_mask:
14237
12
  case X86::BI__builtin_ia32_loadupd512_mask:
14238
16
  case X86::BI__builtin_ia32_loaddquqi128_mask:
14239
20
  case X86::BI__builtin_ia32_loaddquqi256_mask:
14240
24
  case X86::BI__builtin_ia32_loaddquqi512_mask:
14241
28
  case X86::BI__builtin_ia32_loaddquhi128_mask:
14242
32
  case X86::BI__builtin_ia32_loaddquhi256_mask:
14243
36
  case X86::BI__builtin_ia32_loaddquhi512_mask:
14244
38
  case X86::BI__builtin_ia32_loaddqusi128_mask:
14245
40
  case X86::BI__builtin_ia32_loaddqusi256_mask:
14246
44
  case X86::BI__builtin_ia32_loaddqusi512_mask:
14247
46
  case X86::BI__builtin_ia32_loaddqudi128_mask:
14248
48
  case X86::BI__builtin_ia32_loaddqudi256_mask:
14249
52
  case X86::BI__builtin_ia32_loaddqudi512_mask:
14250
52
    return EmitX86MaskedLoad(*this, Ops, Align(1));
14251
14252
2
  case X86::BI__builtin_ia32_loadsh128_mask:
14253
6
  case X86::BI__builtin_ia32_loadss128_mask:
14254
10
  case X86::BI__builtin_ia32_loadsd128_mask:
14255
10
    return EmitX86MaskedLoad(*this, Ops, Align(1));
14256
14257
2
  case X86::BI__builtin_ia32_loadaps128_mask:
14258
4
  case X86::BI__builtin_ia32_loadaps256_mask:
14259
8
  case X86::BI__builtin_ia32_loadaps512_mask:
14260
10
  case X86::BI__builtin_ia32_loadapd128_mask:
14261
12
  case X86::BI__builtin_ia32_loadapd256_mask:
14262
16
  case X86::BI__builtin_ia32_loadapd512_mask:
14263
18
  case X86::BI__builtin_ia32_movdqa32load128_mask:
14264
20
  case X86::BI__builtin_ia32_movdqa32load256_mask:
14265
24
  case X86::BI__builtin_ia32_movdqa32load512_mask:
14266
26
  case X86::BI__builtin_ia32_movdqa64load128_mask:
14267
28
  case X86::BI__builtin_ia32_movdqa64load256_mask:
14268
32
  case X86::BI__builtin_ia32_movdqa64load512_mask:
14269
32
    return EmitX86MaskedLoad(
14270
32
        *this, Ops,
14271
32
        getContext().getTypeAlignInChars(E->getArg(1)->getType()).getAsAlign());
14272
14273
2
  case X86::BI__builtin_ia32_expandloaddf128_mask:
14274
4
  case X86::BI__builtin_ia32_expandloaddf256_mask:
14275
8
  case X86::BI__builtin_ia32_expandloaddf512_mask:
14276
10
  case X86::BI__builtin_ia32_expandloadsf128_mask:
14277
12
  case X86::BI__builtin_ia32_expandloadsf256_mask:
14278
16
  case X86::BI__builtin_ia32_expandloadsf512_mask:
14279
18
  case X86::BI__builtin_ia32_expandloaddi128_mask:
14280
20
  case X86::BI__builtin_ia32_expandloaddi256_mask:
14281
24
  case X86::BI__builtin_ia32_expandloaddi512_mask:
14282
26
  case X86::BI__builtin_ia32_expandloadsi128_mask:
14283
28
  case X86::BI__builtin_ia32_expandloadsi256_mask:
14284
32
  case X86::BI__builtin_ia32_expandloadsi512_mask:
14285
34
  case X86::BI__builtin_ia32_expandloadhi128_mask:
14286
36
  case X86::BI__builtin_ia32_expandloadhi256_mask:
14287
38
  case X86::BI__builtin_ia32_expandloadhi512_mask:
14288
40
  case X86::BI__builtin_ia32_expandloadqi128_mask:
14289
42
  case X86::BI__builtin_ia32_expandloadqi256_mask:
14290
44
  case X86::BI__builtin_ia32_expandloadqi512_mask:
14291
44
    return EmitX86ExpandLoad(*this, Ops);
14292
14293
1
  case X86::BI__builtin_ia32_compressstoredf128_mask:
14294
2
  case X86::BI__builtin_ia32_compressstoredf256_mask:
14295
4
  case X86::BI__builtin_ia32_compressstoredf512_mask:
14296
5
  case X86::BI__builtin_ia32_compressstoresf128_mask:
14297
6
  case X86::BI__builtin_ia32_compressstoresf256_mask:
14298
8
  case X86::BI__builtin_ia32_compressstoresf512_mask:
14299
9
  case X86::BI__builtin_ia32_compressstoredi128_mask:
14300
10
  case X86::BI__builtin_ia32_compressstoredi256_mask:
14301
12
  case X86::BI__builtin_ia32_compressstoredi512_mask:
14302
13
  case X86::BI__builtin_ia32_compressstoresi128_mask:
14303
14
  case X86::BI__builtin_ia32_compressstoresi256_mask:
14304
16
  case X86::BI__builtin_ia32_compressstoresi512_mask:
14305
17
  case X86::BI__builtin_ia32_compressstorehi128_mask:
14306
18
  case X86::BI__builtin_ia32_compressstorehi256_mask:
14307
19
  case X86::BI__builtin_ia32_compressstorehi512_mask:
14308
20
  case X86::BI__builtin_ia32_compressstoreqi128_mask:
14309
21
  case X86::BI__builtin_ia32_compressstoreqi256_mask:
14310
22
  case X86::BI__builtin_ia32_compressstoreqi512_mask:
14311
22
    return EmitX86CompressStore(*this, Ops);
14312
14313
2
  case X86::BI__builtin_ia32_expanddf128_mask:
14314
4
  case X86::BI__builtin_ia32_expanddf256_mask:
14315
8
  case X86::BI__builtin_ia32_expanddf512_mask:
14316
10
  case X86::BI__builtin_ia32_expandsf128_mask:
14317
12
  case X86::BI__builtin_ia32_expandsf256_mask:
14318
16
  case X86::BI__builtin_ia32_expandsf512_mask:
14319
18
  case X86::BI__builtin_ia32_expanddi128_mask:
14320
20
  case X86::BI__builtin_ia32_expanddi256_mask:
14321
24
  case X86::BI__builtin_ia32_expanddi512_mask:
14322
26
  case X86::BI__builtin_ia32_expandsi128_mask:
14323
28
  case X86::BI__builtin_ia32_expandsi256_mask:
14324
32
  case X86::BI__builtin_ia32_expandsi512_mask:
14325
34
  case X86::BI__builtin_ia32_expandhi128_mask:
14326
36
  case X86::BI__builtin_ia32_expandhi256_mask:
14327
38
  case X86::BI__builtin_ia32_expandhi512_mask:
14328
40
  case X86::BI__builtin_ia32_expandqi128_mask:
14329
42
  case X86::BI__builtin_ia32_expandqi256_mask:
14330
44
  case X86::BI__builtin_ia32_expandqi512_mask:
14331
44
    return EmitX86CompressExpand(*this, Ops, /*IsCompress*/false);
14332
14333
2
  case X86::BI__builtin_ia32_compressdf128_mask:
14334
4
  case X86::BI__builtin_ia32_compressdf256_mask:
14335
8
  case X86::BI__builtin_ia32_compressdf512_mask:
14336
10
  case X86::BI__builtin_ia32_compresssf128_mask:
14337
12
  case X86::BI__builtin_ia32_compresssf256_mask:
14338
16
  case X86::BI__builtin_ia32_compresssf512_mask:
14339
18
  case X86::BI__builtin_ia32_compressdi128_mask:
14340
20
  case X86::BI__builtin_ia32_compressdi256_mask:
14341
24
  case X86::BI__builtin_ia32_compressdi512_mask:
14342
26
  case X86::BI__builtin_ia32_compresssi128_mask:
14343
28
  case X86::BI__builtin_ia32_compresssi256_mask:
14344
32
  case X86::BI__builtin_ia32_compresssi512_mask:
14345
34
  case X86::BI__builtin_ia32_compresshi128_mask:
14346
36
  case X86::BI__builtin_ia32_compresshi256_mask:
14347
38
  case X86::BI__builtin_ia32_compresshi512_mask:
14348
40
  case X86::BI__builtin_ia32_compressqi128_mask:
14349
42
  case X86::BI__builtin_ia32_compressqi256_mask:
14350
44
  case X86::BI__builtin_ia32_compressqi512_mask:
14351
44
    return EmitX86CompressExpand(*this, Ops, /*IsCompress*/true);
14352
14353
1
  case X86::BI__builtin_ia32_gather3div2df:
14354
2
  case X86::BI__builtin_ia32_gather3div2di:
14355
3
  case X86::BI__builtin_ia32_gather3div4df:
14356
4
  case X86::BI__builtin_ia32_gather3div4di:
14357
5
  case X86::BI__builtin_ia32_gather3div4sf:
14358
6
  case X86::BI__builtin_ia32_gather3div4si:
14359
7
  case X86::BI__builtin_ia32_gather3div8sf:
14360
8
  case X86::BI__builtin_ia32_gather3div8si:
14361
9
  case X86::BI__builtin_ia32_gather3siv2df:
14362
10
  case X86::BI__builtin_ia32_gather3siv2di:
14363
11
  case X86::BI__builtin_ia32_gather3siv4df:
14364
12
  case X86::BI__builtin_ia32_gather3siv4di:
14365
13
  case X86::BI__builtin_ia32_gather3siv4sf:
14366
14
  case X86::BI__builtin_ia32_gather3siv4si:
14367
15
  case X86::BI__builtin_ia32_gather3siv8sf:
14368
16
  case X86::BI__builtin_ia32_gather3siv8si:
14369
24
  case X86::BI__builtin_ia32_gathersiv8df:
14370
28
  case X86::BI__builtin_ia32_gathersiv16sf:
14371
32
  case X86::BI__builtin_ia32_gatherdiv8df:
14372
36
  case X86::BI__builtin_ia32_gatherdiv16sf:
14373
44
  case X86::BI__builtin_ia32_gathersiv8di:
14374
48
  case X86::BI__builtin_ia32_gathersiv16si:
14375
52
  case X86::BI__builtin_ia32_gatherdiv8di:
14376
56
  case X86::BI__builtin_ia32_gatherdiv16si: {
14377
56
    Intrinsic::ID IID;
14378
56
    switch (BuiltinID) {
14379
0
    default: llvm_unreachable("Unexpected builtin");
14380
1
    case X86::BI__builtin_ia32_gather3div2df:
14381
1
      IID = Intrinsic::x86_avx512_mask_gather3div2_df;
14382
1
      break;
14383
1
    case X86::BI__builtin_ia32_gather3div2di:
14384
1
      IID = Intrinsic::x86_avx512_mask_gather3div2_di;
14385
1
      break;
14386
1
    case X86::BI__builtin_ia32_gather3div4df:
14387
1
      IID = Intrinsic::x86_avx512_mask_gather3div4_df;
14388
1
      break;
14389
1
    case X86::BI__builtin_ia32_gather3div4di:
14390
1
      IID = Intrinsic::x86_avx512_mask_gather3div4_di;
14391
1
      break;
14392
1
    case X86::BI__builtin_ia32_gather3div4sf:
14393
1
      IID = Intrinsic::x86_avx512_mask_gather3div4_sf;
14394
1
      break;
14395
1
    case X86::BI__builtin_ia32_gather3div4si:
14396
1
      IID = Intrinsic::x86_avx512_mask_gather3div4_si;
14397
1
      break;
14398
1
    case X86::BI__builtin_ia32_gather3div8sf:
14399
1
      IID = Intrinsic::x86_avx512_mask_gather3div8_sf;
14400
1
      break;
14401
1
    case X86::BI__builtin_ia32_gather3div8si:
14402
1
      IID = Intrinsic::x86_avx512_mask_gather3div8_si;
14403
1
      break;
14404
1
    case X86::BI__builtin_ia32_gather3siv2df:
14405
1
      IID = Intrinsic::x86_avx512_mask_gather3siv2_df;
14406
1
      break;
14407
1
    case X86::BI__builtin_ia32_gather3siv2di:
14408
1
      IID = Intrinsic::x86_avx512_mask_gather3siv2_di;
14409
1
      break;
14410
1
    case X86::BI__builtin_ia32_gather3siv4df:
14411
1
      IID = Intrinsic::x86_avx512_mask_gather3siv4_df;
14412
1
      break;
14413
1
    case X86::BI__builtin_ia32_gather3siv4di:
14414
1
      IID = Intrinsic::x86_avx512_mask_gather3siv4_di;
14415
1
      break;
14416
1
    case X86::BI__builtin_ia32_gather3siv4sf:
14417
1
      IID = Intrinsic::x86_avx512_mask_gather3siv4_sf;
14418
1
      break;
14419
1
    case X86::BI__builtin_ia32_gather3siv4si:
14420
1
      IID = Intrinsic::x86_avx512_mask_gather3siv4_si;
14421
1
      break;
14422
1
    case X86::BI__builtin_ia32_gather3siv8sf:
14423
1
      IID = Intrinsic::x86_avx512_mask_gather3siv8_sf;
14424
1
      break;
14425
1
    case X86::BI__builtin_ia32_gather3siv8si:
14426
1
      IID = Intrinsic::x86_avx512_mask_gather3siv8_si;
14427
1
      break;
14428
8
    case X86::BI__builtin_ia32_gathersiv8df:
14429
8
      IID = Intrinsic::x86_avx512_mask_gather_dpd_512;
14430
8
      break;
14431
4
    case X86::BI__builtin_ia32_gathersiv16sf:
14432
4
      IID = Intrinsic::x86_avx512_mask_gather_dps_512;
14433
4
      break;
14434
4
    case X86::BI__builtin_ia32_gatherdiv8df:
14435
4
      IID = Intrinsic::x86_avx512_mask_gather_qpd_512;
14436
4
      break;
14437
4
    case X86::BI__builtin_ia32_gatherdiv16sf:
14438
4
      IID = Intrinsic::x86_avx512_mask_gather_qps_512;
14439
4
      break;
14440
8
    case X86::BI__builtin_ia32_gathersiv8di:
14441
8
      IID = Intrinsic::x86_avx512_mask_gather_dpq_512;
14442
8
      break;
14443
4
    case X86::BI__builtin_ia32_gathersiv16si:
14444
4
      IID = Intrinsic::x86_avx512_mask_gather_dpi_512;
14445
4
      break;
14446
4
    case X86::BI__builtin_ia32_gatherdiv8di:
14447
4
      IID = Intrinsic::x86_avx512_mask_gather_qpq_512;
14448
4
      break;
14449
4
    case X86::BI__builtin_ia32_gatherdiv16si:
14450
4
      IID = Intrinsic::x86_avx512_mask_gather_qpi_512;
14451
4
      break;
14452
56
    }
14453
14454
56
    unsigned MinElts = std::min(
14455
56
        cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements(),
14456
56
        cast<llvm::FixedVectorType>(Ops[2]->getType())->getNumElements());
14457
56
    Ops[3] = getMaskVecValue(*this, Ops[3], MinElts);
14458
56
    Function *Intr = CGM.getIntrinsic(IID);
14459
56
    return Builder.CreateCall(Intr, Ops);
14460
56
  }
14461
14462
8
  case X86::BI__builtin_ia32_scattersiv8df:
14463
12
  case X86::BI__builtin_ia32_scattersiv16sf:
14464
16
  case X86::BI__builtin_ia32_scatterdiv8df:
14465
20
  case X86::BI__builtin_ia32_scatterdiv16sf:
14466
28
  case X86::BI__builtin_ia32_scattersiv8di:
14467
32
  case X86::BI__builtin_ia32_scattersiv16si:
14468
36
  case X86::BI__builtin_ia32_scatterdiv8di:
14469
40
  case X86::BI__builtin_ia32_scatterdiv16si:
14470
42
  case X86::BI__builtin_ia32_scatterdiv2df:
14471
44
  case X86::BI__builtin_ia32_scatterdiv2di:
14472
46
  case X86::BI__builtin_ia32_scatterdiv4df:
14473
48
  case X86::BI__builtin_ia32_scatterdiv4di:
14474
50
  case X86::BI__builtin_ia32_scatterdiv4sf:
14475
52
  case X86::BI__builtin_ia32_scatterdiv4si:
14476
54
  case X86::BI__builtin_ia32_scatterdiv8sf:
14477
56
  case X86::BI__builtin_ia32_scatterdiv8si:
14478
58
  case X86::BI__builtin_ia32_scattersiv2df:
14479
60
  case X86::BI__builtin_ia32_scattersiv2di:
14480
62
  case X86::BI__builtin_ia32_scattersiv4df:
14481
64
  case X86::BI__builtin_ia32_scattersiv4di:
14482
66
  case X86::BI__builtin_ia32_scattersiv4sf:
14483
68
  case X86::BI__builtin_ia32_scattersiv4si:
14484
70
  case X86::BI__builtin_ia32_scattersiv8sf:
14485
72
  case X86::BI__builtin_ia32_scattersiv8si: {
14486
72
    Intrinsic::ID IID;
14487
72
    switch (BuiltinID) {
14488
0
    default: llvm_unreachable("Unexpected builtin");
14489
8
    case X86::BI__builtin_ia32_scattersiv8df:
14490
8
      IID = Intrinsic::x86_avx512_mask_scatter_dpd_512;
14491
8
      break;
14492
4
    case X86::BI__builtin_ia32_scattersiv16sf:
14493
4
      IID = Intrinsic::x86_avx512_mask_scatter_dps_512;
14494
4
      break;
14495
4
    case X86::BI__builtin_ia32_scatterdiv8df:
14496
4
      IID = Intrinsic::x86_avx512_mask_scatter_qpd_512;
14497
4
      break;
14498
4
    case X86::BI__builtin_ia32_scatterdiv16sf:
14499
4
      IID = Intrinsic::x86_avx512_mask_scatter_qps_512;
14500
4
      break;
14501
8
    case X86::BI__builtin_ia32_scattersiv8di:
14502
8
      IID = Intrinsic::x86_avx512_mask_scatter_dpq_512;
14503
8
      break;
14504
4
    case X86::BI__builtin_ia32_scattersiv16si:
14505
4
      IID = Intrinsic::x86_avx512_mask_scatter_dpi_512;
14506
4
      break;
14507
4
    case X86::BI__builtin_ia32_scatterdiv8di:
14508
4
      IID = Intrinsic::x86_avx512_mask_scatter_qpq_512;
14509
4
      break;
14510
4
    case X86::BI__builtin_ia32_scatterdiv16si:
14511
4
      IID = Intrinsic::x86_avx512_mask_scatter_qpi_512;
14512
4
      break;
14513
2
    case X86::BI__builtin_ia32_scatterdiv2df:
14514
2
      IID = Intrinsic::x86_avx512_mask_scatterdiv2_df;
14515
2
      break;
14516
2
    case X86::BI__builtin_ia32_scatterdiv2di:
14517
2
      IID = Intrinsic::x86_avx512_mask_scatterdiv2_di;
14518
2
      break;
14519
2
    case X86::BI__builtin_ia32_scatterdiv4df:
14520
2
      IID = Intrinsic::x86_avx512_mask_scatterdiv4_df;
14521
2
      break;
14522
2
    case X86::BI__builtin_ia32_scatterdiv4di:
14523
2
      IID = Intrinsic::x86_avx512_mask_scatterdiv4_di;
14524
2
      break;
14525
2
    case X86::BI__builtin_ia32_scatterdiv4sf:
14526
2
      IID = Intrinsic::x86_avx512_mask_scatterdiv4_sf;
14527
2
      break;
14528
2
    case X86::BI__builtin_ia32_scatterdiv4si:
14529
2
      IID = Intrinsic::x86_avx512_mask_scatterdiv4_si;
14530
2
      break;
14531
2
    case X86::BI__builtin_ia32_scatterdiv8sf:
14532
2
      IID = Intrinsic::x86_avx512_mask_scatterdiv8_sf;
14533
2
      break;
14534
2
    case X86::BI__builtin_ia32_scatterdiv8si:
14535
2
      IID = Intrinsic::x86_avx512_mask_scatterdiv8_si;
14536
2
      break;
14537
2
    case X86::BI__builtin_ia32_scattersiv2df:
14538
2
      IID = Intrinsic::x86_avx512_mask_scattersiv2_df;
14539
2
      break;
14540
2
    case X86::BI__builtin_ia32_scattersiv2di:
14541
2
      IID = Intrinsic::x86_avx512_mask_scattersiv2_di;
14542
2
      break;
14543
2
    case X86::BI__builtin_ia32_scattersiv4df:
14544
2
      IID = Intrinsic::x86_avx512_mask_scattersiv4_df;
14545
2
      break;
14546
2
    case X86::BI__builtin_ia32_scattersiv4di:
14547
2
      IID = Intrinsic::x86_avx512_mask_scattersiv4_di;
14548
2
      break;
14549
2
    case X86::BI__builtin_ia32_scattersiv4sf:
14550
2
      IID = Intrinsic::x86_avx512_mask_scattersiv4_sf;
14551
2
      break;
14552
2
    case X86::BI__builtin_ia32_scattersiv4si:
14553
2
      IID = Intrinsic::x86_avx512_mask_scattersiv4_si;
14554
2
      break;
14555
2
    case X86::BI__builtin_ia32_scattersiv8sf:
14556
2
      IID = Intrinsic::x86_avx512_mask_scattersiv8_sf;
14557
2
      break;
14558
2
    case X86::BI__builtin_ia32_scattersiv8si:
14559
2
      IID = Intrinsic::x86_avx512_mask_scattersiv8_si;
14560
2
      break;
14561
72
    }
14562
14563
72
    unsigned MinElts = std::min(
14564
72
        cast<llvm::FixedVectorType>(Ops[2]->getType())->getNumElements(),
14565
72
        cast<llvm::FixedVectorType>(Ops[3]->getType())->getNumElements());
14566
72
    Ops[1] = getMaskVecValue(*this, Ops[1], MinElts);
14567
72
    Function *Intr = CGM.getIntrinsic(IID);
14568
72
    return Builder.CreateCall(Intr, Ops);
14569
72
  }
14570
14571
14
  case X86::BI__builtin_ia32_vextractf128_pd256:
14572
28
  case X86::BI__builtin_ia32_vextractf128_ps256:
14573
42
  case X86::BI__builtin_ia32_vextractf128_si256:
14574
54
  case X86::BI__builtin_ia32_extract128i256:
14575
60
  case X86::BI__builtin_ia32_extractf64x4_mask:
14576
66
  case X86::BI__builtin_ia32_extractf32x4_mask:
14577
72
  case X86::BI__builtin_ia32_extracti64x4_mask:
14578
78
  case X86::BI__builtin_ia32_extracti32x4_mask:
14579
81
  case X86::BI__builtin_ia32_extractf32x8_mask:
14580
84
  case X86::BI__builtin_ia32_extracti32x8_mask:
14581
87
  case X86::BI__builtin_ia32_extractf32x4_256_mask:
14582
90
  case X86::BI__builtin_ia32_extracti32x4_256_mask:
14583
93
  case X86::BI__builtin_ia32_extractf64x2_256_mask:
14584
96
  case X86::BI__builtin_ia32_extracti64x2_256_mask:
14585
99
  case X86::BI__builtin_ia32_extractf64x2_512_mask:
14586
102
  case X86::BI__builtin_ia32_extracti64x2_512_mask: {
14587
102
    auto *DstTy = cast<llvm::FixedVectorType>(ConvertType(E->getType()));
14588
102
    unsigned NumElts = DstTy->getNumElements();
14589
102
    unsigned SrcNumElts =
14590
102
        cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
14591
102
    unsigned SubVectors = SrcNumElts / NumElts;
14592
102
    unsigned Index = cast<ConstantInt>(Ops[1])->getZExtValue();
14593
102
    assert(llvm::isPowerOf2_32(SubVectors) && "Expected power of 2 subvectors");
14594
102
    Index &= SubVectors - 1; // Remove any extra bits.
14595
102
    Index *= NumElts;
14596
14597
102
    int Indices[16];
14598
458
    for (unsigned i = 0; i != NumElts; ++i)
14599
356
      Indices[i] = i + Index;
14600
14601
102
    Value *Res = Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
14602
102
                                             "extract");
14603
14604
102
    if (Ops.size() == 4)
14605
48
      Res = EmitX86Select(*this, Ops[3], Res, Ops[2]);
14606
14607
102
    return Res;
14608
102
  }
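The extract cases above wrap the immediate into the number of available subvectors and turn it into a shuffle of NumElts consecutive source lanes. A sketch of that index computation (names are illustrative):

#include <vector>

std::vector<int> extractSubvectorIndices(unsigned SrcNumElts, unsigned NumElts,
                                         unsigned Imm) {
  unsigned SubVectors = SrcNumElts / NumElts;
  unsigned Index = (Imm & (SubVectors - 1)) * NumElts; // drop extra immediate bits
  std::vector<int> Indices(NumElts);
  for (unsigned I = 0; I != NumElts; ++I)
    Indices[I] = static_cast<int>(Index + I); // result lane I <- source lane Index+I
  return Indices;
}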
14609
9
  case X86::BI__builtin_ia32_vinsertf128_pd256:
14610
18
  case X86::BI__builtin_ia32_vinsertf128_ps256:
14611
27
  case X86::BI__builtin_ia32_vinsertf128_si256:
14612
39
  case X86::BI__builtin_ia32_insert128i256:
14613
45
  case X86::BI__builtin_ia32_insertf64x4:
14614
51
  case X86::BI__builtin_ia32_insertf32x4:
14615
57
  case X86::BI__builtin_ia32_inserti64x4:
14616
63
  case X86::BI__builtin_ia32_inserti32x4:
14617
66
  case X86::BI__builtin_ia32_insertf32x8:
14618
69
  case X86::BI__builtin_ia32_inserti32x8:
14619
72
  case X86::BI__builtin_ia32_insertf32x4_256:
14620
75
  case X86::BI__builtin_ia32_inserti32x4_256:
14621
78
  case X86::BI__builtin_ia32_insertf64x2_256:
14622
81
  case X86::BI__builtin_ia32_inserti64x2_256:
14623
84
  case X86::BI__builtin_ia32_insertf64x2_512:
14624
87
  case X86::BI__builtin_ia32_inserti64x2_512: {
14625
87
    unsigned DstNumElts =
14626
87
        cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
14627
87
    unsigned SrcNumElts =
14628
87
        cast<llvm::FixedVectorType>(Ops[1]->getType())->getNumElements();
14629
87
    unsigned SubVectors = DstNumElts / SrcNumElts;
14630
87
    unsigned Index = cast<ConstantInt>(Ops[2])->getZExtValue();
14631
87
    assert(llvm::isPowerOf2_32(SubVectors) && "Expected power of 2 subvectors");
14632
87
    Index &= SubVectors - 1; // Remove any extra bits.
14633
87
    Index *= SrcNumElts;
14634
14635
87
    int Indices[16];
14636
819
    for (unsigned i = 0; i != DstNumElts; ++i)
14637
732
      Indices[i] = (i >= SrcNumElts) ? SrcNumElts + (i % SrcNumElts) : i;
14638
14639
87
    Value *Op1 = Builder.CreateShuffleVector(
14640
87
        Ops[1], ArrayRef(Indices, DstNumElts), "widen");
14641
14642
819
    for (unsigned i = 0; i != DstNumElts; ++i) {
14643
732
      if (i >= Index && i < (Index + SrcNumElts))
14644
306
        Indices[i] = (i - Index) + DstNumElts;
14645
426
      else
14646
426
        Indices[i] = i;
14647
732
    }
14648
14649
87
    return Builder.CreateShuffleVector(Ops[0], Op1,
14650
87
                                       ArrayRef(Indices, DstNumElts), "insert");
14651
87
  }
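  // Illustrative trace (example values assumed): inserting a 128-bit
  // <4 x i32> into the low half of a 256-bit <8 x i32> (Index = 0), the first
  // shuffle widens Ops[1] to 8 elements (indices 4..7 read past the narrow
  // source and are undef), then the second shuffle picks
  // {8, 9, 10, 11, 4, 5, 6, 7}: the four widened source elements followed by
  // the untouched upper half of Ops[0].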
14652
6
  case X86::BI__builtin_ia32_pmovqd512_mask:
14653
12
  case X86::BI__builtin_ia32_pmovwb512_mask: {
14654
12
    Value *Res = Builder.CreateTrunc(Ops[0], Ops[1]->getType());
14655
12
    return EmitX86Select(*this, Ops[2], Res, Ops[1]);
14656
6
  }
14657
6
  case X86::BI__builtin_ia32_pmovdb512_mask:
14658
12
  case X86::BI__builtin_ia32_pmovdw512_mask:
14659
18
  case X86::BI__builtin_ia32_pmovqw512_mask: {
14660
18
    if (const auto *C = dyn_cast<Constant>(Ops[2]))
14661
6
      if (C->isAllOnesValue())
14662
6
        return Builder.CreateTrunc(Ops[0], Ops[1]->getType());
14663
14664
12
    Intrinsic::ID IID;
14665
12
    switch (BuiltinID) {
14666
0
    default: llvm_unreachable("Unsupported intrinsic!");
14667
4
    case X86::BI__builtin_ia32_pmovdb512_mask:
14668
4
      IID = Intrinsic::x86_avx512_mask_pmov_db_512;
14669
4
      break;
14670
4
    case X86::BI__builtin_ia32_pmovdw512_mask:
14671
4
      IID = Intrinsic::x86_avx512_mask_pmov_dw_512;
14672
4
      break;
14673
4
    case X86::BI__builtin_ia32_pmovqw512_mask:
14674
4
      IID = Intrinsic::x86_avx512_mask_pmov_qw_512;
14675
4
      break;
14676
12
    }
14677
14678
12
    Function *Intr = CGM.getIntrinsic(IID);
14679
12
    return Builder.CreateCall(Intr, Ops);
14680
12
  }
14681
4
  case X86::BI__builtin_ia32_pblendw128:
14682
8
  case X86::BI__builtin_ia32_blendpd:
14683
12
  case X86::BI__builtin_ia32_blendps:
14684
17
  case X86::BI__builtin_ia32_blendpd256:
14685
22
  case X86::BI__builtin_ia32_blendps256:
14686
26
  case X86::BI__builtin_ia32_pblendw256:
14687
30
  case X86::BI__builtin_ia32_pblendd128:
14688
34
  case X86::BI__builtin_ia32_pblendd256: {
14689
34
    unsigned NumElts =
14690
34
        cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
14691
34
    unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
14692
14693
34
    int Indices[16];
14694
    // If there are more than 8 elements, the immediate is used twice so make
14695
    // sure we handle that.
14696
262
    for (unsigned i = 0; i != NumElts; ++i)
14697
228
      Indices[i] = ((Imm >> (i % 8)) & 0x1) ? NumElts + i : i;
14698
14699
34
    return Builder.CreateShuffleVector(Ops[0], Ops[1],
14700
34
                                       ArrayRef(Indices, NumElts), "blend");
14701
30
  }
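  // Illustrative trace (example immediate assumed): blendps with Imm = 0b0101
  // on two <4 x float> operands yields Indices = {4, 1, 6, 3}; set immediate
  // bits select the corresponding element from Ops[1] (indices >= NumElts),
  // clear bits keep the element from Ops[0].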
14702
9
  case X86::BI__builtin_ia32_pshuflw:
14703
17
  case X86::BI__builtin_ia32_pshuflw256:
14704
23
  case X86::BI__builtin_ia32_pshuflw512: {
14705
23
    uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
14706
23
    auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
14707
23
    unsigned NumElts = Ty->getNumElements();
14708
14709
    // Splat the 8-bits of immediate 4 times to help the loop wrap around.
14710
23
    Imm = (Imm & 0xff) * 0x01010101;
14711
14712
23
    int Indices[32];
14713
72
    for (unsigned l = 0; l != NumElts; l += 8) {
14714
245
      for (unsigned i = 0; i != 4; ++i) {
14715
196
        Indices[l + i] = l + (Imm & 3);
14716
196
        Imm >>= 2;
14717
196
      }
14718
245
      for (unsigned i = 4; i != 8; ++i)
14719
196
        Indices[l + i] = l + i;
14720
49
    }
14721
14722
23
    return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
14723
23
                                       "pshuflw");
14724
17
  }
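  // Illustrative trace (example immediate assumed): pshuflw with Imm = 0x1B on
  // an <8 x i16> decodes the four 2-bit fields as {3, 2, 1, 0}, so the low
  // four words are reversed while indices 4..7 pass the high words through
  // unchanged; wider types repeat this per group of eight elements.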
14725
9
  case X86::BI__builtin_ia32_pshufhw:
14726
17
  case X86::BI__builtin_ia32_pshufhw256:
14727
23
  case X86::BI__builtin_ia32_pshufhw512: {
14728
23
    uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
14729
23
    auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
14730
23
    unsigned NumElts = Ty->getNumElements();
14731
14732
    // Splat the 8-bits of immediate 4 times to help the loop wrap around.
14733
23
    Imm = (Imm & 0xff) * 0x01010101;
14734
14735
23
    int Indices[32];
14736
72
    for (unsigned l = 0; l != NumElts; l += 8) {
14737
245
      for (unsigned i = 0; i != 4; ++i)
14738
196
        Indices[l + i] = l + i;
14739
245
      for (unsigned i = 4; i != 8; ++i) {
14740
196
        Indices[l + i] = l + 4 + (Imm & 3);
14741
196
        Imm >>= 2;
14742
196
      }
14743
49
    }
14744
14745
23
    return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
14746
23
                                       "pshufhw");
14747
17
  }
14748
7
  case X86::BI__builtin_ia32_pshufd:
14749
13
  case X86::BI__builtin_ia32_pshufd256:
14750
19
  case X86::BI__builtin_ia32_pshufd512:
14751
28
  case X86::BI__builtin_ia32_vpermilpd:
14752
44
  case X86::BI__builtin_ia32_vpermilps:
14753
53
  case X86::BI__builtin_ia32_vpermilpd256:
14754
62
  case X86::BI__builtin_ia32_vpermilps256:
14755
68
  case X86::BI__builtin_ia32_vpermilpd512:
14756
74
  case X86::BI__builtin_ia32_vpermilps512: {
14757
74
    uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
14758
74
    auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
14759
74
    unsigned NumElts = Ty->getNumElements();
14760
74
    unsigned NumLanes = Ty->getPrimitiveSizeInBits() / 128;
14761
74
    unsigned NumLaneElts = NumElts / NumLanes;
14762
14763
    // Splat the 8-bits of immediate 4 times to help the loop wrap around.
14764
74
    Imm = (Imm & 0xff) * 0x01010101;
14765
14766
74
    int Indices[16];
14767
226
    for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
14768
658
      for (unsigned i = 0; i != NumLaneElts; ++i) {
14769
506
        Indices[i + l] = (Imm % NumLaneElts) + l;
14770
506
        Imm /= NumLaneElts;
14771
506
      }
14772
152
    }
14773
14774
74
    return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
14775
74
                                       "permil");
14776
68
  }
14777
7
  case X86::BI__builtin_ia32_shufpd:
14778
14
  case X86::BI__builtin_ia32_shufpd256:
14779
20
  case X86::BI__builtin_ia32_shufpd512:
14780
24
  case X86::BI__builtin_ia32_shufps:
14781
33
  case X86::BI__builtin_ia32_shufps256:
14782
39
  case X86::BI__builtin_ia32_shufps512: {
14783
39
    uint32_t Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
14784
39
    auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
14785
39
    unsigned NumElts = Ty->getNumElements();
14786
39
    unsigned NumLanes = Ty->getPrimitiveSizeInBits() / 128;
14787
39
    unsigned NumLaneElts = NumElts / NumLanes;
14788
14789
    // Splat the 8-bits of immediate 4 times to help the loop wrap around.
14790
39
    Imm = (Imm & 0xff) * 0x01010101;
14791
14792
39
    int Indices[16];
14793
130
    for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
14794
365
      for (unsigned i = 0; i != NumLaneElts; ++i) {
14795
274
        unsigned Index = Imm % NumLaneElts;
14796
274
        Imm /= NumLaneElts;
14797
274
        if (i >= (NumLaneElts / 2))
14798
137
          Index += NumElts;
14799
274
        Indices[l + i] = l + Index;
14800
274
      }
14801
91
    }
14802
14803
39
    return Builder.CreateShuffleVector(Ops[0], Ops[1],
14804
39
                                       ArrayRef(Indices, NumElts), "shufp");
14805
33
  }
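  // Illustrative trace (example immediate assumed): shufps with Imm = 0x4E on
  // two <4 x float> operands decodes the fields as {2, 3, 0, 1}; the last two
  // positions sit in the upper half of the lane, so NumElts is added and the
  // final Indices = {2, 3, 4, 5}, i.e. the high half of Ops[0] followed by
  // the low half of Ops[1].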
14806
7
  case X86::BI__builtin_ia32_permdi256:
14807
14
  case X86::BI__builtin_ia32_permdf256:
14808
20
  case X86::BI__builtin_ia32_permdi512:
14809
26
  case X86::BI__builtin_ia32_permdf512: {
14810
26
    unsigned Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
14811
26
    auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
14812
26
    unsigned NumElts = Ty->getNumElements();
14813
14814
    // These intrinsics operate on 256-bit lanes of four 64-bit elements.
14815
26
    int Indices[8];
14816
64
    for (unsigned l = 0; l != NumElts; l += 4)
14817
190
      for (unsigned i = 0; i != 4; ++i)
14818
152
        Indices[l + i] = l + ((Imm >> (2 * i)) & 0x3);
14819
14820
26
    return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
14821
26
                                       "perm");
14822
20
  }
14823
14
  case X86::BI__builtin_ia32_palignr128:
14824
26
  case X86::BI__builtin_ia32_palignr256:
14825
32
  case X86::BI__builtin_ia32_palignr512: {
14826
32
    unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0xff;
14827
14828
32
    unsigned NumElts =
14829
32
        cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
14830
32
    assert(NumElts % 16 == 0);
14831
14832
    // If palignr is shifting the pair of vectors more than the size of two
14833
    // lanes, emit zero.
14834
32
    if (ShiftVal >= 32)
14835
3
      return llvm::Constant::getNullValue(ConvertType(E->getType()));
14836
14837
    // If palignr is shifting the pair of input vectors more than one lane,
14838
    // but less than two lanes, convert to shifting in zeroes.
14839
29
    if (ShiftVal > 16) {
14840
7
      ShiftVal -= 16;
14841
7
      Ops[1] = Ops[0];
14842
7
      Ops[0] = llvm::Constant::getNullValue(Ops[0]->getType());
14843
7
    }
14844
14845
29
    int Indices[64];
14846
    // 256-bit palignr operates on 128-bit lanes so we need to handle that
14847
88
    for (unsigned l = 0; l != NumElts; l += 16) {
14848
1.00k
      for (unsigned i = 0; i != 16; ++i) {
14849
944
        unsigned Idx = ShiftVal + i;
14850
944
        if (Idx >= 16)
14851
134
          Idx += NumElts - 16; // End of lane, switch operand.
14852
944
        Indices[l + i] = Idx + l;
14853
944
      }
14854
59
    }
14855
14856
29
    return Builder.CreateShuffleVector(Ops[1], Ops[0],
14857
29
                                       ArrayRef(Indices, NumElts), "palignr");
14858
32
  }
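  // Illustrative trace (example immediate assumed): palignr128 with
  // ShiftVal = 4 produces Idx = 4..19; values >= 16 select the second shuffle
  // operand (Ops[0]), so the result is bytes 4..15 of Ops[1] followed by
  // bytes 0..3 of Ops[0], i.e. a 4-byte right shift of the concatenated pair.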
14859
3
  case X86::BI__builtin_ia32_alignd128:
14860
6
  case X86::BI__builtin_ia32_alignd256:
14861
12
  case X86::BI__builtin_ia32_alignd512:
14862
15
  case X86::BI__builtin_ia32_alignq128:
14863
18
  case X86::BI__builtin_ia32_alignq256:
14864
24
  case X86::BI__builtin_ia32_alignq512: {
14865
24
    unsigned NumElts =
14866
24
        cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
14867
24
    unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0xff;
14868
14869
    // Mask the shift amount to width of a vector.
14870
24
    ShiftVal &= NumElts - 1;
14871
14872
24
    int Indices[16];
14873
222
    for (unsigned i = 0; i != NumElts; ++i)
14874
198
      Indices[i] = i + ShiftVal;
14875
14876
24
    return Builder.CreateShuffleVector(Ops[1], Ops[0],
14877
24
                                       ArrayRef(Indices, NumElts), "valign");
14878
18
  }
14879
3
  case X86::BI__builtin_ia32_shuf_f32x4_256:
14880
6
  case X86::BI__builtin_ia32_shuf_f64x2_256:
14881
9
  case X86::BI__builtin_ia32_shuf_i32x4_256:
14882
12
  case X86::BI__builtin_ia32_shuf_i64x2_256:
14883
18
  case X86::BI__builtin_ia32_shuf_f32x4:
14884
24
  case X86::BI__builtin_ia32_shuf_f64x2:
14885
30
  case X86::BI__builtin_ia32_shuf_i32x4:
14886
36
  case X86::BI__builtin_ia32_shuf_i64x2: {
14887
36
    unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
14888
36
    auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
14889
36
    unsigned NumElts = Ty->getNumElements();
14890
36
    unsigned NumLanes = Ty->getPrimitiveSizeInBits() == 512 ? 4 : 2;
14891
36
    unsigned NumLaneElts = NumElts / NumLanes;
14892
14893
36
    int Indices[16];
14894
156
    for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
14895
120
      unsigned Index = (Imm % NumLanes) * NumLaneElts;
14896
120
      Imm /= NumLanes; // Discard the bits we just used.
14897
120
      if (l >= (NumElts / 2))
14898
60
        Index += NumElts; // Switch to other source.
14899
480
      for (unsigned i = 0; i != NumLaneElts; ++i) {
14900
360
        Indices[l + i] = Index + i;
14901
360
      }
14902
120
    }
14903
14904
36
    return Builder.CreateShuffleVector(Ops[0], Ops[1],
14905
36
                                       ArrayRef(Indices, NumElts), "shuf");
14906
30
  }
14907
14908
9
  case X86::BI__builtin_ia32_vperm2f128_pd256:
14909
18
  case X86::BI__builtin_ia32_vperm2f128_ps256:
14910
27
  case X86::BI__builtin_ia32_vperm2f128_si256:
14911
31
  case X86::BI__builtin_ia32_permti256: {
14912
31
    unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
14913
31
    unsigned NumElts =
14914
31
        cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
14915
14916
    // This takes a very simple approach since there are two lanes and a
14917
    // shuffle can have 2 inputs. So we reserve the first input for the first
14918
    // lane and the second input for the second lane. This may result in
14919
    // duplicate sources, but this can be dealt with in the backend.
14920
14921
31
    Value *OutOps[2];
14922
31
    int Indices[8];
14923
93
    for (unsigned l = 0; l != 2; ++l) {
14924
      // Determine the source for this lane.
14925
62
      if (Imm & (1 << ((l * 4) + 3)))
14926
4
        OutOps[l] = llvm::ConstantAggregateZero::get(Ops[0]->getType());
14927
58
      else if (Imm & (1 << ((l * 4) + 1)))
14928
31
        OutOps[l] = Ops[1];
14929
27
      else
14930
27
        OutOps[l] = Ops[0];
14931
14932
258
      for (unsigned i = 0; i != NumElts/2; ++i) {
14933
        // Start with ith element of the source for this lane.
14934
196
        unsigned Idx = (l * NumElts) + i;
14935
        // If bit 0 of the immediate half is set, switch to the high half of
14936
        // the source.
14937
196
        if (Imm & (1 << (l * 4)))
14938
112
          Idx += NumElts/2;
14939
196
        Indices[(l * (NumElts/2)) + i] = Idx;
14940
196
      }
14941
62
    }
14942
14943
31
    return Builder.CreateShuffleVector(OutOps[0], OutOps[1],
14944
31
                                       ArrayRef(Indices, NumElts), "vperm");
14945
27
  }
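  // Illustrative trace (example immediate assumed): vperm2f128 with Imm = 0x31
  // keeps both lanes live (neither zero bit is set), picks Ops[0] for lane 0
  // and Ops[1] for lane 1, and bit 0 of each nibble selects the high half of
  // the chosen source, giving {high(Ops[0]), high(Ops[1])}.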
14946
14947
18
  case X86::BI__builtin_ia32_pslldqi128_byteshift:
14948
26
  case X86::BI__builtin_ia32_pslldqi256_byteshift:
14949
28
  case X86::BI__builtin_ia32_pslldqi512_byteshift: {
14950
28
    unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
14951
28
    auto *ResultType = cast<llvm::FixedVectorType>(Ops[0]->getType());
14952
    // Builtin type is vXi64 so multiply by 8 to get bytes.
14953
28
    unsigned NumElts = ResultType->getNumElements() * 8;
14954
14955
    // If pslldq is shifting the vector more than 15 bytes, emit zero.
14956
28
    if (ShiftVal >= 16)
14957
6
      return llvm::Constant::getNullValue(ResultType);
14958
14959
22
    int Indices[64];
14960
    // 256/512-bit pslldq operates on 128-bit lanes so we need to handle that
14961
58
    for (unsigned l = 0; l != NumElts; l += 16) {
14962
612
      for (unsigned i = 0; i != 16; ++i) {
14963
576
        unsigned Idx = NumElts + i - ShiftVal;
14964
576
        if (Idx < NumElts) Idx -= NumElts - 16; // end of lane, switch operand.
14965
576
        Indices[l + i] = Idx + l;
14966
576
      }
14967
36
    }
14968
14969
22
    auto *VecTy = llvm::FixedVectorType::get(Int8Ty, NumElts);
14970
22
    Value *Cast = Builder.CreateBitCast(Ops[0], VecTy, "cast");
14971
22
    Value *Zero = llvm::Constant::getNullValue(VecTy);
14972
22
    Value *SV = Builder.CreateShuffleVector(
14973
22
        Zero, Cast, ArrayRef(Indices, NumElts), "pslldq");
14974
22
    return Builder.CreateBitCast(SV, Ops[0]->getType(), "cast");
14975
28
  }
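  // Illustrative trace (example immediate assumed): pslldqi128 with
  // ShiftVal = 4 on a 16-byte lane computes Idx = 16 + i - 4; for i >= 4 the
  // index stays in the Cast operand (bytes 0..11), while i < 4 reads the Zero
  // operand, so the vector is shifted left by four bytes with zero fill.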
14976
18
  case X86::BI__builtin_ia32_psrldqi128_byteshift:
14977
26
  case X86::BI__builtin_ia32_psrldqi256_byteshift:
14978
28
  case X86::BI__builtin_ia32_psrldqi512_byteshift: {
14979
28
    unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
14980
28
    auto *ResultType = cast<llvm::FixedVectorType>(Ops[0]->getType());
14981
    // Builtin type is vXi64 so multiply by 8 to get bytes.
14982
28
    unsigned NumElts = ResultType->getNumElements() * 8;
14983
14984
    // If psrldq is shifting the vector more than 15 bytes, emit zero.
14985
28
    if (ShiftVal >= 16)
14986
6
      return llvm::Constant::getNullValue(ResultType);
14987
14988
22
    int Indices[64];
14989
    // 256/512-bit psrldq operates on 128-bit lanes so we need to handle that
14990
58
    for (unsigned l = 0; l != NumElts; l += 16) {
14991
612
      for (unsigned i = 0; i != 16; ++i) {
14992
576
        unsigned Idx = i + ShiftVal;
14993
576
        if (Idx >= 16) Idx += NumElts - 16; // end of lane, switch operand.
14994
576
        Indices[l + i] = Idx + l;
14995
576
      }
14996
36
    }
14997
14998
22
    auto *VecTy = llvm::FixedVectorType::get(Int8Ty, NumElts);
14999
22
    Value *Cast = Builder.CreateBitCast(Ops[0], VecTy, "cast");
15000
22
    Value *Zero = llvm::Constant::getNullValue(VecTy);
15001
22
    Value *SV = Builder.CreateShuffleVector(
15002
22
        Cast, Zero, ArrayRef(Indices, NumElts), "psrldq");
15003
22
    return Builder.CreateBitCast(SV, ResultType, "cast");
15004
28
  }
15005
1
  case X86::BI__builtin_ia32_kshiftliqi:
15006
3
  case X86::BI__builtin_ia32_kshiftlihi:
15007
5
  case X86::BI__builtin_ia32_kshiftlisi:
15008
7
  case X86::BI__builtin_ia32_kshiftlidi: {
15009
7
    unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
15010
7
    unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
15011
15012
7
    if (ShiftVal >= NumElts)
15013
0
      return llvm::Constant::getNullValue(Ops[0]->getType());
15014
15015
7
    Value *In = getMaskVecValue(*this, Ops[0], NumElts);
15016
15017
7
    int Indices[64];
15018
239
    for (unsigned i = 0; i != NumElts; ++i)
15019
232
      Indices[i] = NumElts + i - ShiftVal;
15020
15021
7
    Value *Zero = llvm::Constant::getNullValue(In->getType());
15022
7
    Value *SV = Builder.CreateShuffleVector(
15023
7
        Zero, In, ArrayRef(Indices, NumElts), "kshiftl");
15024
7
    return Builder.CreateBitCast(SV, Ops[0]->getType());
15025
7
  }
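  // Illustrative trace (example values assumed): kshiftlihi (16-bit mask) with
  // ShiftVal = 2 builds Indices = {14, 15, 16, ..., 29}; the first two
  // positions read zeros and the rest read In[0..13], so mask bit i of the
  // result is the original bit i - 2, i.e. a left shift by two with zero fill.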
15026
1
  case X86::BI__builtin_ia32_kshiftriqi:
15027
3
  case X86::BI__builtin_ia32_kshiftrihi:
15028
5
  case X86::BI__builtin_ia32_kshiftrisi:
15029
7
  case X86::BI__builtin_ia32_kshiftridi: {
15030
7
    unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
15031
7
    unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
15032
15033
7
    if (ShiftVal >= NumElts)
15034
0
      return llvm::Constant::getNullValue(Ops[0]->getType());
15035
15036
7
    Value *In = getMaskVecValue(*this, Ops[0], NumElts);
15037
15038
7
    int Indices[64];
15039
239
    for (unsigned i = 0; i != NumElts; ++i)
15040
232
      Indices[i] = i + ShiftVal;
15041
15042
7
    Value *Zero = llvm::Constant::getNullValue(In->getType());
15043
7
    Value *SV = Builder.CreateShuffleVector(
15044
7
        In, Zero, ArrayRef(Indices, NumElts), "kshiftr");
15045
7
    return Builder.CreateBitCast(SV, Ops[0]->getType());
15046
7
  }
15047
7
  case X86::BI__builtin_ia32_movnti:
15048
12
  case X86::BI__builtin_ia32_movnti64:
15049
14
  case X86::BI__builtin_ia32_movntsd:
15050
16
  case X86::BI__builtin_ia32_movntss: {
15051
16
    llvm::MDNode *Node = llvm::MDNode::get(
15052
16
        getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));
15053
15054
16
    Value *Ptr = Ops[0];
15055
16
    Value *Src = Ops[1];
15056
15057
    // Extract the 0'th element of the source vector.
15058
16
    if (BuiltinID == X86::BI__builtin_ia32_movntsd ||
15059
16
        BuiltinID == X86::BI__builtin_ia32_movntss)
15060
4
      Src = Builder.CreateExtractElement(Src, (uint64_t)0, "extract");
15061
15062
    // Unaligned nontemporal store of the scalar value.
15063
16
    StoreInst *SI = Builder.CreateDefaultAlignedStore(Src, Ptr);
15064
16
    SI->setMetadata(llvm::LLVMContext::MD_nontemporal, Node);
15065
16
    SI->setAlignment(llvm::Align(1));
15066
16
    return SI;
15067
14
  }
15068
  // Rotate is a special case of funnel shift - 1st 2 args are the same.
15069
4
  case X86::BI__builtin_ia32_vprotb:
15070
8
  case X86::BI__builtin_ia32_vprotw:
15071
12
  case X86::BI__builtin_ia32_vprotd:
15072
16
  case X86::BI__builtin_ia32_vprotq:
15073
20
  case X86::BI__builtin_ia32_vprotbi:
15074
24
  case X86::BI__builtin_ia32_vprotwi:
15075
28
  case X86::BI__builtin_ia32_vprotdi:
15076
32
  case X86::BI__builtin_ia32_vprotqi:
15077
35
  case X86::BI__builtin_ia32_prold128:
15078
38
  case X86::BI__builtin_ia32_prold256:
15079
44
  case X86::BI__builtin_ia32_prold512:
15080
47
  case X86::BI__builtin_ia32_prolq128:
15081
50
  case X86::BI__builtin_ia32_prolq256:
15082
56
  case X86::BI__builtin_ia32_prolq512:
15083
57
  case X86::BI__builtin_ia32_prolvd128:
15084
58
  case X86::BI__builtin_ia32_prolvd256:
15085
60
  case X86::BI__builtin_ia32_prolvd512:
15086
61
  case X86::BI__builtin_ia32_prolvq128:
15087
62
  case X86::BI__builtin_ia32_prolvq256:
15088
64
  case X86::BI__builtin_ia32_prolvq512:
15089
64
    return EmitX86FunnelShift(*this, Ops[0], Ops[0], Ops[1], false);
15090
3
  case X86::BI__builtin_ia32_prord128:
15091
6
  case X86::BI__builtin_ia32_prord256:
15092
12
  case X86::BI__builtin_ia32_prord512:
15093
15
  case X86::BI__builtin_ia32_prorq128:
15094
18
  case X86::BI__builtin_ia32_prorq256:
15095
24
  case X86::BI__builtin_ia32_prorq512:
15096
25
  case X86::BI__builtin_ia32_prorvd128:
15097
26
  case X86::BI__builtin_ia32_prorvd256:
15098
28
  case X86::BI__builtin_ia32_prorvd512:
15099
29
  case X86::BI__builtin_ia32_prorvq128:
15100
30
  case X86::BI__builtin_ia32_prorvq256:
15101
32
  case X86::BI__builtin_ia32_prorvq512:
15102
32
    return EmitX86FunnelShift(*this, Ops[0], Ops[0], Ops[1], true);
15103
107
  case X86::BI__builtin_ia32_selectb_128:
15104
210
  case X86::BI__builtin_ia32_selectb_256:
15105
309
  case X86::BI__builtin_ia32_selectb_512:
15106
486
  case X86::BI__builtin_ia32_selectw_128:
15107
663
  case X86::BI__builtin_ia32_selectw_256:
15108
836
  case X86::BI__builtin_ia32_selectw_512:
15109
947
  case X86::BI__builtin_ia32_selectd_128:
15110
1.06k
  case X86::BI__builtin_ia32_selectd_256:
15111
1.25k
  case X86::BI__builtin_ia32_selectd_512:
15112
1.35k
  case X86::BI__builtin_ia32_selectq_128:
15113
1.45k
  case X86::BI__builtin_ia32_selectq_256:
15114
1.65k
  case X86::BI__builtin_ia32_selectq_512:
15115
1.69k
  case X86::BI__builtin_ia32_selectph_128:
15116
1.73k
  case X86::BI__builtin_ia32_selectph_256:
15117
1.76k
  case X86::BI__builtin_ia32_selectph_512:
15118
1.76k
  case X86::BI__builtin_ia32_selectpbf_128:
15119
1.76k
  case X86::BI__builtin_ia32_selectpbf_256:
15120
1.76k
  case X86::BI__builtin_ia32_selectpbf_512:
15121
1.84k
  case X86::BI__builtin_ia32_selectps_128:
15122
1.92k
  case X86::BI__builtin_ia32_selectps_256:
15123
2.08k
  case X86::BI__builtin_ia32_selectps_512:
15124
2.15k
  case X86::BI__builtin_ia32_selectpd_128:
15125
2.23k
  case X86::BI__builtin_ia32_selectpd_256:
15126
2.40k
  case X86::BI__builtin_ia32_selectpd_512:
15127
2.40k
    return EmitX86Select(*this, Ops[0], Ops[1], Ops[2]);
15128
10
  case X86::BI__builtin_ia32_selectsh_128:
15129
10
  case X86::BI__builtin_ia32_selectsbf_128:
15130
30
  case X86::BI__builtin_ia32_selectss_128:
15131
50
  case X86::BI__builtin_ia32_selectsd_128: {
15132
50
    Value *A = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
15133
50
    Value *B = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
15134
50
    A = EmitX86ScalarSelect(*this, Ops[0], A, B);
15135
50
    return Builder.CreateInsertElement(Ops[1], A, (uint64_t)0);
15136
30
  }
15137
36
  case X86::BI__builtin_ia32_cmpb128_mask:
15138
72
  case X86::BI__builtin_ia32_cmpb256_mask:
15139
108
  case X86::BI__builtin_ia32_cmpb512_mask:
15140
144
  case X86::BI__builtin_ia32_cmpw128_mask:
15141
180
  case X86::BI__builtin_ia32_cmpw256_mask:
15142
216
  case X86::BI__builtin_ia32_cmpw512_mask:
15143
235
  case X86::BI__builtin_ia32_cmpd128_mask:
15144
253
  case X86::BI__builtin_ia32_cmpd256_mask:
15145
290
  case X86::BI__builtin_ia32_cmpd512_mask:
15146
308
  case X86::BI__builtin_ia32_cmpq128_mask:
15147
327
  case X86::BI__builtin_ia32_cmpq256_mask:
15148
361
  case X86::BI__builtin_ia32_cmpq512_mask: {
15149
361
    unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
15150
361
    return EmitX86MaskedCompare(*this, CC, true, Ops);
15151
327
  }
15152
28
  case X86::BI__builtin_ia32_ucmpb128_mask:
15153
56
  case X86::BI__builtin_ia32_ucmpb256_mask:
15154
166
  case X86::BI__builtin_ia32_ucmpb512_mask:
15155
194
  case X86::BI__builtin_ia32_ucmpw128_mask:
15156
222
  case X86::BI__builtin_ia32_ucmpw256_mask:
15157
332
  case X86::BI__builtin_ia32_ucmpw512_mask:
15158
346
  case X86::BI__builtin_ia32_ucmpd128_mask:
15159
358
  case X86::BI__builtin_ia32_ucmpd256_mask:
15160
497
  case X86::BI__builtin_ia32_ucmpd512_mask:
15161
511
  case X86::BI__builtin_ia32_ucmpq128_mask:
15162
523
  case X86::BI__builtin_ia32_ucmpq256_mask:
15163
590
  case X86::BI__builtin_ia32_ucmpq512_mask: {
15164
590
    unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
15165
590
    return EmitX86MaskedCompare(*this, CC, false, Ops);
15166
523
  }
15167
36
  case X86::BI__builtin_ia32_vpcomb:
15168
72
  case X86::BI__builtin_ia32_vpcomw:
15169
108
  case X86::BI__builtin_ia32_vpcomd:
15170
144
  case X86::BI__builtin_ia32_vpcomq:
15171
144
    return EmitX86vpcom(*this, Ops, true);
15172
36
  case X86::BI__builtin_ia32_vpcomub:
15173
72
  case X86::BI__builtin_ia32_vpcomuw:
15174
108
  case X86::BI__builtin_ia32_vpcomud:
15175
144
  case X86::BI__builtin_ia32_vpcomuq:
15176
144
    return EmitX86vpcom(*this, Ops, false);
15177
15178
2
  case X86::BI__builtin_ia32_kortestcqi:
15179
8
  case X86::BI__builtin_ia32_kortestchi:
15180
12
  case X86::BI__builtin_ia32_kortestcsi:
15181
16
  case X86::BI__builtin_ia32_kortestcdi: {
15182
16
    Value *Or = EmitX86MaskLogic(*this, Instruction::Or, Ops);
15183
16
    Value *C = llvm::Constant::getAllOnesValue(Ops[0]->getType());
15184
16
    Value *Cmp = Builder.CreateICmpEQ(Or, C);
15185
16
    return Builder.CreateZExt(Cmp, ConvertType(E->getType()));
15186
12
  }
15187
2
  case X86::BI__builtin_ia32_kortestzqi:
15188
8
  case X86::BI__builtin_ia32_kortestzhi:
15189
12
  case X86::BI__builtin_ia32_kortestzsi:
15190
16
  case X86::BI__builtin_ia32_kortestzdi: {
15191
16
    Value *Or = EmitX86MaskLogic(*this, Instruction::Or, Ops);
15192
16
    Value *C = llvm::Constant::getNullValue(Ops[0]->getType());
15193
16
    Value *Cmp = Builder.CreateICmpEQ(Or, C);
15194
16
    return Builder.CreateZExt(Cmp, ConvertType(E->getType()));
15195
12
  }
15196
15197
2
  case X86::BI__builtin_ia32_ktestcqi:
15198
4
  case X86::BI__builtin_ia32_ktestzqi:
15199
6
  case X86::BI__builtin_ia32_ktestchi:
15200
8
  case X86::BI__builtin_ia32_ktestzhi:
15201
12
  case X86::BI__builtin_ia32_ktestcsi:
15202
16
  case X86::BI__builtin_ia32_ktestzsi:
15203
20
  case X86::BI__builtin_ia32_ktestcdi:
15204
24
  case X86::BI__builtin_ia32_ktestzdi: {
15205
24
    Intrinsic::ID IID;
15206
24
    switch (BuiltinID) {
15207
0
    default: llvm_unreachable("Unsupported intrinsic!");
15208
2
    case X86::BI__builtin_ia32_ktestcqi:
15209
2
      IID = Intrinsic::x86_avx512_ktestc_b;
15210
2
      break;
15211
2
    case X86::BI__builtin_ia32_ktestzqi:
15212
2
      IID = Intrinsic::x86_avx512_ktestz_b;
15213
2
      break;
15214
2
    case X86::BI__builtin_ia32_ktestchi:
15215
2
      IID = Intrinsic::x86_avx512_ktestc_w;
15216
2
      break;
15217
2
    case X86::BI__builtin_ia32_ktestzhi:
15218
2
      IID = Intrinsic::x86_avx512_ktestz_w;
15219
2
      break;
15220
4
    case X86::BI__builtin_ia32_ktestcsi:
15221
4
      IID = Intrinsic::x86_avx512_ktestc_d;
15222
4
      break;
15223
4
    case X86::BI__builtin_ia32_ktestzsi:
15224
4
      IID = Intrinsic::x86_avx512_ktestz_d;
15225
4
      break;
15226
4
    case X86::BI__builtin_ia32_ktestcdi:
15227
4
      IID = Intrinsic::x86_avx512_ktestc_q;
15228
4
      break;
15229
4
    case X86::BI__builtin_ia32_ktestzdi:
15230
4
      IID = Intrinsic::x86_avx512_ktestz_q;
15231
4
      break;
15232
24
    }
15233
15234
24
    unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
15235
24
    Value *LHS = getMaskVecValue(*this, Ops[0], NumElts);
15236
24
    Value *RHS = getMaskVecValue(*this, Ops[1], NumElts);
15237
24
    Function *Intr = CGM.getIntrinsic(IID);
15238
24
    return Builder.CreateCall(Intr, {LHS, RHS});
15239
24
  }
15240
15241
1
  case X86::BI__builtin_ia32_kaddqi:
15242
2
  case X86::BI__builtin_ia32_kaddhi:
15243
4
  case X86::BI__builtin_ia32_kaddsi:
15244
6
  case X86::BI__builtin_ia32_kadddi: {
15245
6
    Intrinsic::ID IID;
15246
6
    switch (BuiltinID) {
15247
0
    default: llvm_unreachable("Unsupported intrinsic!");
15248
1
    case X86::BI__builtin_ia32_kaddqi:
15249
1
      IID = Intrinsic::x86_avx512_kadd_b;
15250
1
      break;
15251
1
    case X86::BI__builtin_ia32_kaddhi:
15252
1
      IID = Intrinsic::x86_avx512_kadd_w;
15253
1
      break;
15254
2
    case X86::BI__builtin_ia32_kaddsi:
15255
2
      IID = Intrinsic::x86_avx512_kadd_d;
15256
2
      break;
15257
2
    case X86::BI__builtin_ia32_kadddi:
15258
2
      IID = Intrinsic::x86_avx512_kadd_q;
15259
2
      break;
15260
6
    }
15261
15262
6
    unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
15263
6
    Value *LHS = getMaskVecValue(*this, Ops[0], NumElts);
15264
6
    Value *RHS = getMaskVecValue(*this, Ops[1], NumElts);
15265
6
    Function *Intr = CGM.getIntrinsic(IID);
15266
6
    Value *Res = Builder.CreateCall(Intr, {LHS, RHS});
15267
6
    return Builder.CreateBitCast(Res, Ops[0]->getType());
15268
6
  }
15269
1
  case X86::BI__builtin_ia32_kandqi:
15270
3
  case X86::BI__builtin_ia32_kandhi:
15271
5
  case X86::BI__builtin_ia32_kandsi:
15272
7
  case X86::BI__builtin_ia32_kanddi:
15273
7
    return EmitX86MaskLogic(*this, Instruction::And, Ops);
15274
1
  case X86::BI__builtin_ia32_kandnqi:
15275
3
  case X86::BI__builtin_ia32_kandnhi:
15276
5
  case X86::BI__builtin_ia32_kandnsi:
15277
7
  case X86::BI__builtin_ia32_kandndi:
15278
7
    return EmitX86MaskLogic(*this, Instruction::And, Ops, true);
15279
1
  case X86::BI__builtin_ia32_korqi:
15280
4
  case X86::BI__builtin_ia32_korhi:
15281
6
  case X86::BI__builtin_ia32_korsi:
15282
8
  case X86::BI__builtin_ia32_kordi:
15283
8
    return EmitX86MaskLogic(*this, Instruction::Or, Ops);
15284
1
  case X86::BI__builtin_ia32_kxnorqi:
15285
3
  case X86::BI__builtin_ia32_kxnorhi:
15286
5
  case X86::BI__builtin_ia32_kxnorsi:
15287
7
  case X86::BI__builtin_ia32_kxnordi:
15288
7
    return EmitX86MaskLogic(*this, Instruction::Xor, Ops, true);
15289
1
  case X86::BI__builtin_ia32_kxorqi:
15290
3
  case X86::BI__builtin_ia32_kxorhi:
15291
5
  case X86::BI__builtin_ia32_kxorsi:
15292
7
  case X86::BI__builtin_ia32_kxordi:
15293
7
    return EmitX86MaskLogic(*this, Instruction::Xor,  Ops);
15294
1
  case X86::BI__builtin_ia32_knotqi:
15295
3
  case X86::BI__builtin_ia32_knothi:
15296
5
  case X86::BI__builtin_ia32_knotsi:
15297
9
  case X86::BI__builtin_ia32_knotdi: {
15298
9
    unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
15299
9
    Value *Res = getMaskVecValue(*this, Ops[0], NumElts);
15300
9
    return Builder.CreateBitCast(Builder.CreateNot(Res),
15301
9
                                 Ops[0]->getType());
15302
5
  }
15303
4
  case X86::BI__builtin_ia32_kmovb:
15304
12
  case X86::BI__builtin_ia32_kmovw:
15305
20
  case X86::BI__builtin_ia32_kmovd:
15306
28
  case X86::BI__builtin_ia32_kmovq: {
15307
    // Bitcast to vXi1 type and then back to integer. This gets the mask
15308
    // register type into the IR, but might be optimized out depending on
15309
    // what's around it.
15310
28
    unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
15311
28
    Value *Res = getMaskVecValue(*this, Ops[0], NumElts);
15312
28
    return Builder.CreateBitCast(Res, Ops[0]->getType());
15313
20
  }
15314
15315
2
  case X86::BI__builtin_ia32_kunpckdi:
15316
4
  case X86::BI__builtin_ia32_kunpcksi:
15317
6
  case X86::BI__builtin_ia32_kunpckhi: {
15318
6
    unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
15319
6
    Value *LHS = getMaskVecValue(*this, Ops[0], NumElts);
15320
6
    Value *RHS = getMaskVecValue(*this, Ops[1], NumElts);
15321
6
    int Indices[64];
15322
230
    for (unsigned i = 0; i != NumElts; ++i)
15323
224
      Indices[i] = i;
15324
15325
    // First extract half of each vector. This gives better codegen than
15326
    // doing it in a single shuffle.
15327
6
    LHS = Builder.CreateShuffleVector(LHS, LHS, ArrayRef(Indices, NumElts / 2));
15328
6
    RHS = Builder.CreateShuffleVector(RHS, RHS, ArrayRef(Indices, NumElts / 2));
15329
    // Concat the vectors.
15330
    // NOTE: Operands are swapped to match the intrinsic definition.
15331
6
    Value *Res =
15332
6
        Builder.CreateShuffleVector(RHS, LHS, ArrayRef(Indices, NumElts));
15333
6
    return Builder.CreateBitCast(Res, Ops[0]->getType());
15334
4
  }
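  // Illustrative trace (example width assumed): for a 16-bit kunpck the first
  // two shuffles truncate LHS and RHS to their low 8 elements, and the final
  // shuffle concatenates them as {RHS[0..7], LHS[0..7]}, matching the
  // "operands swapped" note above.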
15335
15336
1
  case X86::BI__builtin_ia32_vplzcntd_128:
15337
2
  case X86::BI__builtin_ia32_vplzcntd_256:
15338
3
  case X86::BI__builtin_ia32_vplzcntd_512:
15339
4
  case X86::BI__builtin_ia32_vplzcntq_128:
15340
5
  case X86::BI__builtin_ia32_vplzcntq_256:
15341
6
  case X86::BI__builtin_ia32_vplzcntq_512: {
15342
6
    Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType());
15343
6
    return Builder.CreateCall(F, {Ops[0],Builder.getInt1(false)});
15344
5
  }
15345
8
  case X86::BI__builtin_ia32_sqrtss:
15346
15
  case X86::BI__builtin_ia32_sqrtsd: {
15347
15
    Value *A = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
15348
15
    Function *F;
15349
15
    if (Builder.getIsFPConstrained()) {
15350
2
      CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
15351
2
      F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt,
15352
2
                           A->getType());
15353
2
      A = Builder.CreateConstrainedFPCall(F, {A});
15354
13
    } else {
15355
13
      F = CGM.getIntrinsic(Intrinsic::sqrt, A->getType());
15356
13
      A = Builder.CreateCall(F, {A});
15357
13
    }
15358
15
    return Builder.CreateInsertElement(Ops[0], A, (uint64_t)0);
15359
8
  }
15360
6
  case X86::BI__builtin_ia32_sqrtsh_round_mask:
15361
28
  case X86::BI__builtin_ia32_sqrtsd_round_mask:
15362
50
  case X86::BI__builtin_ia32_sqrtss_round_mask: {
15363
50
    unsigned CC = cast<llvm::ConstantInt>(Ops[4])->getZExtValue();
15364
    // Support only if the rounding mode is 4 (AKA CUR_DIRECTION),
15365
    // otherwise keep the intrinsic.
15366
50
    if (CC != 4) {
15367
15
      Intrinsic::ID IID;
15368
15369
15
      switch (BuiltinID) {
15370
0
      default:
15371
0
        llvm_unreachable("Unsupported intrinsic!");
15372
3
      case X86::BI__builtin_ia32_sqrtsh_round_mask:
15373
3
        IID = Intrinsic::x86_avx512fp16_mask_sqrt_sh;
15374
3
        break;
15375
6
      case X86::BI__builtin_ia32_sqrtsd_round_mask:
15376
6
        IID = Intrinsic::x86_avx512_mask_sqrt_sd;
15377
6
        break;
15378
6
      case X86::BI__builtin_ia32_sqrtss_round_mask:
15379
6
        IID = Intrinsic::x86_avx512_mask_sqrt_ss;
15380
6
        break;
15381
15
      }
15382
15
      return Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
15383
15
    }
15384
35
    Value *A = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
15385
35
    Function *F;
15386
35
    if (Builder.getIsFPConstrained()) {
15387
12
      CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
15388
12
      F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt,
15389
12
                           A->getType());
15390
12
      A = Builder.CreateConstrainedFPCall(F, A);
15391
23
    } else {
15392
23
      F = CGM.getIntrinsic(Intrinsic::sqrt, A->getType());
15393
23
      A = Builder.CreateCall(F, A);
15394
23
    }
15395
35
    Value *Src = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
15396
35
    A = EmitX86ScalarSelect(*this, Ops[3], A, Src);
15397
35
    return Builder.CreateInsertElement(Ops[0], A, (uint64_t)0);
15398
50
  }
15399
8
  case X86::BI__builtin_ia32_sqrtpd256:
15400
16
  case X86::BI__builtin_ia32_sqrtpd:
15401
24
  case X86::BI__builtin_ia32_sqrtps256:
15402
33
  case X86::BI__builtin_ia32_sqrtps:
15403
34
  case X86::BI__builtin_ia32_sqrtph256:
15404
35
  case X86::BI__builtin_ia32_sqrtph:
15405
41
  case X86::BI__builtin_ia32_sqrtph512:
15406
55
  case X86::BI__builtin_ia32_sqrtps512:
15407
77
  case X86::BI__builtin_ia32_sqrtpd512: {
15408
77
    if (Ops.size() == 2) {
15409
42
      unsigned CC = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
15410
      // Support only if the rounding mode is 4 (AKA CUR_DIRECTION),
15411
      // otherwise keep the intrinsic.
15412
42
      if (CC != 4) {
15413
15
        Intrinsic::ID IID;
15414
15415
15
        switch (BuiltinID) {
15416
0
        default:
15417
0
          llvm_unreachable("Unsupported intrinsic!");
15418
3
        case X86::BI__builtin_ia32_sqrtph512:
15419
3
          IID = Intrinsic::x86_avx512fp16_sqrt_ph_512;
15420
3
          break;
15421
6
        case X86::BI__builtin_ia32_sqrtps512:
15422
6
          IID = Intrinsic::x86_avx512_sqrt_ps_512;
15423
6
          break;
15424
6
        case X86::BI__builtin_ia32_sqrtpd512:
15425
6
          IID = Intrinsic::x86_avx512_sqrt_pd_512;
15426
6
          break;
15427
15
        }
15428
15
        return Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
15429
15
      }
15430
42
    }
15431
62
    if (Builder.getIsFPConstrained()) {
15432
8
      CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
15433
8
      Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt,
15434
8
                                     Ops[0]->getType());
15435
8
      return Builder.CreateConstrainedFPCall(F, Ops[0]);
15436
54
    } else {
15437
54
      Function *F = CGM.getIntrinsic(Intrinsic::sqrt, Ops[0]->getType());
15438
54
      return Builder.CreateCall(F, Ops[0]);
15439
54
    }
15440
62
  }
15441
15442
8
  case X86::BI__builtin_ia32_pmuludq128:
15443
13
  case X86::BI__builtin_ia32_pmuludq256:
15444
15
  case X86::BI__builtin_ia32_pmuludq512:
15445
15
    return EmitX86Muldq(*this, /*IsSigned*/false, Ops);
15446
15447
5
  case X86::BI__builtin_ia32_pmuldq128:
15448
10
  case X86::BI__builtin_ia32_pmuldq256:
15449
12
  case X86::BI__builtin_ia32_pmuldq512:
15450
12
    return EmitX86Muldq(*this, /*IsSigned*/true, Ops);
15451
15452
4
  case X86::BI__builtin_ia32_pternlogd512_mask:
15453
8
  case X86::BI__builtin_ia32_pternlogq512_mask:
15454
10
  case X86::BI__builtin_ia32_pternlogd128_mask:
15455
12
  case X86::BI__builtin_ia32_pternlogd256_mask:
15456
14
  case X86::BI__builtin_ia32_pternlogq128_mask:
15457
16
  case X86::BI__builtin_ia32_pternlogq256_mask:
15458
16
    return EmitX86Ternlog(*this, /*ZeroMask*/false, Ops);
15459
15460
2
  case X86::BI__builtin_ia32_pternlogd512_maskz:
15461
4
  case X86::BI__builtin_ia32_pternlogq512_maskz:
15462
5
  case X86::BI__builtin_ia32_pternlogd128_maskz:
15463
6
  case X86::BI__builtin_ia32_pternlogd256_maskz:
15464
7
  case X86::BI__builtin_ia32_pternlogq128_maskz:
15465
8
  case X86::BI__builtin_ia32_pternlogq256_maskz:
15466
8
    return EmitX86Ternlog(*this, /*ZeroMask*/true, Ops);
15467
15468
3
  case X86::BI__builtin_ia32_vpshldd128:
15469
6
  case X86::BI__builtin_ia32_vpshldd256:
15470
9
  case X86::BI__builtin_ia32_vpshldd512:
15471
12
  case X86::BI__builtin_ia32_vpshldq128:
15472
15
  case X86::BI__builtin_ia32_vpshldq256:
15473
18
  case X86::BI__builtin_ia32_vpshldq512:
15474
21
  case X86::BI__builtin_ia32_vpshldw128:
15475
24
  case X86::BI__builtin_ia32_vpshldw256:
15476
27
  case X86::BI__builtin_ia32_vpshldw512:
15477
27
    return EmitX86FunnelShift(*this, Ops[0], Ops[1], Ops[2], false);
15478
15479
3
  case X86::BI__builtin_ia32_vpshrdd128:
15480
6
  case X86::BI__builtin_ia32_vpshrdd256:
15481
9
  case X86::BI__builtin_ia32_vpshrdd512:
15482
12
  case X86::BI__builtin_ia32_vpshrdq128:
15483
15
  case X86::BI__builtin_ia32_vpshrdq256:
15484
18
  case X86::BI__builtin_ia32_vpshrdq512:
15485
21
  case X86::BI__builtin_ia32_vpshrdw128:
15486
24
  case X86::BI__builtin_ia32_vpshrdw256:
15487
27
  case X86::BI__builtin_ia32_vpshrdw512:
15488
    // Ops 0 and 1 are swapped.
15489
27
    return EmitX86FunnelShift(*this, Ops[1], Ops[0], Ops[2], true);
15490
15491
1
  case X86::BI__builtin_ia32_vpshldvd128:
15492
2
  case X86::BI__builtin_ia32_vpshldvd256:
15493
3
  case X86::BI__builtin_ia32_vpshldvd512:
15494
4
  case X86::BI__builtin_ia32_vpshldvq128:
15495
5
  case X86::BI__builtin_ia32_vpshldvq256:
15496
6
  case X86::BI__builtin_ia32_vpshldvq512:
15497
7
  case X86::BI__builtin_ia32_vpshldvw128:
15498
8
  case X86::BI__builtin_ia32_vpshldvw256:
15499
9
  case X86::BI__builtin_ia32_vpshldvw512:
15500
9
    return EmitX86FunnelShift(*this, Ops[0], Ops[1], Ops[2], false);
15501
15502
1
  case X86::BI__builtin_ia32_vpshrdvd128:
15503
2
  case X86::BI__builtin_ia32_vpshrdvd256:
15504
3
  case X86::BI__builtin_ia32_vpshrdvd512:
15505
4
  case X86::BI__builtin_ia32_vpshrdvq128:
15506
5
  case X86::BI__builtin_ia32_vpshrdvq256:
15507
6
  case X86::BI__builtin_ia32_vpshrdvq512:
15508
7
  case X86::BI__builtin_ia32_vpshrdvw128:
15509
8
  case X86::BI__builtin_ia32_vpshrdvw256:
15510
9
  case X86::BI__builtin_ia32_vpshrdvw512:
15511
    // Ops 0 and 1 are swapped.
15512
9
    return EmitX86FunnelShift(*this, Ops[1], Ops[0], Ops[2], true);
15513
15514
  // Reductions
15515
4
  case X86::BI__builtin_ia32_reduce_fadd_pd512:
15516
6
  case X86::BI__builtin_ia32_reduce_fadd_ps512:
15517
7
  case X86::BI__builtin_ia32_reduce_fadd_ph512:
15518
8
  case X86::BI__builtin_ia32_reduce_fadd_ph256:
15519
9
  case X86::BI__builtin_ia32_reduce_fadd_ph128: {
15520
9
    Function *F =
15521
9
        CGM.getIntrinsic(Intrinsic::vector_reduce_fadd, Ops[1]->getType());
15522
9
    IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
15523
9
    Builder.getFastMathFlags().setAllowReassoc();
15524
9
    return Builder.CreateCall(F, {Ops[0], Ops[1]});
15525
8
  }
15526
2
  case X86::BI__builtin_ia32_reduce_fmul_pd512:
15527
5
  case X86::BI__builtin_ia32_reduce_fmul_ps512:
15528
6
  case X86::BI__builtin_ia32_reduce_fmul_ph512:
15529
7
  case X86::BI__builtin_ia32_reduce_fmul_ph256:
15530
8
  case X86::BI__builtin_ia32_reduce_fmul_ph128: {
15531
8
    Function *F =
15532
8
        CGM.getIntrinsic(Intrinsic::vector_reduce_fmul, Ops[1]->getType());
15533
8
    IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
15534
8
    Builder.getFastMathFlags().setAllowReassoc();
15535
8
    return Builder.CreateCall(F, {Ops[0], Ops[1]});
15536
7
  }
15537
2
  case X86::BI__builtin_ia32_reduce_fmax_pd512:
15538
4
  case X86::BI__builtin_ia32_reduce_fmax_ps512:
15539
5
  case X86::BI__builtin_ia32_reduce_fmax_ph512:
15540
6
  case X86::BI__builtin_ia32_reduce_fmax_ph256:
15541
8
  case X86::BI__builtin_ia32_reduce_fmax_ph128: {
15542
8
    Function *F =
15543
8
        CGM.getIntrinsic(Intrinsic::vector_reduce_fmax, Ops[0]->getType());
15544
8
    IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
15545
8
    Builder.getFastMathFlags().setNoNaNs();
15546
8
    return Builder.CreateCall(F, {Ops[0]});
15547
6
  }
15548
2
  case X86::BI__builtin_ia32_reduce_fmin_pd512:
15549
4
  case X86::BI__builtin_ia32_reduce_fmin_ps512:
15550
5
  case X86::BI__builtin_ia32_reduce_fmin_ph512:
15551
7
  case X86::BI__builtin_ia32_reduce_fmin_ph256:
15552
8
  case X86::BI__builtin_ia32_reduce_fmin_ph128: {
15553
8
    Function *F =
15554
8
        CGM.getIntrinsic(Intrinsic::vector_reduce_fmin, Ops[0]->getType());
15555
8
    IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
15556
8
    Builder.getFastMathFlags().setNoNaNs();
15557
8
    return Builder.CreateCall(F, {Ops[0]});
15558
7
  }
15559
15560
  // 3DNow!
15561
3
  case X86::BI__builtin_ia32_pswapdsf:
15562
6
  case X86::BI__builtin_ia32_pswapdsi: {
15563
6
    llvm::Type *MMXTy = llvm::Type::getX86_MMXTy(getLLVMContext());
15564
6
    Ops[0] = Builder.CreateBitCast(Ops[0], MMXTy, "cast");
15565
6
    llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_3dnowa_pswapd);
15566
6
    return Builder.CreateCall(F, Ops, "pswapd");
15567
3
  }
15568
2
  case X86::BI__builtin_ia32_rdrand16_step:
15569
6
  case X86::BI__builtin_ia32_rdrand32_step:
15570
7
  case X86::BI__builtin_ia32_rdrand64_step:
15571
9
  case X86::BI__builtin_ia32_rdseed16_step:
15572
11
  case X86::BI__builtin_ia32_rdseed32_step:
15573
12
  case X86::BI__builtin_ia32_rdseed64_step: {
15574
12
    Intrinsic::ID ID;
15575
12
    switch (BuiltinID) {
15576
0
    default: llvm_unreachable("Unsupported intrinsic!");
15577
2
    case X86::BI__builtin_ia32_rdrand16_step:
15578
2
      ID = Intrinsic::x86_rdrand_16;
15579
2
      break;
15580
4
    case X86::BI__builtin_ia32_rdrand32_step:
15581
4
      ID = Intrinsic::x86_rdrand_32;
15582
4
      break;
15583
1
    case X86::BI__builtin_ia32_rdrand64_step:
15584
1
      ID = Intrinsic::x86_rdrand_64;
15585
1
      break;
15586
2
    case X86::BI__builtin_ia32_rdseed16_step:
15587
2
      ID = Intrinsic::x86_rdseed_16;
15588
2
      break;
15589
2
    case X86::BI__builtin_ia32_rdseed32_step:
15590
2
      ID = Intrinsic::x86_rdseed_32;
15591
2
      break;
15592
1
    case X86::BI__builtin_ia32_rdseed64_step:
15593
1
      ID = Intrinsic::x86_rdseed_64;
15594
1
      break;
15595
12
    }
15596
15597
12
    Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID));
15598
12
    Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 0),
15599
12
                                      Ops[0]);
15600
12
    return Builder.CreateExtractValue(Call, 1);
15601
12
  }
15602
2
  case X86::BI__builtin_ia32_addcarryx_u32:
15603
4
  case X86::BI__builtin_ia32_addcarryx_u64:
15604
5
  case X86::BI__builtin_ia32_subborrow_u32:
15605
6
  case X86::BI__builtin_ia32_subborrow_u64: {
15606
6
    Intrinsic::ID IID;
15607
6
    switch (BuiltinID) {
15608
0
    default: llvm_unreachable("Unsupported intrinsic!");
15609
2
    case X86::BI__builtin_ia32_addcarryx_u32:
15610
2
      IID = Intrinsic::x86_addcarry_32;
15611
2
      break;
15612
2
    case X86::BI__builtin_ia32_addcarryx_u64:
15613
2
      IID = Intrinsic::x86_addcarry_64;
15614
2
      break;
15615
1
    case X86::BI__builtin_ia32_subborrow_u32:
15616
1
      IID = Intrinsic::x86_subborrow_32;
15617
1
      break;
15618
1
    case X86::BI__builtin_ia32_subborrow_u64:
15619
1
      IID = Intrinsic::x86_subborrow_64;
15620
1
      break;
15621
6
    }
15622
15623
6
    Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID),
15624
6
                                     { Ops[0], Ops[1], Ops[2] });
15625
6
    Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 1),
15626
6
                                      Ops[3]);
15627
6
    return Builder.CreateExtractValue(Call, 0);
15628
6
  }
15629
15630
2
  case X86::BI__builtin_ia32_fpclassps128_mask:
15631
4
  case X86::BI__builtin_ia32_fpclassps256_mask:
15632
6
  case X86::BI__builtin_ia32_fpclassps512_mask:
15633
8
  case X86::BI__builtin_ia32_fpclassph128_mask:
15634
10
  case X86::BI__builtin_ia32_fpclassph256_mask:
15635
12
  case X86::BI__builtin_ia32_fpclassph512_mask:
15636
14
  case X86::BI__builtin_ia32_fpclasspd128_mask:
15637
16
  case X86::BI__builtin_ia32_fpclasspd256_mask:
15638
18
  case X86::BI__builtin_ia32_fpclasspd512_mask: {
15639
18
    unsigned NumElts =
15640
18
        cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
15641
18
    Value *MaskIn = Ops[2];
15642
18
    Ops.erase(&Ops[2]);
15643
15644
18
    Intrinsic::ID ID;
15645
18
    switch (BuiltinID) {
15646
0
    default: llvm_unreachable("Unsupported intrinsic!");
15647
2
    case X86::BI__builtin_ia32_fpclassph128_mask:
15648
2
      ID = Intrinsic::x86_avx512fp16_fpclass_ph_128;
15649
2
      break;
15650
2
    case X86::BI__builtin_ia32_fpclassph256_mask:
15651
2
      ID = Intrinsic::x86_avx512fp16_fpclass_ph_256;
15652
2
      break;
15653
2
    case X86::BI__builtin_ia32_fpclassph512_mask:
15654
2
      ID = Intrinsic::x86_avx512fp16_fpclass_ph_512;
15655
2
      break;
15656
2
    case X86::BI__builtin_ia32_fpclassps128_mask:
15657
2
      ID = Intrinsic::x86_avx512_fpclass_ps_128;
15658
2
      break;
15659
2
    case X86::BI__builtin_ia32_fpclassps256_mask:
15660
2
      ID = Intrinsic::x86_avx512_fpclass_ps_256;
15661
2
      break;
15662
2
    case X86::BI__builtin_ia32_fpclassps512_mask:
15663
2
      ID = Intrinsic::x86_avx512_fpclass_ps_512;
15664
2
      break;
15665
2
    case X86::BI__builtin_ia32_fpclasspd128_mask:
15666
2
      ID = Intrinsic::x86_avx512_fpclass_pd_128;
15667
2
      break;
15668
2
    case X86::BI__builtin_ia32_fpclasspd256_mask:
15669
2
      ID = Intrinsic::x86_avx512_fpclass_pd_256;
15670
2
      break;
15671
2
    case X86::BI__builtin_ia32_fpclasspd512_mask:
15672
2
      ID = Intrinsic::x86_avx512_fpclass_pd_512;
15673
2
      break;
15674
18
    }
15675
15676
18
    Value *Fpclass = Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
15677
18
    return EmitX86MaskedCompareResult(*this, Fpclass, NumElts, MaskIn);
15678
18
  }
15679
15680
2
  case X86::BI__builtin_ia32_vp2intersect_q_512:
15681
4
  case X86::BI__builtin_ia32_vp2intersect_q_256:
15682
6
  case X86::BI__builtin_ia32_vp2intersect_q_128:
15683
8
  case X86::BI__builtin_ia32_vp2intersect_d_512:
15684
10
  case X86::BI__builtin_ia32_vp2intersect_d_256:
15685
12
  case X86::BI__builtin_ia32_vp2intersect_d_128: {
15686
12
    unsigned NumElts =
15687
12
        cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
15688
12
    Intrinsic::ID ID;
15689
15690
12
    switch (BuiltinID) {
15691
0
    default: llvm_unreachable("Unsupported intrinsic!");
15692
2
    case X86::BI__builtin_ia32_vp2intersect_q_512:
15693
2
      ID = Intrinsic::x86_avx512_vp2intersect_q_512;
15694
2
      break;
15695
2
    case X86::BI__builtin_ia32_vp2intersect_q_256:
15696
2
      ID = Intrinsic::x86_avx512_vp2intersect_q_256;
15697
2
      break;
15698
2
    case X86::BI__builtin_ia32_vp2intersect_q_128:
15699
2
      ID = Intrinsic::x86_avx512_vp2intersect_q_128;
15700
2
      break;
15701
2
    case X86::BI__builtin_ia32_vp2intersect_d_512:
15702
2
      ID = Intrinsic::x86_avx512_vp2intersect_d_512;
15703
2
      break;
15704
2
    case X86::BI__builtin_ia32_vp2intersect_d_256:
15705
2
      ID = Intrinsic::x86_avx512_vp2intersect_d_256;
15706
2
      break;
15707
2
    case X86::BI__builtin_ia32_vp2intersect_d_128:
15708
2
      ID = Intrinsic::x86_avx512_vp2intersect_d_128;
15709
2
      break;
15710
12
    }
15711
15712
12
    Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID), {Ops[0], Ops[1]});
15713
12
    Value *Result = Builder.CreateExtractValue(Call, 0);
15714
12
    Result = EmitX86MaskedCompareResult(*this, Result, NumElts, nullptr);
15715
12
    Builder.CreateDefaultAlignedStore(Result, Ops[2]);
15716
15717
12
    Result = Builder.CreateExtractValue(Call, 1);
15718
12
    Result = EmitX86MaskedCompareResult(*this, Result, NumElts, nullptr);
15719
12
    return Builder.CreateDefaultAlignedStore(Result, Ops[3]);
15720
12
  }
15721
15722
0
  case X86::BI__builtin_ia32_vpmultishiftqb128:
15723
0
  case X86::BI__builtin_ia32_vpmultishiftqb256:
15724
0
  case X86::BI__builtin_ia32_vpmultishiftqb512: {
15725
0
    Intrinsic::ID ID;
15726
0
    switch (BuiltinID) {
15727
0
    default: llvm_unreachable("Unsupported intrinsic!");
15728
0
    case X86::BI__builtin_ia32_vpmultishiftqb128:
15729
0
      ID = Intrinsic::x86_avx512_pmultishift_qb_128;
15730
0
      break;
15731
0
    case X86::BI__builtin_ia32_vpmultishiftqb256:
15732
0
      ID = Intrinsic::x86_avx512_pmultishift_qb_256;
15733
0
      break;
15734
0
    case X86::BI__builtin_ia32_vpmultishiftqb512:
15735
0
      ID = Intrinsic::x86_avx512_pmultishift_qb_512;
15736
0
      break;
15737
0
    }
15738
15739
0
    return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
15740
0
  }
15741
15742
1
  case X86::BI__builtin_ia32_vpshufbitqmb128_mask:
15743
2
  case X86::BI__builtin_ia32_vpshufbitqmb256_mask:
15744
3
  case X86::BI__builtin_ia32_vpshufbitqmb512_mask: {
15745
3
    unsigned NumElts =
15746
3
        cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
15747
3
    Value *MaskIn = Ops[2];
15748
3
    Ops.erase(&Ops[2]);
15749
15750
3
    Intrinsic::ID ID;
15751
3
    switch (BuiltinID) {
15752
0
    default: llvm_unreachable("Unsupported intrinsic!");
15753
1
    case X86::BI__builtin_ia32_vpshufbitqmb128_mask:
15754
1
      ID = Intrinsic::x86_avx512_vpshufbitqmb_128;
15755
1
      break;
15756
1
    case X86::BI__builtin_ia32_vpshufbitqmb256_mask:
15757
1
      ID = Intrinsic::x86_avx512_vpshufbitqmb_256;
15758
1
      break;
15759
1
    case X86::BI__builtin_ia32_vpshufbitqmb512_mask:
15760
1
      ID = Intrinsic::x86_avx512_vpshufbitqmb_512;
15761
1
      break;
15762
3
    }
15763
15764
3
    Value *Shufbit = Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
15765
3
    return EmitX86MaskedCompareResult(*this, Shufbit, NumElts, MaskIn);
15766
3
  }
15767
15768
  // packed comparison intrinsics
15769
7
  case X86::BI__builtin_ia32_cmpeqps:
15770
18
  case X86::BI__builtin_ia32_cmpeqpd:
15771
18
    return getVectorFCmpIR(CmpInst::FCMP_OEQ, /*IsSignaling*/false);
15772
6
  case X86::BI__builtin_ia32_cmpltps:
15773
20
  case X86::BI__builtin_ia32_cmpltpd:
15774
20
    return getVectorFCmpIR(CmpInst::FCMP_OLT, /*IsSignaling*/true);
15775
6
  case X86::BI__builtin_ia32_cmpleps:
15776
20
  case X86::BI__builtin_ia32_cmplepd:
15777
20
    return getVectorFCmpIR(CmpInst::FCMP_OLE, /*IsSignaling*/true);
15778
3
  case X86::BI__builtin_ia32_cmpunordps:
15779
10
  case X86::BI__builtin_ia32_cmpunordpd:
15780
10
    return getVectorFCmpIR(CmpInst::FCMP_UNO, /*IsSignaling*/false);
15781
3
  case X86::BI__builtin_ia32_cmpneqps:
15782
10
  case X86::BI__builtin_ia32_cmpneqpd:
15783
10
    return getVectorFCmpIR(CmpInst::FCMP_UNE, /*IsSignaling*/false);
15784
6
  case X86::BI__builtin_ia32_cmpnltps:
15785
20
  case X86::BI__builtin_ia32_cmpnltpd:
15786
20
    return getVectorFCmpIR(CmpInst::FCMP_UGE, /*IsSignaling*/true);
15787
6
  case X86::BI__builtin_ia32_cmpnleps:
15788
20
  case X86::BI__builtin_ia32_cmpnlepd:
15789
20
    return getVectorFCmpIR(CmpInst::FCMP_UGT, /*IsSignaling*/true);
15790
3
  case X86::BI__builtin_ia32_cmpordps:
15791
10
  case X86::BI__builtin_ia32_cmpordpd:
15792
10
    return getVectorFCmpIR(CmpInst::FCMP_ORD, /*IsSignaling*/false);
15793
81
  case X86::BI__builtin_ia32_cmpph128_mask:
15794
164
  case X86::BI__builtin_ia32_cmpph256_mask:
15795
260
  case X86::BI__builtin_ia32_cmpph512_mask:
15796
388
  case X86::BI__builtin_ia32_cmpps128_mask:
15797
516
  case X86::BI__builtin_ia32_cmpps256_mask:
15798
746
  case X86::BI__builtin_ia32_cmpps512_mask:
15799
874
  case X86::BI__builtin_ia32_cmppd128_mask:
15800
1.00k
  case X86::BI__builtin_ia32_cmppd256_mask:
15801
1.23k
  case X86::BI__builtin_ia32_cmppd512_mask:
15802
1.23k
    IsMaskFCmp = true;
15803
1.23k
    [[fallthrough]];
15804
1.47k
  case X86::BI__builtin_ia32_cmpps:
15805
1.70k
  case X86::BI__builtin_ia32_cmpps256:
15806
1.94k
  case X86::BI__builtin_ia32_cmppd:
15807
2.17k
  case X86::BI__builtin_ia32_cmppd256: {
15808
    // Lower vector comparisons to fcmp instructions, while
15809
    // ignoring the requested signalling behaviour and
15810
    // the requested rounding mode.
15811
    // This is only possible if the fp-model is not strict and FENV_ACCESS is off.
15812
15813
    // The third argument is the comparison condition, an integer in the
15814
    // range [0, 31]
15815
2.17k
    unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x1f;
15816
15817
    // Lowering to IR fcmp instruction.
15818
    // Ignoring requested signaling behaviour,
15819
    // e.g. both _CMP_GT_OS & _CMP_GT_OQ are translated to FCMP_OGT.
15820
2.17k
    FCmpInst::Predicate Pred;
15821
2.17k
    bool IsSignaling;
15822
    // Predicates for 16-31 repeat the 0-15 predicates. Only the signalling
15823
    // behavior is inverted. We'll handle that after the switch.
15824
2.17k
    switch (CC & 0xf) {
15825
175
    case 0x00: Pred = FCmpInst::FCMP_OEQ;   IsSignaling = false; break;
15826
140
    case 0x01: Pred = FCmpInst::FCMP_OLT;   IsSignaling = true;  break;
15827
140
    case 0x02: Pred = FCmpInst::FCMP_OLE;   IsSignaling = true;  break;
15828
140
    case 0x03: Pred = FCmpInst::FCMP_UNO;   IsSignaling = false; break;
15829
140
    case 0x04: Pred = FCmpInst::FCMP_UNE;   IsSignaling = false; break;
15830
140
    case 0x05: Pred = FCmpInst::FCMP_UGE;   IsSignaling = true;  break;
15831
140
    case 0x06: Pred = FCmpInst::FCMP_UGT;   IsSignaling = true;  break;
15832
140
    case 0x07: Pred = FCmpInst::FCMP_ORD;   IsSignaling = false; break;
15833
128
    case 0x08: Pred = FCmpInst::FCMP_UEQ;   IsSignaling = false; break;
15834
128
    case 0x09: Pred = FCmpInst::FCMP_ULT;   IsSignaling = true;  break;
15835
128
    case 0x0a: Pred = FCmpInst::FCMP_ULE;   IsSignaling = true;  break;
15836
128
    case 0x0b: Pred = FCmpInst::FCMP_FALSE; IsSignaling = false; break;
15837
128
    case 0x0c: Pred = FCmpInst::FCMP_ONE;   IsSignaling = false; break;
15838
128
    case 0x0d: Pred = FCmpInst::FCMP_OGE;   IsSignaling = true;  break;
15839
128
    case 0x0e: Pred = FCmpInst::FCMP_OGT;   IsSignaling = true;  break;
15840
128
    case 0x0f: Pred = FCmpInst::FCMP_TRUE;  IsSignaling = false; break;
15841
0
    default: llvm_unreachable("Unhandled CC");
15842
2.17k
    }
15843
15844
    // Invert the signalling behavior for 16-31.
15845
2.17k
    if (CC & 0x10)
15846
1.02k
      IsSignaling = !IsSignaling;
15847
15848
    // If the predicate is true or false and we're using constrained intrinsics,
15849
    // we don't have a compare intrinsic we can use. Just use the legacy X86
15850
    // specific intrinsic.
15851
    // If the intrinsic is mask enabled and we're using constrained intrinsics,
15852
    // use the legacy X86 specific intrinsic.
15853
2.17k
    if (Builder.getIsFPConstrained() &&
15854
2.17k
        (Pred == FCmpInst::FCMP_TRUE || Pred == FCmpInst::FCMP_FALSE ||
15855
710
         IsMaskFCmp)) {
15856
15857
486
      Intrinsic::ID IID;
15858
486
      switch (BuiltinID) {
15859
0
      default: llvm_unreachable("Unexpected builtin");
15860
8
      case X86::BI__builtin_ia32_cmpps:
15861
8
        IID = Intrinsic::x86_sse_cmp_ps;
15862
8
        break;
15863
8
      case X86::BI__builtin_ia32_cmpps256:
15864
8
        IID = Intrinsic::x86_avx_cmp_ps_256;
15865
8
        break;
15866
8
      case X86::BI__builtin_ia32_cmppd:
15867
8
        IID = Intrinsic::x86_sse2_cmp_pd;
15868
8
        break;
15869
8
      case X86::BI__builtin_ia32_cmppd256:
15870
8
        IID = Intrinsic::x86_avx_cmp_pd_256;
15871
8
        break;
15872
17
      case X86::BI__builtin_ia32_cmpph128_mask:
15873
17
        IID = Intrinsic::x86_avx512fp16_mask_cmp_ph_128;
15874
17
        break;
15875
19
      case X86::BI__builtin_ia32_cmpph256_mask:
15876
19
        IID = Intrinsic::x86_avx512fp16_mask_cmp_ph_256;
15877
19
        break;
15878
30
      case X86::BI__builtin_ia32_cmpph512_mask:
15879
30
        IID = Intrinsic::x86_avx512fp16_mask_cmp_ph_512;
15880
30
        break;
15881
66
      case X86::BI__builtin_ia32_cmpps512_mask:
15882
66
        IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
15883
66
        break;
15884
66
      case X86::BI__builtin_ia32_cmppd512_mask:
15885
66
        IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
15886
66
        break;
15887
64
      case X86::BI__builtin_ia32_cmpps128_mask:
15888
64
        IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
15889
64
        break;
15890
64
      case X86::BI__builtin_ia32_cmpps256_mask:
15891
64
        IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
15892
64
        break;
15893
64
      case X86::BI__builtin_ia32_cmppd128_mask:
15894
64
        IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
15895
64
        break;
15896
64
      case X86::BI__builtin_ia32_cmppd256_mask:
15897
64
        IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
15898
64
        break;
15899
486
      }
15900
15901
486
      Function *Intr = CGM.getIntrinsic(IID);
15902
486
      if (IsMaskFCmp) {
15903
454
        unsigned NumElts =
15904
454
            cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
15905
454
        Ops[3] = getMaskVecValue(*this, Ops[3], NumElts);
15906
454
        Value *Cmp = Builder.CreateCall(Intr, Ops);
15907
454
        return EmitX86MaskedCompareResult(*this, Cmp, NumElts, nullptr);
15908
454
      }
15909
15910
32
      return Builder.CreateCall(Intr, Ops);
15911
486
    }
15912
15913
    // Builtins without the _mask suffix return a vector of integers
15914
    // of the same width as the input vectors
15915
1.69k
    if (IsMaskFCmp) {
15916
      // We ignore SAE if strict FP is disabled. We only keep precise
15917
      // exception behavior under strict FP.
15918
      // NOTE: If strict FP does ever go through here a CGFPOptionsRAII
15919
      // object will be required.
15920
780
      unsigned NumElts =
15921
780
          cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
15922
780
      Value *Cmp;
15923
780
      if (IsSignaling)
15924
384
        Cmp = Builder.CreateFCmpS(Pred, Ops[0], Ops[1]);
15925
396
      else
15926
396
        Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]);
15927
780
      return EmitX86MaskedCompareResult(*this, Cmp, NumElts, Ops[3]);
15928
780
    }
15929
15930
913
    return getVectorFCmpIR(Pred, IsSignaling);
15931
1.69k
  }
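As the comments above note, the immediate's low four bits select the IR predicate and bit 4 only flips the signalling behaviour, so the signalling and quiet encodings of the same relation lower to one fcmp predicate. A minimal sketch, not part of CGBuiltin.cpp, assuming AVX is enabled and the default (non-strict) FP model:

// Both encodings of "greater than, ordered" select case 0x0e above (FCMP_OGT);
// only the signalling flag differs between them.
#include <immintrin.h>

__m128 cmp_gt_signalling(__m128 a, __m128 b) {
  return _mm_cmp_ps(a, b, _CMP_GT_OS);   // imm 0x0e -> fcmp ogt (signalling form)
}

__m128 cmp_gt_quiet(__m128 a, __m128 b) {
  return _mm_cmp_ps(a, b, _CMP_GT_OQ);   // imm 0x1e -> same fcmp ogt predicate
}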
15932
15933
  // SSE scalar comparison intrinsics
15934
2
  case X86::BI__builtin_ia32_cmpeqss:
15935
2
    return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 0);
15936
6
  case X86::BI__builtin_ia32_cmpltss:
15937
6
    return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 1);
15938
6
  case X86::BI__builtin_ia32_cmpless:
15939
6
    return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 2);
15940
2
  case X86::BI__builtin_ia32_cmpunordss:
15941
2
    return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 3);
15942
2
  case X86::BI__builtin_ia32_cmpneqss:
15943
2
    return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 4);
15944
6
  case X86::BI__builtin_ia32_cmpnltss:
15945
6
    return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 5);
15946
6
  case X86::BI__builtin_ia32_cmpnless:
15947
6
    return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 6);
15948
2
  case X86::BI__builtin_ia32_cmpordss:
15949
2
    return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 7);
15950
5
  case X86::BI__builtin_ia32_cmpeqsd:
15951
5
    return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 0);
15952
12
  case X86::BI__builtin_ia32_cmpltsd:
15953
12
    return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 1);
15954
12
  case X86::BI__builtin_ia32_cmplesd:
15955
12
    return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 2);
15956
5
  case X86::BI__builtin_ia32_cmpunordsd:
15957
5
    return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 3);
15958
5
  case X86::BI__builtin_ia32_cmpneqsd:
15959
5
    return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 4);
15960
12
  case X86::BI__builtin_ia32_cmpnltsd:
15961
12
    return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 5);
15962
12
  case X86::BI__builtin_ia32_cmpnlesd:
15963
12
    return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 6);
15964
5
  case X86::BI__builtin_ia32_cmpordsd:
15965
5
    return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 7);
15966
15967
  // f16c half2float intrinsics
15968
4
  case X86::BI__builtin_ia32_vcvtph2ps:
15969
6
  case X86::BI__builtin_ia32_vcvtph2ps256:
15970
16
  case X86::BI__builtin_ia32_vcvtph2ps_mask:
15971
26
  case X86::BI__builtin_ia32_vcvtph2ps256_mask:
15972
74
  case X86::BI__builtin_ia32_vcvtph2ps512_mask: {
15973
74
    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
15974
74
    return EmitX86CvtF16ToFloatExpr(*this, Ops, ConvertType(E->getType()));
15975
26
  }
15976
15977
  // AVX512 bf16 intrinsics
15978
3
  case X86::BI__builtin_ia32_cvtneps2bf16_128_mask: {
15979
3
    Ops[2] = getMaskVecValue(
15980
3
        *this, Ops[2],
15981
3
        cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements());
15982
3
    Intrinsic::ID IID = Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128;
15983
3
    return Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
15984
26
  }
15985
1
  case X86::BI__builtin_ia32_cvtsbf162ss_32:
15986
1
    return Builder.CreateFPExt(Ops[0], Builder.getFloatTy());
15987
15988
2
  case X86::BI__builtin_ia32_cvtneps2bf16_256_mask:
15989
5
  case X86::BI__builtin_ia32_cvtneps2bf16_512_mask: {
15990
5
    Intrinsic::ID IID;
15991
5
    switch (BuiltinID) {
15992
0
    default: llvm_unreachable("Unsupported intrinsic!");
15993
2
    case X86::BI__builtin_ia32_cvtneps2bf16_256_mask:
15994
2
      IID = Intrinsic::x86_avx512bf16_cvtneps2bf16_256;
15995
2
      break;
15996
3
    case X86::BI__builtin_ia32_cvtneps2bf16_512_mask:
15997
3
      IID = Intrinsic::x86_avx512bf16_cvtneps2bf16_512;
15998
3
      break;
15999
5
    }
16000
5
    Value *Res = Builder.CreateCall(CGM.getIntrinsic(IID), Ops[0]);
16001
5
    return EmitX86Select(*this, Ops[2], Res, Ops[1]);
16002
5
  }
16003
16004
8
  case X86::BI__cpuid:
16005
14
  case X86::BI__cpuidex: {
16006
14
    Value *FuncId = EmitScalarExpr(E->getArg(1));
16007
14
    Value *SubFuncId = BuiltinID == X86::BI__cpuidex
16008
14
                           ? EmitScalarExpr(E->getArg(2))
16009
14
                           : llvm::ConstantInt::get(Int32Ty, 0);
16010
16011
14
    llvm::StructType *CpuidRetTy =
16012
14
        llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty, Int32Ty);
16013
14
    llvm::FunctionType *FTy =
16014
14
        llvm::FunctionType::get(CpuidRetTy, {Int32Ty, Int32Ty}, false);
16015
16016
14
    StringRef Asm, Constraints;
16017
14
    if (getTarget().getTriple().getArch() == llvm::Triple::x86) {
16018
6
      Asm = "cpuid";
16019
6
      Constraints = "={ax},={bx},={cx},={dx},{ax},{cx}";
16020
8
    } else {
16021
      // x86-64 uses %rbx as the base register, so preserve it.
16022
8
      Asm = "xchgq %rbx, ${1:q}\n"
16023
8
            "cpuid\n"
16024
8
            "xchgq %rbx, ${1:q}";
16025
8
      Constraints = "={ax},=r,={cx},={dx},0,2";
16026
8
    }
16027
16028
14
    llvm::InlineAsm *IA = llvm::InlineAsm::get(FTy, Asm, Constraints,
16029
14
                                               /*hasSideEffects=*/false);
16030
14
    Value *IACall = Builder.CreateCall(IA, {FuncId, SubFuncId});
16031
14
    Value *BasePtr = EmitScalarExpr(E->getArg(0));
16032
14
    Value *Store = nullptr;
16033
70
    for (unsigned i = 0; i < 4; i++) {
16034
56
      Value *Extracted = Builder.CreateExtractValue(IACall, i);
16035
56
      Value *StorePtr = Builder.CreateConstInBoundsGEP1_32(Int32Ty, BasePtr, i);
16036
56
      Store = Builder.CreateAlignedStore(Extracted, StorePtr, getIntAlign());
16037
56
    }
16038
16039
    // Return the last store instruction to signal that we have emitted the
16040
    // intrinsic.
16041
14
    return Store;
16042
8
  }
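For reference, a hedged usage sketch of the builtin handled above (assumes an MSVC-compatible environment providing <intrin.h>). The lowering stores EAX, EBX, ECX and EDX into four consecutive ints, which is why each extracted register is written to BasePtr + i:

#include <intrin.h>   // declares __cpuidex for MSVC / clang-cl

// Returns the EBX register of CPUID leaf 7, subleaf 0.
int cpuid_leaf7_ebx(void) {
  int regs[4];              // filled as EAX, EBX, ECX, EDX
  __cpuidex(regs, 7, 0);    // lowered to the inline-asm call built above
  return regs[1];
}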
16043
16044
2
  case X86::BI__emul:
16045
4
  case X86::BI__emulu: {
16046
4
    llvm::Type *Int64Ty = llvm::IntegerType::get(getLLVMContext(), 64);
16047
4
    bool isSigned = (BuiltinID == X86::BI__emul);
16048
4
    Value *LHS = Builder.CreateIntCast(Ops[0], Int64Ty, isSigned);
16049
4
    Value *RHS = Builder.CreateIntCast(Ops[1], Int64Ty, isSigned);
16050
4
    return Builder.CreateMul(LHS, RHS, "", !isSigned, isSigned);
16051
2
  }
16052
1
  case X86::BI__mulh:
16053
2
  case X86::BI__umulh:
16054
3
  case X86::BI_mul128:
16055
4
  case X86::BI_umul128: {
16056
4
    llvm::Type *ResType = ConvertType(E->getType());
16057
4
    llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
16058
16059
4
    bool IsSigned = (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI_mul128);
16060
4
    Value *LHS = Builder.CreateIntCast(Ops[0], Int128Ty, IsSigned);
16061
4
    Value *RHS = Builder.CreateIntCast(Ops[1], Int128Ty, IsSigned);
16062
16063
4
    Value *MulResult, *HigherBits;
16064
4
    if (IsSigned) {
16065
2
      MulResult = Builder.CreateNSWMul(LHS, RHS);
16066
2
      HigherBits = Builder.CreateAShr(MulResult, 64);
16067
2
    } else {
16068
2
      MulResult = Builder.CreateNUWMul(LHS, RHS);
16069
2
      HigherBits = Builder.CreateLShr(MulResult, 64);
16070
2
    }
16071
4
    HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned);
16072
16073
4
    if (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI__umulh)
16074
2
      return HigherBits;
16075
16076
2
    Address HighBitsAddress = EmitPointerWithAlignment(E->getArg(2));
16077
2
    Builder.CreateStore(HigherBits, HighBitsAddress);
16078
2
    return Builder.CreateIntCast(MulResult, ResType, IsSigned);
16079
4
  }
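A small reference sketch, not taken from the source above, with MSVC-style x64 intrinsics assumed: the lowering widens both operands to i128, multiplies, and shifts right by 64, so __umulh and the high output of _umul128 agree.

#include <intrin.h>   // _umul128 / __umulh on x64 MSVC-compatible compilers

unsigned long long high_product(unsigned long long a, unsigned long long b) {
  unsigned long long hi;
  _umul128(a, b, &hi);        // full 128-bit product; high half written to hi
  return hi;                  // equals __umulh(a, b)
}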
16080
16081
1
  case X86::BI__faststorefence: {
16082
1
    return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
16083
1
                               llvm::SyncScope::System);
16084
4
  }
16085
1
  case X86::BI__shiftleft128:
16086
2
  case X86::BI__shiftright128: {
16087
2
    llvm::Function *F = CGM.getIntrinsic(
16088
2
        BuiltinID == X86::BI__shiftleft128 ? Intrinsic::fshl : Intrinsic::fshr,
16089
2
        Int64Ty);
16090
    // Flip low/high ops and zero-extend amount to matching type.
16091
    // shiftleft128(Low, High, Amt) -> fshl(High, Low, Amt)
16092
    // shiftright128(Low, High, Amt) -> fshr(High, Low, Amt)
16093
2
    std::swap(Ops[0], Ops[1]);
16094
2
    Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
16095
2
    return Builder.CreateCall(F, Ops);
16096
1
  }
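A portable reference for what the funnel-shift lowering computes; a sketch under the assumption that unsigned __int128 is available, not part of the file:

#include <stdint.h>

// __shiftleft128(Low, High, Amt) returns the high 64 bits of the 128-bit
// value High:Low shifted left by Amt (mod 64) - i.e. fshl(High, Low, Amt).
uint64_t shiftleft128_ref(uint64_t low, uint64_t high, unsigned char amt) {
  unsigned __int128 v = ((unsigned __int128)high << 64) | low;
  return (uint64_t)((v << (amt & 63)) >> 64);
}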
16097
2
  case X86::BI_ReadWriteBarrier:
16098
4
  case X86::BI_ReadBarrier:
16099
6
  case X86::BI_WriteBarrier: {
16100
6
    return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
16101
6
                               llvm::SyncScope::SingleThread);
16102
4
  }
16103
16104
2
  case X86::BI_AddressOfReturnAddress: {
16105
2
    Function *F =
16106
2
        CGM.getIntrinsic(Intrinsic::addressofreturnaddress, AllocaInt8PtrTy);
16107
2
    return Builder.CreateCall(F);
16108
4
  }
16109
4
  case X86::BI__stosb: {
16110
    // We treat __stosb as a volatile memset - it may not generate a "rep stosb"
16111
    // instruction, but it will create a memset that won't be optimized away.
16112
4
    return Builder.CreateMemSet(Ops[0], Ops[1], Ops[2], Align(1), true);
16113
4
  }
16114
2
  case X86::BI__ud2:
16115
    // llvm.trap makes a ud2a instruction on x86.
16116
2
    return EmitTrapCall(Intrinsic::trap);
16117
2
  case X86::BI__int2c: {
16118
    // This syscall signals a driver assertion failure in x86 NT kernels.
16119
2
    llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false);
16120
2
    llvm::InlineAsm *IA =
16121
2
        llvm::InlineAsm::get(FTy, "int $$0x2c", "", /*hasSideEffects=*/true);
16122
2
    llvm::AttributeList NoReturnAttr = llvm::AttributeList::get(
16123
2
        getLLVMContext(), llvm::AttributeList::FunctionIndex,
16124
2
        llvm::Attribute::NoReturn);
16125
2
    llvm::CallInst *CI = Builder.CreateCall(IA);
16126
2
    CI->setAttributes(NoReturnAttr);
16127
2
    return CI;
16128
4
  }
16129
1
  case X86::BI__readfsbyte:
16130
2
  case X86::BI__readfsword:
16131
3
  case X86::BI__readfsdword:
16132
4
  case X86::BI__readfsqword: {
16133
4
    llvm::Type *IntTy = ConvertType(E->getType());
16134
4
    Value *Ptr = Builder.CreateIntToPtr(
16135
4
        Ops[0], llvm::PointerType::get(getLLVMContext(), 257));
16136
4
    LoadInst *Load = Builder.CreateAlignedLoad(
16137
4
        IntTy, Ptr, getContext().getTypeAlignInChars(E->getType()));
16138
4
    Load->setVolatile(true);
16139
4
    return Load;
16140
3
  }
16141
1
  case X86::BI__readgsbyte:
16142
2
  case X86::BI__readgsword:
16143
3
  case X86::BI__readgsdword:
16144
4
  case X86::BI__readgsqword: {
16145
4
    llvm::Type *IntTy = ConvertType(E->getType());
16146
4
    Value *Ptr = Builder.CreateIntToPtr(
16147
4
        Ops[0], llvm::PointerType::get(getLLVMContext(), 256));
16148
4
    LoadInst *Load = Builder.CreateAlignedLoad(
16149
4
        IntTy, Ptr, getContext().getTypeAlignInChars(E->getType()));
16150
4
    Load->setVolatile(true);
16151
4
    return Load;
16152
3
  }
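A hedged usage sketch for the GS-relative loads handled above, assuming x64 Windows, where GS:[0x30] holds the TEB self-pointer; the builtin becomes a volatile load from LLVM address space 256:

#include <intrin.h>   // __readgsqword on MSVC / clang-cl

// Reads the current thread's TEB pointer from gs:[0x30].
unsigned long long current_teb(void) {
  return __readgsqword(0x30);
}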
16153
2
  case X86::BI__builtin_ia32_encodekey128_u32: {
16154
2
    Intrinsic::ID IID = Intrinsic::x86_encodekey128;
16155
16156
2
    Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), {Ops[0], Ops[1]});
16157
16158
8
    for (int i = 0; i < 3; ++i) {
16159
6
      Value *Extract = Builder.CreateExtractValue(Call, i + 1);
16160
6
      Value *Ptr = Builder.CreateConstGEP1_32(Int8Ty, Ops[2], i * 16);
16161
6
      Builder.CreateAlignedStore(Extract, Ptr, Align(1));
16162
6
    }
16163
16164
2
    return Builder.CreateExtractValue(Call, 0);
16165
3
  }
16166
2
  case X86::BI__builtin_ia32_encodekey256_u32: {
16167
2
    Intrinsic::ID IID = Intrinsic::x86_encodekey256;
16168
16169
2
    Value *Call =
16170
2
        Builder.CreateCall(CGM.getIntrinsic(IID), {Ops[0], Ops[1], Ops[2]});
16171
16172
10
    for (int i = 0; i < 4; ++i) {
16173
8
      Value *Extract = Builder.CreateExtractValue(Call, i + 1);
16174
8
      Value *Ptr = Builder.CreateConstGEP1_32(Int8Ty, Ops[3], i * 16);
16175
8
      Builder.CreateAlignedStore(Extract, Ptr, Align(1));
16176
8
    }
16177
16178
2
    return Builder.CreateExtractValue(Call, 0);
16179
3
  }
16180
2
  case X86::BI__builtin_ia32_aesenc128kl_u8:
16181
4
  case X86::BI__builtin_ia32_aesdec128kl_u8:
16182
6
  case X86::BI__builtin_ia32_aesenc256kl_u8:
16183
8
  case X86::BI__builtin_ia32_aesdec256kl_u8: {
16184
8
    Intrinsic::ID IID;
16185
8
    StringRef BlockName;
16186
8
    switch (BuiltinID) {
16187
0
    default:
16188
0
      llvm_unreachable("Unexpected builtin");
16189
2
    case X86::BI__builtin_ia32_aesenc128kl_u8:
16190
2
      IID = Intrinsic::x86_aesenc128kl;
16191
2
      BlockName = "aesenc128kl";
16192
2
      break;
16193
2
    case X86::BI__builtin_ia32_aesdec128kl_u8:
16194
2
      IID = Intrinsic::x86_aesdec128kl;
16195
2
      BlockName = "aesdec128kl";
16196
2
      break;
16197
2
    case X86::BI__builtin_ia32_aesenc256kl_u8:
16198
2
      IID = Intrinsic::x86_aesenc256kl;
16199
2
      BlockName = "aesenc256kl";
16200
2
      break;
16201
2
    case X86::BI__builtin_ia32_aesdec256kl_u8:
16202
2
      IID = Intrinsic::x86_aesdec256kl;
16203
2
      BlockName = "aesdec256kl";
16204
2
      break;
16205
8
    }
16206
16207
8
    Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), {Ops[1], Ops[2]});
16208
16209
8
    BasicBlock *NoError =
16210
8
        createBasicBlock(BlockName + "_no_error", this->CurFn);
16211
8
    BasicBlock *Error = createBasicBlock(BlockName + "_error", this->CurFn);
16212
8
    BasicBlock *End = createBasicBlock(BlockName + "_end", this->CurFn);
16213
16214
8
    Value *Ret = Builder.CreateExtractValue(Call, 0);
16215
8
    Value *Succ = Builder.CreateTrunc(Ret, Builder.getInt1Ty());
16216
8
    Value *Out = Builder.CreateExtractValue(Call, 1);
16217
8
    Builder.CreateCondBr(Succ, NoError, Error);
16218
16219
8
    Builder.SetInsertPoint(NoError);
16220
8
    Builder.CreateDefaultAlignedStore(Out, Ops[0]);
16221
8
    Builder.CreateBr(End);
16222
16223
8
    Builder.SetInsertPoint(Error);
16224
8
    Constant *Zero = llvm::Constant::getNullValue(Out->getType());
16225
8
    Builder.CreateDefaultAlignedStore(Zero, Ops[0]);
16226
8
    Builder.CreateBr(End);
16227
16228
8
    Builder.SetInsertPoint(End);
16229
8
    return Builder.CreateExtractValue(Call, 0);
16230
8
  }
16231
2
  case X86::BI__builtin_ia32_aesencwide128kl_u8:
16232
4
  case X86::BI__builtin_ia32_aesdecwide128kl_u8:
16233
6
  case X86::BI__builtin_ia32_aesencwide256kl_u8:
16234
8
  case X86::BI__builtin_ia32_aesdecwide256kl_u8: {
16235
8
    Intrinsic::ID IID;
16236
8
    StringRef BlockName;
16237
8
    switch (BuiltinID) {
16238
2
    case X86::BI__builtin_ia32_aesencwide128kl_u8:
16239
2
      IID = Intrinsic::x86_aesencwide128kl;
16240
2
      BlockName = "aesencwide128kl";
16241
2
      break;
16242
2
    case X86::BI__builtin_ia32_aesdecwide128kl_u8:
16243
2
      IID = Intrinsic::x86_aesdecwide128kl;
16244
2
      BlockName = "aesdecwide128kl";
16245
2
      break;
16246
2
    case X86::BI__builtin_ia32_aesencwide256kl_u8:
16247
2
      IID = Intrinsic::x86_aesencwide256kl;
16248
2
      BlockName = "aesencwide256kl";
16249
2
      break;
16250
2
    case X86::BI__builtin_ia32_aesdecwide256kl_u8:
16251
2
      IID = Intrinsic::x86_aesdecwide256kl;
16252
2
      BlockName = "aesdecwide256kl";
16253
2
      break;
16254
8
    }
16255
16256
8
    llvm::Type *Ty = FixedVectorType::get(Builder.getInt64Ty(), 2);
16257
8
    Value *InOps[9];
16258
8
    InOps[0] = Ops[2];
16259
72
    for (int i = 0; i != 8; ++i) {
16260
64
      Value *Ptr = Builder.CreateConstGEP1_32(Ty, Ops[1], i);
16261
64
      InOps[i + 1] = Builder.CreateAlignedLoad(Ty, Ptr, Align(16));
16262
64
    }
16263
16264
8
    Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), InOps);
16265
16266
8
    BasicBlock *NoError =
16267
8
        createBasicBlock(BlockName + "_no_error", this->CurFn);
16268
8
    BasicBlock *Error = createBasicBlock(BlockName + "_error", this->CurFn);
16269
8
    BasicBlock *End = createBasicBlock(BlockName + "_end", this->CurFn);
16270
16271
8
    Value *Ret = Builder.CreateExtractValue(Call, 0);
16272
8
    Value *Succ = Builder.CreateTrunc(Ret, Builder.getInt1Ty());
16273
8
    Builder.CreateCondBr(Succ, NoError, Error);
16274
16275
8
    Builder.SetInsertPoint(NoError);
16276
72
    for (int i = 0; i != 8; ++i) {
16277
64
      Value *Extract = Builder.CreateExtractValue(Call, i + 1);
16278
64
      Value *Ptr = Builder.CreateConstGEP1_32(Extract->getType(), Ops[0], i);
16279
64
      Builder.CreateAlignedStore(Extract, Ptr, Align(16));
16280
64
    }
16281
8
    Builder.CreateBr(End);
16282
16283
8
    Builder.SetInsertPoint(Error);
16284
72
    for (int i = 0; i != 8; ++i) {
16285
64
      Value *Out = Builder.CreateExtractValue(Call, i + 1);
16286
64
      Constant *Zero = llvm::Constant::getNullValue(Out->getType());
16287
64
      Value *Ptr = Builder.CreateConstGEP1_32(Out->getType(), Ops[0], i);
16288
64
      Builder.CreateAlignedStore(Zero, Ptr, Align(16));
16289
64
    }
16290
8
    Builder.CreateBr(End);
16291
16292
8
    Builder.SetInsertPoint(End);
16293
8
    return Builder.CreateExtractValue(Call, 0);
16294
8
  }
16295
2
  case X86::BI__builtin_ia32_vfcmaddcph512_mask:
16296
2
    IsConjFMA = true;
16297
2
    [[fallthrough]];
16298
4
  case X86::BI__builtin_ia32_vfmaddcph512_mask: {
16299
4
    Intrinsic::ID IID = IsConjFMA
16300
4
                            ? Intrinsic::x86_avx512fp16_mask_vfcmadd_cph_512
16301
4
                            : Intrinsic::x86_avx512fp16_mask_vfmadd_cph_512;
16302
4
    Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
16303
4
    return EmitX86Select(*this, Ops[3], Call, Ops[0]);
16304
2
  }
16305
2
  case X86::BI__builtin_ia32_vfcmaddcsh_round_mask:
16306
2
    IsConjFMA = true;
16307
2
    [[fallthrough]];
16308
4
  case X86::BI__builtin_ia32_vfmaddcsh_round_mask: {
16309
4
    Intrinsic::ID IID = IsConjFMA ? Intrinsic::x86_avx512fp16_mask_vfcmadd_csh
16310
4
                                  : Intrinsic::x86_avx512fp16_mask_vfmadd_csh;
16311
4
    Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
16312
4
    Value *And = Builder.CreateAnd(Ops[3], llvm::ConstantInt::get(Int8Ty, 1));
16313
4
    return EmitX86Select(*this, And, Call, Ops[0]);
16314
2
  }
16315
2
  case X86::BI__builtin_ia32_vfcmaddcsh_round_mask3:
16316
2
    IsConjFMA = true;
16317
2
    [[fallthrough]];
16318
4
  case X86::BI__builtin_ia32_vfmaddcsh_round_mask3: {
16319
4
    Intrinsic::ID IID = IsConjFMA ? Intrinsic::x86_avx512fp16_mask_vfcmadd_csh
16320
4
                                  : Intrinsic::x86_avx512fp16_mask_vfmadd_csh;
16321
4
    Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
16322
4
    static constexpr int Mask[] = {0, 5, 6, 7};
16323
4
    return Builder.CreateShuffleVector(Call, Ops[2], Mask);
16324
2
  }
16325
2
  case X86::BI__builtin_ia32_prefetchi:
16326
2
    return Builder.CreateCall(
16327
2
        CGM.getIntrinsic(Intrinsic::prefetch, Ops[0]->getType()),
16328
2
        {Ops[0], llvm::ConstantInt::get(Int32Ty, 0), Ops[1],
16329
2
         llvm::ConstantInt::get(Int32Ty, 0)});
16330
9.15k
  }
16331
9.15k
}
16332
16333
Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
16334
269
                                           const CallExpr *E) {
16335
  // Do not emit the builtin arguments in the arguments of a function call,
16336
  // because the evaluation order of function arguments is not specified in C++.
16337
  // This is important when testing to ensure the arguments are emitted in the
16338
  // same order every time. Eg:
16339
  // Instead of:
16340
  //   return Builder.CreateFDiv(EmitScalarExpr(E->getArg(0)),
16341
  //                             EmitScalarExpr(E->getArg(1)), "swdiv");
16342
  // Use:
16343
  //   Value *Op0 = EmitScalarExpr(E->getArg(0));
16344
  //   Value *Op1 = EmitScalarExpr(E->getArg(1));
16345
  //   return Builder.CreateFDiv(Op0, Op1, "swdiv")
16346
16347
269
  Intrinsic::ID ID = Intrinsic::not_intrinsic;
16348
16349
269
  switch (BuiltinID) {
16350
0
  default: return nullptr;
16351
16352
  // __builtin_ppc_get_timebase is GCC 4.8+'s PowerPC-specific name for what we
16353
  // call __builtin_readcyclecounter.
16354
0
  case PPC::BI__builtin_ppc_get_timebase:
16355
0
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::readcyclecounter));
16356
16357
  // vec_ld, vec_xl_be, vec_lvsl, vec_lvsr
16358
0
  case PPC::BI__builtin_altivec_lvx:
16359
0
  case PPC::BI__builtin_altivec_lvxl:
16360
0
  case PPC::BI__builtin_altivec_lvebx:
16361
0
  case PPC::BI__builtin_altivec_lvehx:
16362
0
  case PPC::BI__builtin_altivec_lvewx:
16363
0
  case PPC::BI__builtin_altivec_lvsl:
16364
0
  case PPC::BI__builtin_altivec_lvsr:
16365
0
  case PPC::BI__builtin_vsx_lxvd2x:
16366
0
  case PPC::BI__builtin_vsx_lxvw4x:
16367
0
  case PPC::BI__builtin_vsx_lxvd2x_be:
16368
0
  case PPC::BI__builtin_vsx_lxvw4x_be:
16369
0
  case PPC::BI__builtin_vsx_lxvl:
16370
0
  case PPC::BI__builtin_vsx_lxvll:
16371
0
  {
16372
0
    SmallVector<Value *, 2> Ops;
16373
0
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
16374
0
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
16375
0
    if (!(BuiltinID == PPC::BI__builtin_vsx_lxvl ||
16376
0
          BuiltinID == PPC::BI__builtin_vsx_lxvll)) {
16377
0
      Ops[0] = Builder.CreateGEP(Int8Ty, Ops[1], Ops[0]);
16378
0
      Ops.pop_back();
16379
0
    }
16380
16381
0
    switch (BuiltinID) {
16382
0
    default: llvm_unreachable("Unsupported ld/lvsl/lvsr intrinsic!");
16383
0
    case PPC::BI__builtin_altivec_lvx:
16384
0
      ID = Intrinsic::ppc_altivec_lvx;
16385
0
      break;
16386
0
    case PPC::BI__builtin_altivec_lvxl:
16387
0
      ID = Intrinsic::ppc_altivec_lvxl;
16388
0
      break;
16389
0
    case PPC::BI__builtin_altivec_lvebx:
16390
0
      ID = Intrinsic::ppc_altivec_lvebx;
16391
0
      break;
16392
0
    case PPC::BI__builtin_altivec_lvehx:
16393
0
      ID = Intrinsic::ppc_altivec_lvehx;
16394
0
      break;
16395
0
    case PPC::BI__builtin_altivec_lvewx:
16396
0
      ID = Intrinsic::ppc_altivec_lvewx;
16397
0
      break;
16398
0
    case PPC::BI__builtin_altivec_lvsl:
16399
0
      ID = Intrinsic::ppc_altivec_lvsl;
16400
0
      break;
16401
0
    case PPC::BI__builtin_altivec_lvsr:
16402
0
      ID = Intrinsic::ppc_altivec_lvsr;
16403
0
      break;
16404
0
    case PPC::BI__builtin_vsx_lxvd2x:
16405
0
      ID = Intrinsic::ppc_vsx_lxvd2x;
16406
0
      break;
16407
0
    case PPC::BI__builtin_vsx_lxvw4x:
16408
0
      ID = Intrinsic::ppc_vsx_lxvw4x;
16409
0
      break;
16410
0
    case PPC::BI__builtin_vsx_lxvd2x_be:
16411
0
      ID = Intrinsic::ppc_vsx_lxvd2x_be;
16412
0
      break;
16413
0
    case PPC::BI__builtin_vsx_lxvw4x_be:
16414
0
      ID = Intrinsic::ppc_vsx_lxvw4x_be;
16415
0
      break;
16416
0
    case PPC::BI__builtin_vsx_lxvl:
16417
0
      ID = Intrinsic::ppc_vsx_lxvl;
16418
0
      break;
16419
0
    case PPC::BI__builtin_vsx_lxvll:
16420
0
      ID = Intrinsic::ppc_vsx_lxvll;
16421
0
      break;
16422
0
    }
16423
0
    llvm::Function *F = CGM.getIntrinsic(ID);
16424
0
    return Builder.CreateCall(F, Ops, "");
16425
0
  }
16426
16427
  // vec_st, vec_xst_be
16428
0
  case PPC::BI__builtin_altivec_stvx:
16429
0
  case PPC::BI__builtin_altivec_stvxl:
16430
0
  case PPC::BI__builtin_altivec_stvebx:
16431
0
  case PPC::BI__builtin_altivec_stvehx:
16432
0
  case PPC::BI__builtin_altivec_stvewx:
16433
0
  case PPC::BI__builtin_vsx_stxvd2x:
16434
0
  case PPC::BI__builtin_vsx_stxvw4x:
16435
0
  case PPC::BI__builtin_vsx_stxvd2x_be:
16436
0
  case PPC::BI__builtin_vsx_stxvw4x_be:
16437
0
  case PPC::BI__builtin_vsx_stxvl:
16438
0
  case PPC::BI__builtin_vsx_stxvll:
16439
0
  {
16440
0
    SmallVector<Value *, 3> Ops;
16441
0
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
16442
0
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
16443
0
    Ops.push_back(EmitScalarExpr(E->getArg(2)));
16444
0
    if (!(BuiltinID == PPC::BI__builtin_vsx_stxvl ||
16445
0
          BuiltinID == PPC::BI__builtin_vsx_stxvll)) {
16446
0
      Ops[1] = Builder.CreateGEP(Int8Ty, Ops[2], Ops[1]);
16447
0
      Ops.pop_back();
16448
0
    }
16449
16450
0
    switch (BuiltinID) {
16451
0
    default: llvm_unreachable("Unsupported st intrinsic!");
16452
0
    case PPC::BI__builtin_altivec_stvx:
16453
0
      ID = Intrinsic::ppc_altivec_stvx;
16454
0
      break;
16455
0
    case PPC::BI__builtin_altivec_stvxl:
16456
0
      ID = Intrinsic::ppc_altivec_stvxl;
16457
0
      break;
16458
0
    case PPC::BI__builtin_altivec_stvebx:
16459
0
      ID = Intrinsic::ppc_altivec_stvebx;
16460
0
      break;
16461
0
    case PPC::BI__builtin_altivec_stvehx:
16462
0
      ID = Intrinsic::ppc_altivec_stvehx;
16463
0
      break;
16464
0
    case PPC::BI__builtin_altivec_stvewx:
16465
0
      ID = Intrinsic::ppc_altivec_stvewx;
16466
0
      break;
16467
0
    case PPC::BI__builtin_vsx_stxvd2x:
16468
0
      ID = Intrinsic::ppc_vsx_stxvd2x;
16469
0
      break;
16470
0
    case PPC::BI__builtin_vsx_stxvw4x:
16471
0
      ID = Intrinsic::ppc_vsx_stxvw4x;
16472
0
      break;
16473
0
    case PPC::BI__builtin_vsx_stxvd2x_be:
16474
0
      ID = Intrinsic::ppc_vsx_stxvd2x_be;
16475
0
      break;
16476
0
    case PPC::BI__builtin_vsx_stxvw4x_be:
16477
0
      ID = Intrinsic::ppc_vsx_stxvw4x_be;
16478
0
      break;
16479
0
    case PPC::BI__builtin_vsx_stxvl:
16480
0
      ID = Intrinsic::ppc_vsx_stxvl;
16481
0
      break;
16482
0
    case PPC::BI__builtin_vsx_stxvll:
16483
0
      ID = Intrinsic::ppc_vsx_stxvll;
16484
0
      break;
16485
0
    }
16486
0
    llvm::Function *F = CGM.getIntrinsic(ID);
16487
0
    return Builder.CreateCall(F, Ops, "");
16488
0
  }
16489
0
  case PPC::BI__builtin_vsx_ldrmb: {
16490
    // Essentially boils down to performing an unaligned VMX load sequence so
16491
    // as to avoid crossing a page boundary and then shuffling the elements
16492
    // into the right side of the vector register.
16493
0
    Value *Op0 = EmitScalarExpr(E->getArg(0));
16494
0
    Value *Op1 = EmitScalarExpr(E->getArg(1));
16495
0
    int64_t NumBytes = cast<ConstantInt>(Op1)->getZExtValue();
16496
0
    llvm::Type *ResTy = ConvertType(E->getType());
16497
0
    bool IsLE = getTarget().isLittleEndian();
16498
16499
    // If the user wants the entire vector, just load the entire vector.
16500
0
    if (NumBytes == 16) {
16501
0
      Value *LD =
16502
0
          Builder.CreateLoad(Address(Op0, ResTy, CharUnits::fromQuantity(1)));
16503
0
      if (!IsLE)
16504
0
        return LD;
16505
16506
      // Reverse the bytes on LE.
16507
0
      SmallVector<int, 16> RevMask;
16508
0
      for (int Idx = 0; Idx < 16; Idx++)
16509
0
        RevMask.push_back(15 - Idx);
16510
0
      return Builder.CreateShuffleVector(LD, LD, RevMask);
16511
0
    }
16512
16513
0
    llvm::Function *Lvx = CGM.getIntrinsic(Intrinsic::ppc_altivec_lvx);
16514
0
    llvm::Function *Lvs = CGM.getIntrinsic(IsLE ? Intrinsic::ppc_altivec_lvsr
16515
0
                                                : Intrinsic::ppc_altivec_lvsl);
16516
0
    llvm::Function *Vperm = CGM.getIntrinsic(Intrinsic::ppc_altivec_vperm);
16517
0
    Value *HiMem = Builder.CreateGEP(
16518
0
        Int8Ty, Op0, ConstantInt::get(Op1->getType(), NumBytes - 1));
16519
0
    Value *LoLd = Builder.CreateCall(Lvx, Op0, "ld.lo");
16520
0
    Value *HiLd = Builder.CreateCall(Lvx, HiMem, "ld.hi");
16521
0
    Value *Mask1 = Builder.CreateCall(Lvs, Op0, "mask1");
16522
16523
0
    Op0 = IsLE ? HiLd : LoLd;
16524
0
    Op1 = IsLE ? LoLd : HiLd;
16525
0
    Value *AllElts = Builder.CreateCall(Vperm, {Op0, Op1, Mask1}, "shuffle1");
16526
0
    Constant *Zero = llvm::Constant::getNullValue(IsLE ? ResTy : AllElts->getType());
16527
16528
0
    if (IsLE) {
16529
0
      SmallVector<int, 16> Consts;
16530
0
      for (int Idx = 0; Idx < 16; Idx++) {
16531
0
        int Val = (NumBytes - Idx - 1 >= 0) ? (NumBytes - Idx - 1)
16532
0
                                            : 16 - (NumBytes - Idx);
16533
0
        Consts.push_back(Val);
16534
0
      }
16535
0
      return Builder.CreateShuffleVector(Builder.CreateBitCast(AllElts, ResTy),
16536
0
                                         Zero, Consts);
16537
0
    }
16538
0
    SmallVector<Constant *, 16> Consts;
16539
0
    for (int Idx = 0; Idx < 16; Idx++)
16540
0
      Consts.push_back(Builder.getInt8(NumBytes + Idx));
16541
0
    Value *Mask2 = ConstantVector::get(Consts);
16542
0
    return Builder.CreateBitCast(
16543
0
        Builder.CreateCall(Vperm, {Zero, AllElts, Mask2}, "shuffle2"), ResTy);
16544
0
  }
16545
0
  case PPC::BI__builtin_vsx_strmb: {
16546
0
    Value *Op0 = EmitScalarExpr(E->getArg(0));
16547
0
    Value *Op1 = EmitScalarExpr(E->getArg(1));
16548
0
    Value *Op2 = EmitScalarExpr(E->getArg(2));
16549
0
    int64_t NumBytes = cast<ConstantInt>(Op1)->getZExtValue();
16550
0
    bool IsLE = getTarget().isLittleEndian();
16551
0
    auto StoreSubVec = [&](unsigned Width, unsigned Offset, unsigned EltNo) {
16552
      // When storing the whole vector, simply store it on BE; on LE, reverse
16553
      // the bytes and then store.
16554
0
      if (Width == 16) {
16555
0
        Value *StVec = Op2;
16556
0
        if (IsLE) {
16557
0
          SmallVector<int, 16> RevMask;
16558
0
          for (int Idx = 0; Idx < 16; Idx++)
16559
0
            RevMask.push_back(15 - Idx);
16560
0
          StVec = Builder.CreateShuffleVector(Op2, Op2, RevMask);
16561
0
        }
16562
0
        return Builder.CreateStore(
16563
0
            StVec, Address(Op0, Op2->getType(), CharUnits::fromQuantity(1)));
16564
0
      }
16565
0
      auto *ConvTy = Int64Ty;
16566
0
      unsigned NumElts = 0;
16567
0
      switch (Width) {
16568
0
      default:
16569
0
        llvm_unreachable("width for stores must be a power of 2");
16570
0
      case 8:
16571
0
        ConvTy = Int64Ty;
16572
0
        NumElts = 2;
16573
0
        break;
16574
0
      case 4:
16575
0
        ConvTy = Int32Ty;
16576
0
        NumElts = 4;
16577
0
        break;
16578
0
      case 2:
16579
0
        ConvTy = Int16Ty;
16580
0
        NumElts = 8;
16581
0
        break;
16582
0
      case 1:
16583
0
        ConvTy = Int8Ty;
16584
0
        NumElts = 16;
16585
0
        break;
16586
0
      }
16587
0
      Value *Vec = Builder.CreateBitCast(
16588
0
          Op2, llvm::FixedVectorType::get(ConvTy, NumElts));
16589
0
      Value *Ptr =
16590
0
          Builder.CreateGEP(Int8Ty, Op0, ConstantInt::get(Int64Ty, Offset));
16591
0
      Value *Elt = Builder.CreateExtractElement(Vec, EltNo);
16592
0
      if (IsLE && Width > 1) {
16593
0
        Function *F = CGM.getIntrinsic(Intrinsic::bswap, ConvTy);
16594
0
        Elt = Builder.CreateCall(F, Elt);
16595
0
      }
16596
0
      return Builder.CreateStore(
16597
0
          Elt, Address(Ptr, ConvTy, CharUnits::fromQuantity(1)));
16598
0
    };
16599
0
    unsigned Stored = 0;
16600
0
    unsigned RemainingBytes = NumBytes;
16601
0
    Value *Result;
16602
0
    if (NumBytes == 16)
16603
0
      return StoreSubVec(16, 0, 0);
16604
0
    if (NumBytes >= 8) {
16605
0
      Result = StoreSubVec(8, NumBytes - 8, IsLE ? 0 : 1);
16606
0
      RemainingBytes -= 8;
16607
0
      Stored += 8;
16608
0
    }
16609
0
    if (RemainingBytes >= 4) {
16610
0
      Result = StoreSubVec(4, NumBytes - Stored - 4,
16611
0
                           IsLE ? (Stored >> 2) : 3 - (Stored >> 2));
16612
0
      RemainingBytes -= 4;
16613
0
      Stored += 4;
16614
0
    }
16615
0
    if (RemainingBytes >= 2) {
16616
0
      Result = StoreSubVec(2, NumBytes - Stored - 2,
16617
0
                           IsLE ? (Stored >> 1) : 7 - (Stored >> 1));
16618
0
      RemainingBytes -= 2;
16619
0
      Stored += 2;
16620
0
    }
16621
0
    if (RemainingBytes)
16622
0
      Result =
16623
0
          StoreSubVec(1, NumBytes - Stored - 1, IsLE ? Stored : 15 - Stored);
16624
0
    return Result;
16625
0
  }
16626
  // Square root
16627
0
  case PPC::BI__builtin_vsx_xvsqrtsp:
16628
0
  case PPC::BI__builtin_vsx_xvsqrtdp: {
16629
0
    llvm::Type *ResultType = ConvertType(E->getType());
16630
0
    Value *X = EmitScalarExpr(E->getArg(0));
16631
0
    if (Builder.getIsFPConstrained()) {
16632
0
      llvm::Function *F = CGM.getIntrinsic(
16633
0
          Intrinsic::experimental_constrained_sqrt, ResultType);
16634
0
      return Builder.CreateConstrainedFPCall(F, X);
16635
0
    } else {
16636
0
      llvm::Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType);
16637
0
      return Builder.CreateCall(F, X);
16638
0
    }
16639
0
  }
16640
  // Count leading zeros
16641
0
  case PPC::BI__builtin_altivec_vclzb:
16642
0
  case PPC::BI__builtin_altivec_vclzh:
16643
0
  case PPC::BI__builtin_altivec_vclzw:
16644
0
  case PPC::BI__builtin_altivec_vclzd: {
16645
0
    llvm::Type *ResultType = ConvertType(E->getType());
16646
0
    Value *X = EmitScalarExpr(E->getArg(0));
16647
0
    Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
16648
0
    Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType);
16649
0
    return Builder.CreateCall(F, {X, Undef});
16650
0
  }
16651
0
  case PPC::BI__builtin_altivec_vctzb:
16652
0
  case PPC::BI__builtin_altivec_vctzh:
16653
0
  case PPC::BI__builtin_altivec_vctzw:
16654
0
  case PPC::BI__builtin_altivec_vctzd: {
16655
0
    llvm::Type *ResultType = ConvertType(E->getType());
16656
0
    Value *X = EmitScalarExpr(E->getArg(0));
16657
0
    Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
16658
0
    Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType);
16659
0
    return Builder.CreateCall(F, {X, Undef});
16660
0
  }
16661
0
  case PPC::BI__builtin_altivec_vinsd:
16662
0
  case PPC::BI__builtin_altivec_vinsw:
16663
0
  case PPC::BI__builtin_altivec_vinsd_elt:
16664
0
  case PPC::BI__builtin_altivec_vinsw_elt: {
16665
0
    llvm::Type *ResultType = ConvertType(E->getType());
16666
0
    Value *Op0 = EmitScalarExpr(E->getArg(0));
16667
0
    Value *Op1 = EmitScalarExpr(E->getArg(1));
16668
0
    Value *Op2 = EmitScalarExpr(E->getArg(2));
16669
16670
0
    bool IsUnaligned = (BuiltinID == PPC::BI__builtin_altivec_vinsw ||
16671
0
                        BuiltinID == PPC::BI__builtin_altivec_vinsd);
16672
16673
0
    bool Is32bit = (BuiltinID == PPC::BI__builtin_altivec_vinsw ||
16674
0
                    BuiltinID == PPC::BI__builtin_altivec_vinsw_elt);
16675
16676
    // The third argument must be a compile time constant.
16677
0
    ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op2);
16678
0
    assert(ArgCI &&
16679
0
           "Third Arg to vinsw/vinsd intrinsic must be a constant integer!");
16680
16681
    // The valid value for the third argument depends on the input type and the
16682
    // builtin called.
16683
0
    int ValidMaxValue = 0;
16684
0
    if (IsUnaligned)
16685
0
      ValidMaxValue = (Is32bit) ? 12 : 8;
16686
0
    else
16687
0
      ValidMaxValue = (Is32bit) ? 3 : 1;
16688
16689
    // Get value of third argument.
16690
0
    int64_t ConstArg = ArgCI->getSExtValue();
16691
16692
    // Compose range checking error message.
16693
0
    std::string RangeErrMsg = IsUnaligned ? "byte" : "element";
16694
0
    RangeErrMsg += " number " + llvm::to_string(ConstArg);
16695
0
    RangeErrMsg += " is outside of the valid range [0, ";
16696
0
    RangeErrMsg += llvm::to_string(ValidMaxValue) + "]";
16697
16698
    // Issue error if third argument is not within the valid range.
16699
0
    if (ConstArg < 0 || ConstArg > ValidMaxValue)
16700
0
      CGM.Error(E->getExprLoc(), RangeErrMsg);
16701
16702
    // Input to vec_replace_elt is an element index, convert to byte index.
16703
0
    if (!IsUnaligned) {
16704
0
      ConstArg *= Is32bit ? 4 : 8;
16705
      // Fix the constant according to endianness.
16706
0
      if (getTarget().isLittleEndian())
16707
0
        ConstArg = (Is32bit ? 12 : 8) - ConstArg;
16708
0
    }
16709
16710
0
    ID = Is32bit ? Intrinsic::ppc_altivec_vinsw : Intrinsic::ppc_altivec_vinsd;
16711
0
    Op2 = ConstantInt::getSigned(Int32Ty, ConstArg);
16712
    // Casting input to vector int as per intrinsic definition.
16713
0
    Op0 =
16714
0
        Is32bit
16715
0
            ? Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int32Ty, 4))
16716
0
            : Builder.CreateBitCast(Op0,
16717
0
                                    llvm::FixedVectorType::get(Int64Ty, 2));
16718
0
    return Builder.CreateBitCast(
16719
0
        Builder.CreateCall(CGM.getIntrinsic(ID), {Op0, Op1, Op2}), ResultType);
16720
0
  }
16721
0
  case PPC::BI__builtin_altivec_vpopcntb:
16722
0
  case PPC::BI__builtin_altivec_vpopcnth:
16723
0
  case PPC::BI__builtin_altivec_vpopcntw:
16724
0
  case PPC::BI__builtin_altivec_vpopcntd: {
16725
0
    llvm::Type *ResultType = ConvertType(E->getType());
16726
0
    Value *X = EmitScalarExpr(E->getArg(0));
16727
0
    llvm::Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
16728
0
    return Builder.CreateCall(F, X);
16729
0
  }
16730
0
  case PPC::BI__builtin_altivec_vadduqm:
16731
0
  case PPC::BI__builtin_altivec_vsubuqm: {
16732
0
    Value *Op0 = EmitScalarExpr(E->getArg(0));
16733
0
    Value *Op1 = EmitScalarExpr(E->getArg(1));
16734
0
    llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
16735
0
    Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int128Ty, 1));
16736
0
    Op1 = Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(Int128Ty, 1));
16737
0
    if (BuiltinID == PPC::BI__builtin_altivec_vadduqm)
16738
0
      return Builder.CreateAdd(Op0, Op1, "vadduqm");
16739
0
    else
16740
0
      return Builder.CreateSub(Op0, Op1, "vsubuqm");
16741
0
  }
16742
0
  case PPC::BI__builtin_altivec_vaddcuq_c:
16743
0
  case PPC::BI__builtin_altivec_vsubcuq_c: {
16744
0
    SmallVector<Value *, 2> Ops;
16745
0
    Value *Op0 = EmitScalarExpr(E->getArg(0));
16746
0
    Value *Op1 = EmitScalarExpr(E->getArg(1));
16747
0
    llvm::Type *V1I128Ty = llvm::FixedVectorType::get(
16748
0
        llvm::IntegerType::get(getLLVMContext(), 128), 1);
16749
0
    Ops.push_back(Builder.CreateBitCast(Op0, V1I128Ty));
16750
0
    Ops.push_back(Builder.CreateBitCast(Op1, V1I128Ty));
16751
0
    ID = (BuiltinID == PPC::BI__builtin_altivec_vaddcuq_c)
16752
0
             ? Intrinsic::ppc_altivec_vaddcuq
16753
0
             : Intrinsic::ppc_altivec_vsubcuq;
16754
0
    return Builder.CreateCall(CGM.getIntrinsic(ID), Ops, "");
16755
0
  }
16756
0
  case PPC::BI__builtin_altivec_vaddeuqm_c:
16757
0
  case PPC::BI__builtin_altivec_vaddecuq_c:
16758
0
  case PPC::BI__builtin_altivec_vsubeuqm_c:
16759
0
  case PPC::BI__builtin_altivec_vsubecuq_c: {
16760
0
    SmallVector<Value *, 3> Ops;
16761
0
    Value *Op0 = EmitScalarExpr(E->getArg(0));
16762
0
    Value *Op1 = EmitScalarExpr(E->getArg(1));
16763
0
    Value *Op2 = EmitScalarExpr(E->getArg(2));
16764
0
    llvm::Type *V1I128Ty = llvm::FixedVectorType::get(
16765
0
        llvm::IntegerType::get(getLLVMContext(), 128), 1);
16766
0
    Ops.push_back(Builder.CreateBitCast(Op0, V1I128Ty));
16767
0
    Ops.push_back(Builder.CreateBitCast(Op1, V1I128Ty));
16768
0
    Ops.push_back(Builder.CreateBitCast(Op2, V1I128Ty));
16769
0
    switch (BuiltinID) {
16770
0
    default:
16771
0
      llvm_unreachable("Unsupported intrinsic!");
16772
0
    case PPC::BI__builtin_altivec_vaddeuqm_c:
16773
0
      ID = Intrinsic::ppc_altivec_vaddeuqm;
16774
0
      break;
16775
0
    case PPC::BI__builtin_altivec_vaddecuq_c:
16776
0
      ID = Intrinsic::ppc_altivec_vaddecuq;
16777
0
      break;
16778
0
    case PPC::BI__builtin_altivec_vsubeuqm_c:
16779
0
      ID = Intrinsic::ppc_altivec_vsubeuqm;
16780
0
      break;
16781
0
    case PPC::BI__builtin_altivec_vsubecuq_c:
16782
0
      ID = Intrinsic::ppc_altivec_vsubecuq;
16783
0
      break;
16784
0
    }
16785
0
    return Builder.CreateCall(CGM.getIntrinsic(ID), Ops, "");
16786
0
  }
16787
  // Rotate and insert under mask operation.
16788
  // __rldimi(rs, is, shift, mask)
16789
  // (rotl64(rs, shift) & mask) | (is & ~mask)
16790
  // __rlwimi(rs, is, shift, mask)
16791
  // (rotl(rs, shift) & mask) | (is & ~mask)
16792
0
  case PPC::BI__builtin_ppc_rldimi:
16793
0
  case PPC::BI__builtin_ppc_rlwimi: {
16794
0
    Value *Op0 = EmitScalarExpr(E->getArg(0));
16795
0
    Value *Op1 = EmitScalarExpr(E->getArg(1));
16796
0
    Value *Op2 = EmitScalarExpr(E->getArg(2));
16797
0
    Value *Op3 = EmitScalarExpr(E->getArg(3));
16798
0
    llvm::Type *Ty = Op0->getType();
16799
0
    Function *F = CGM.getIntrinsic(Intrinsic::fshl, Ty);
16800
0
    if (BuiltinID == PPC::BI__builtin_ppc_rldimi)
16801
0
      Op2 = Builder.CreateZExt(Op2, Int64Ty);
16802
0
    Value *Shift = Builder.CreateCall(F, {Op0, Op0, Op2});
16803
0
    Value *X = Builder.CreateAnd(Shift, Op3);
16804
0
    Value *Y = Builder.CreateAnd(Op1, Builder.CreateNot(Op3));
16805
0
    return Builder.CreateOr(X, Y);
16806
0
  }
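A portable reference for the rotate-and-insert formula quoted in the comment above; a sketch, not the PPC builtin itself:

#include <stdint.h>

static uint64_t rotl64(uint64_t v, unsigned s) {
  return (v << (s & 63)) | (v >> ((64 - s) & 63));
}

// __rldimi(rs, is, shift, mask) == (rotl64(rs, shift) & mask) | (is & ~mask)
uint64_t rldimi_ref(uint64_t rs, uint64_t is, unsigned shift, uint64_t mask) {
  return (rotl64(rs, shift) & mask) | (is & ~mask);
}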
16807
  // Rotate and insert under mask operation.
16808
  // __rlwnm(rs, shift, mask)
16809
  // rotl(rs, shift) & mask
16810
0
  case PPC::BI__builtin_ppc_rlwnm: {
16811
0
    Value *Op0 = EmitScalarExpr(E->getArg(0));
16812
0
    Value *Op1 = EmitScalarExpr(E->getArg(1));
16813
0
    Value *Op2 = EmitScalarExpr(E->getArg(2));
16814
0
    llvm::Type *Ty = Op0->getType();
16815
0
    Function *F = CGM.getIntrinsic(Intrinsic::fshl, Ty);
16816
0
    Value *Shift = Builder.CreateCall(F, {Op0, Op0, Op1});
16817
0
    return Builder.CreateAnd(Shift, Op2);
16818
0
  }
16819
0
  case PPC::BI__builtin_ppc_poppar4:
16820
0
  case PPC::BI__builtin_ppc_poppar8: {
16821
0
    Value *Op0 = EmitScalarExpr(E->getArg(0));
16822
0
    llvm::Type *ArgType = Op0->getType();
16823
0
    Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
16824
0
    Value *Tmp = Builder.CreateCall(F, Op0);
16825
16826
0
    llvm::Type *ResultType = ConvertType(E->getType());
16827
0
    Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1));
16828
0
    if (Result->getType() != ResultType)
16829
0
      Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
16830
0
                                     "cast");
16831
0
    return Result;
16832
0
  }
16833
0
  case PPC::BI__builtin_ppc_cmpb: {
16834
0
    Value *Op0 = EmitScalarExpr(E->getArg(0));
16835
0
    Value *Op1 = EmitScalarExpr(E->getArg(1));
16836
0
    if (getTarget().getTriple().isPPC64()) {
16837
0
      Function *F =
16838
0
          CGM.getIntrinsic(Intrinsic::ppc_cmpb, {Int64Ty, Int64Ty, Int64Ty});
16839
0
      return Builder.CreateCall(F, {Op0, Op1}, "cmpb");
16840
0
    }
16841
    // For 32 bit, emit the code as below:
16842
    // %conv = trunc i64 %a to i32
16843
    // %conv1 = trunc i64 %b to i32
16844
    // %shr = lshr i64 %a, 32
16845
    // %conv2 = trunc i64 %shr to i32
16846
    // %shr3 = lshr i64 %b, 32
16847
    // %conv4 = trunc i64 %shr3 to i32
16848
    // %0 = tail call i32 @llvm.ppc.cmpb32(i32 %conv, i32 %conv1)
16849
    // %conv5 = zext i32 %0 to i64
16850
    // %1 = tail call i32 @llvm.ppc.cmpb32(i32 %conv2, i32 %conv4)
16851
    // %conv614 = zext i32 %1 to i64
16852
    // %shl = shl nuw i64 %conv614, 32
16853
    // %or = or i64 %shl, %conv5
16854
    // ret i64 %or
16855
0
    Function *F =
16856
0
        CGM.getIntrinsic(Intrinsic::ppc_cmpb, {Int32Ty, Int32Ty, Int32Ty});
16857
0
    Value *ArgOneLo = Builder.CreateTrunc(Op0, Int32Ty);
16858
0
    Value *ArgTwoLo = Builder.CreateTrunc(Op1, Int32Ty);
16859
0
    Constant *ShiftAmt = ConstantInt::get(Int64Ty, 32);
16860
0
    Value *ArgOneHi =
16861
0
        Builder.CreateTrunc(Builder.CreateLShr(Op0, ShiftAmt), Int32Ty);
16862
0
    Value *ArgTwoHi =
16863
0
        Builder.CreateTrunc(Builder.CreateLShr(Op1, ShiftAmt), Int32Ty);
16864
0
    Value *ResLo = Builder.CreateZExt(
16865
0
        Builder.CreateCall(F, {ArgOneLo, ArgTwoLo}, "cmpb"), Int64Ty);
16866
0
    Value *ResHiShift = Builder.CreateZExt(
16867
0
        Builder.CreateCall(F, {ArgOneHi, ArgTwoHi}, "cmpb"), Int64Ty);
16868
0
    Value *ResHi = Builder.CreateShl(ResHiShift, ShiftAmt);
16869
0
    return Builder.CreateOr(ResLo, ResHi);
16870
0
  }
16871
  // Copy sign
16872
0
  case PPC::BI__builtin_vsx_xvcpsgnsp:
16873
0
  case PPC::BI__builtin_vsx_xvcpsgndp: {
16874
0
    llvm::Type *ResultType = ConvertType(E->getType());
16875
0
    Value *X = EmitScalarExpr(E->getArg(0));
16876
0
    Value *Y = EmitScalarExpr(E->getArg(1));
16877
0
    ID = Intrinsic::copysign;
16878
0
    llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
16879
0
    return Builder.CreateCall(F, {X, Y});
16880
0
  }
16881
  // Rounding/truncation
16882
0
  case PPC::BI__builtin_vsx_xvrspip:
16883
0
  case PPC::BI__builtin_vsx_xvrdpip:
16884
0
  case PPC::BI__builtin_vsx_xvrdpim:
16885
0
  case PPC::BI__builtin_vsx_xvrspim:
16886
0
  case PPC::BI__builtin_vsx_xvrdpi:
16887
0
  case PPC::BI__builtin_vsx_xvrspi:
16888
0
  case PPC::BI__builtin_vsx_xvrdpic:
16889
0
  case PPC::BI__builtin_vsx_xvrspic:
16890
0
  case PPC::BI__builtin_vsx_xvrdpiz:
16891
0
  case PPC::BI__builtin_vsx_xvrspiz: {
16892
0
    llvm::Type *ResultType = ConvertType(E->getType());
16893
0
    Value *X = EmitScalarExpr(E->getArg(0));
16894
0
    if (BuiltinID == PPC::BI__builtin_vsx_xvrdpim ||
16895
0
        BuiltinID == PPC::BI__builtin_vsx_xvrspim)
16896
0
      ID = Builder.getIsFPConstrained()
16897
0
               ? Intrinsic::experimental_constrained_floor
16898
0
               : Intrinsic::floor;
16899
0
    else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpi ||
16900
0
             BuiltinID == PPC::BI__builtin_vsx_xvrspi)
16901
0
      ID = Builder.getIsFPConstrained()
16902
0
               ? Intrinsic::experimental_constrained_round
16903
0
               : Intrinsic::round;
16904
0
    else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpic ||
16905
0
             BuiltinID == PPC::BI__builtin_vsx_xvrspic)
16906
0
      ID = Builder.getIsFPConstrained()
16907
0
               ? Intrinsic::experimental_constrained_rint
16908
0
               : Intrinsic::rint;
16909
0
    else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpip ||
16910
0
             BuiltinID == PPC::BI__builtin_vsx_xvrspip)
16911
0
      ID = Builder.getIsFPConstrained()
16912
0
               ? Intrinsic::experimental_constrained_ceil
16913
0
               : Intrinsic::ceil;
16914
0
    else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpiz ||
16915
0
             BuiltinID == PPC::BI__builtin_vsx_xvrspiz)
16916
0
      ID = Builder.getIsFPConstrained()
16917
0
               ? Intrinsic::experimental_constrained_trunc
16918
0
               : Intrinsic::trunc;
16919
0
    llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
16920
0
    return Builder.getIsFPConstrained() ? Builder.CreateConstrainedFPCall(F, X)
16921
0
                                        : Builder.CreateCall(F, X);
16922
0
  }
16923
16924
  // Absolute value
16925
0
  case PPC::BI__builtin_vsx_xvabsdp:
16926
0
  case PPC::BI__builtin_vsx_xvabssp: {
16927
0
    llvm::Type *ResultType = ConvertType(E->getType());
16928
0
    Value *X = EmitScalarExpr(E->getArg(0));
16929
0
    llvm::Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
16930
0
    return Builder.CreateCall(F, X);
16931
0
  }
16932
16933
  // Fastmath by default
16934
0
  case PPC::BI__builtin_ppc_recipdivf:
16935
0
  case PPC::BI__builtin_ppc_recipdivd:
16936
0
  case PPC::BI__builtin_ppc_rsqrtf:
16937
0
  case PPC::BI__builtin_ppc_rsqrtd: {
16938
0
    FastMathFlags FMF = Builder.getFastMathFlags();
16939
0
    Builder.getFastMathFlags().setFast();
16940
0
    llvm::Type *ResultType = ConvertType(E->getType());
16941
0
    Value *X = EmitScalarExpr(E->getArg(0));
16942
16943
0
    if (BuiltinID == PPC::BI__builtin_ppc_recipdivf ||
16944
0
        BuiltinID == PPC::BI__builtin_ppc_recipdivd) {
16945
0
      Value *Y = EmitScalarExpr(E->getArg(1));
16946
0
      Value *FDiv = Builder.CreateFDiv(X, Y, "recipdiv");
16947
0
      Builder.getFastMathFlags() &= (FMF);
16948
0
      return FDiv;
16949
0
    }
16950
0
    auto *One = ConstantFP::get(ResultType, 1.0);
16951
0
    llvm::Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType);
16952
0
    Value *FDiv = Builder.CreateFDiv(One, Builder.CreateCall(F, X), "rsqrt");
16953
0
    Builder.getFastMathFlags() &= (FMF);
16954
0
    return FDiv;
16955
0
  }
16956
0
  case PPC::BI__builtin_ppc_alignx: {
16957
0
    Value *Op0 = EmitScalarExpr(E->getArg(0));
16958
0
    Value *Op1 = EmitScalarExpr(E->getArg(1));
16959
0
    ConstantInt *AlignmentCI = cast<ConstantInt>(Op0);
16960
0
    if (AlignmentCI->getValue().ugt(llvm::Value::MaximumAlignment))
16961
0
      AlignmentCI = ConstantInt::get(AlignmentCI->getType(),
16962
0
                                     llvm::Value::MaximumAlignment);
16963
16964
0
    emitAlignmentAssumption(Op1, E->getArg(1),
16965
0
                            /*The expr loc is sufficient.*/ SourceLocation(),
16966
0
                            AlignmentCI, nullptr);
16967
0
    return Op1;
16968
0
  }
16969
0
  case PPC::BI__builtin_ppc_rdlam: {
16970
0
    Value *Op0 = EmitScalarExpr(E->getArg(0));
16971
0
    Value *Op1 = EmitScalarExpr(E->getArg(1));
16972
0
    Value *Op2 = EmitScalarExpr(E->getArg(2));
16973
0
    llvm::Type *Ty = Op0->getType();
16974
0
    Value *ShiftAmt = Builder.CreateIntCast(Op1, Ty, false);
16975
0
    Function *F = CGM.getIntrinsic(Intrinsic::fshl, Ty);
16976
0
    Value *Rotate = Builder.CreateCall(F, {Op0, Op0, ShiftAmt});
16977
0
    return Builder.CreateAnd(Rotate, Op2);
16978
0
  }
16979
0
  case PPC::BI__builtin_ppc_load2r: {
16980
0
    Function *F = CGM.getIntrinsic(Intrinsic::ppc_load2r);
16981
0
    Value *Op0 = EmitScalarExpr(E->getArg(0));
16982
0
    Value *LoadIntrinsic = Builder.CreateCall(F, {Op0});
16983
0
    return Builder.CreateTrunc(LoadIntrinsic, Int16Ty);
16984
0
  }
16985
  // FMA variations
16986
4
  case PPC::BI__builtin_ppc_fnmsub:
16987
8
  case PPC::BI__builtin_ppc_fnmsubs:
16988
9
  case PPC::BI__builtin_vsx_xvmaddadp:
16989
10
  case PPC::BI__builtin_vsx_xvmaddasp:
16990
11
  case PPC::BI__builtin_vsx_xvnmaddadp:
16991
12
  case PPC::BI__builtin_vsx_xvnmaddasp:
16992
13
  case PPC::BI__builtin_vsx_xvmsubadp:
16993
14
  case PPC::BI__builtin_vsx_xvmsubasp:
16994
15
  case PPC::BI__builtin_vsx_xvnmsubadp:
16995
16
  case PPC::BI__builtin_vsx_xvnmsubasp: {
16996
16
    llvm::Type *ResultType = ConvertType(E->getType());
16997
16
    Value *X = EmitScalarExpr(E->getArg(0));
16998
16
    Value *Y = EmitScalarExpr(E->getArg(1));
16999
16
    Value *Z = EmitScalarExpr(E->getArg(2));
17000
16
    llvm::Function *F;
17001
16
    if (Builder.getIsFPConstrained())
17002
0
      F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
17003
16
    else
17004
16
      F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
17005
16
    switch (BuiltinID) {
17006
1
      case PPC::BI__builtin_vsx_xvmaddadp:
17007
2
      case PPC::BI__builtin_vsx_xvmaddasp:
17008
2
        if (Builder.getIsFPConstrained())
17009
0
          return Builder.CreateConstrainedFPCall(F, {X, Y, Z});
17010
2
        else
17011
2
          return Builder.CreateCall(F, {X, Y, Z});
17012
1
      case PPC::BI__builtin_vsx_xvnmaddadp:
17013
2
      case PPC::BI__builtin_vsx_xvnmaddasp:
17014
2
        if (Builder.getIsFPConstrained())
17015
0
          return Builder.CreateFNeg(
17016
0
              Builder.CreateConstrainedFPCall(F, {X, Y, Z}), "neg");
17017
2
        else
17018
2
          return Builder.CreateFNeg(Builder.CreateCall(F, {X, Y, Z}), "neg");
17019
1
      case PPC::BI__builtin_vsx_xvmsubadp:
17020
2
      case PPC::BI__builtin_vsx_xvmsubasp:
17021
2
        if (Builder.getIsFPConstrained())
17022
0
          return Builder.CreateConstrainedFPCall(
17023
0
              F, {X, Y, Builder.CreateFNeg(Z, "neg")});
17024
2
        else
17025
2
          return Builder.CreateCall(F, {X, Y, Builder.CreateFNeg(Z, "neg")});
17026
4
      case PPC::BI__builtin_ppc_fnmsub:
17027
8
      case PPC::BI__builtin_ppc_fnmsubs:
17028
9
      case PPC::BI__builtin_vsx_xvnmsubadp:
17029
10
      case PPC::BI__builtin_vsx_xvnmsubasp:
17030
10
        if (Builder.getIsFPConstrained())
17031
0
          return Builder.CreateFNeg(
17032
0
              Builder.CreateConstrainedFPCall(
17033
0
                  F, {X, Y, Builder.CreateFNeg(Z, "neg")}),
17034
0
              "neg");
17035
10
        else
17036
10
          return Builder.CreateCall(
17037
10
              CGM.getIntrinsic(Intrinsic::ppc_fnmsub, ResultType), {X, Y, Z});
17038
16
      }
17039
0
    llvm_unreachable("Unknown FMA operation");
17040
0
    return nullptr; // Suppress no-return warning
17041
16
  }
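  // Summary of the expansions in this case (derived from the calls above; the
  // constrained-FP paths use experimental.constrained.fma instead of fma):
  //   xvmadda(d|s)p             ->  fma(X, Y, Z)
  //   xvnmadda(d|s)p            -> -fma(X, Y, Z)
  //   xvmsuba(d|s)p             ->  fma(X, Y, -Z)
  //   fnmsub(s), xvnmsuba(d|s)p -> -fma(X, Y, -Z), via ppc_fnmsub when unconstrained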
17042
17043
0
  case PPC::BI__builtin_vsx_insertword: {
17044
0
    Value *Op0 = EmitScalarExpr(E->getArg(0));
17045
0
    Value *Op1 = EmitScalarExpr(E->getArg(1));
17046
0
    Value *Op2 = EmitScalarExpr(E->getArg(2));
17047
0
    llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxinsertw);
17048
17049
    // Third argument is a compile time constant int. It must be clamped to
17050
    // the range [0, 12].
17051
0
    ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op2);
17052
0
    assert(ArgCI &&
17053
0
           "Third arg to xxinsertw intrinsic must be constant integer");
17054
0
    const int64_t MaxIndex = 12;
17055
0
    int64_t Index = std::clamp(ArgCI->getSExtValue(), (int64_t)0, MaxIndex);
17056
17057
    // The builtin semantics don't exactly match the xxinsertw instruction's
17058
    // semantics (which ppc_vsx_xxinsertw follows). The builtin extracts the
17059
    // word from the first argument, and inserts it in the second argument. The
17060
    // instruction extracts the word from its second input register and inserts
17061
    // it into its first input register, so swap the first and second arguments.
17062
0
    std::swap(Op0, Op1);
17063
17064
    // Need to cast the second argument from a vector of unsigned int to a
17065
    // vector of long long.
17066
0
    Op1 = Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(Int64Ty, 2));
17067
17068
0
    if (getTarget().isLittleEndian()) {
17069
      // Reverse the double words in the vector we will extract from.
17070
0
      Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int64Ty, 2));
17071
0
      Op0 = Builder.CreateShuffleVector(Op0, Op0, ArrayRef<int>{1, 0});
17072
17073
      // Reverse the index.
17074
0
      Index = MaxIndex - Index;
17075
0
    }
17076
17077
    // Intrinsic expects the first arg to be a vector of int.
17078
0
    Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int32Ty, 4));
17079
0
    Op2 = ConstantInt::getSigned(Int32Ty, Index);
17080
0
    return Builder.CreateCall(F, {Op0, Op1, Op2});
17081
0
  }
17082
17083
0
  case PPC::BI__builtin_vsx_extractuword: {
17084
0
    Value *Op0 = EmitScalarExpr(E->getArg(0));
17085
0
    Value *Op1 = EmitScalarExpr(E->getArg(1));
17086
0
    llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxextractuw);
17087
17088
    // Intrinsic expects the first argument to be a vector of doublewords.
17089
0
    Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int64Ty, 2));
17090
17091
    // The second argument is a compile time constant int that needs to
17092
    // be clamped to the range [0, 12].
17093
0
    ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op1);
17094
0
    assert(ArgCI &&
17095
0
           "Second Arg to xxextractuw intrinsic must be a constant integer!");
17096
0
    const int64_t MaxIndex = 12;
17097
0
    int64_t Index = std::clamp(ArgCI->getSExtValue(), (int64_t)0, MaxIndex);
17098
17099
0
    if (getTarget().isLittleEndian()) {
17100
      // Reverse the index.
17101
0
      Index = MaxIndex - Index;
17102
0
      Op1 = ConstantInt::getSigned(Int32Ty, Index);
17103
17104
      // Emit the call, then reverse the double words of the results vector.
17105
0
      Value *Call = Builder.CreateCall(F, {Op0, Op1});
17106
17107
0
      Value *ShuffleCall =
17108
0
          Builder.CreateShuffleVector(Call, Call, ArrayRef<int>{1, 0});
17109
0
      return ShuffleCall;
17110
0
    } else {
17111
0
      Op1 = ConstantInt::getSigned(Int32Ty, Index);
17112
0
      return Builder.CreateCall(F, {Op0, Op1});
17113
0
    }
17114
0
  }
17115
17116
0
  case PPC::BI__builtin_vsx_xxpermdi: {
17117
0
    Value *Op0 = EmitScalarExpr(E->getArg(0));
17118
0
    Value *Op1 = EmitScalarExpr(E->getArg(1));
17119
0
    Value *Op2 = EmitScalarExpr(E->getArg(2));
17120
0
    ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op2);
17121
0
    assert(ArgCI && "Third arg must be constant integer!");
17122
17123
0
    unsigned Index = ArgCI->getZExtValue();
17124
0
    Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int64Ty, 2));
17125
0
    Op1 = Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(Int64Ty, 2));
17126
17127
    // Account for endianness by treating this as just a shuffle. So we use the
17128
    // same indices for both LE and BE in order to produce expected results in
17129
    // both cases.
17130
0
    int ElemIdx0 = (Index & 2) >> 1;
17131
0
    int ElemIdx1 = 2 + (Index & 1);
17132
17133
0
    int ShuffleElts[2] = {ElemIdx0, ElemIdx1};
17134
0
    Value *ShuffleCall = Builder.CreateShuffleVector(Op0, Op1, ShuffleElts);
17135
0
    QualType BIRetType = E->getType();
17136
0
    auto RetTy = ConvertType(BIRetType);
17137
0
    return Builder.CreateBitCast(ShuffleCall, RetTy);
17138
0
  }
17139
17140
0
  case PPC::BI__builtin_vsx_xxsldwi: {
17141
0
    Value *Op0 = EmitScalarExpr(E->getArg(0));
17142
0
    Value *Op1 = EmitScalarExpr(E->getArg(1));
17143
0
    Value *Op2 = EmitScalarExpr(E->getArg(2));
17144
0
    ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op2);
17145
0
    assert(ArgCI && "Third argument must be a compile time constant");
17146
0
    unsigned Index = ArgCI->getZExtValue() & 0x3;
17147
0
    Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int32Ty, 4));
17148
0
    Op1 = Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(Int32Ty, 4));
17149
17150
    // Create a shuffle mask
17151
0
    int ElemIdx0;
17152
0
    int ElemIdx1;
17153
0
    int ElemIdx2;
17154
0
    int ElemIdx3;
17155
0
    if (getTarget().isLittleEndian()) {
17156
      // Little endian element N comes from element 8+N-Index of the
17157
      // concatenated wide vector (of course, using modulo arithmetic on
17158
      // the total number of elements).
17159
0
      ElemIdx0 = (8 - Index) % 8;
17160
0
      ElemIdx1 = (9 - Index) % 8;
17161
0
      ElemIdx2 = (10 - Index) % 8;
17162
0
      ElemIdx3 = (11 - Index) % 8;
17163
0
    } else {
17164
      // Big endian ElemIdx<N> = Index + N
17165
0
      ElemIdx0 = Index;
17166
0
      ElemIdx1 = Index + 1;
17167
0
      ElemIdx2 = Index + 2;
17168
0
      ElemIdx3 = Index + 3;
17169
0
    }
17170
17171
0
    int ShuffleElts[4] = {ElemIdx0, ElemIdx1, ElemIdx2, ElemIdx3};
17172
0
    Value *ShuffleCall = Builder.CreateShuffleVector(Op0, Op1, ShuffleElts);
17173
0
    QualType BIRetType = E->getType();
17174
0
    auto RetTy = ConvertType(BIRetType);
17175
0
    return Builder.CreateBitCast(ShuffleCall, RetTy);
17176
0
  }
17177
17178
0
  case PPC::BI__builtin_pack_vector_int128: {
17179
0
    Value *Op0 = EmitScalarExpr(E->getArg(0));
17180
0
    Value *Op1 = EmitScalarExpr(E->getArg(1));
17181
0
    bool isLittleEndian = getTarget().isLittleEndian();
17182
0
    Value *PoisonValue =
17183
0
        llvm::PoisonValue::get(llvm::FixedVectorType::get(Op0->getType(), 2));
17184
0
    Value *Res = Builder.CreateInsertElement(
17185
0
        PoisonValue, Op0, (uint64_t)(isLittleEndian ? 1 : 0));
17186
0
    Res = Builder.CreateInsertElement(Res, Op1,
17187
0
                                      (uint64_t)(isLittleEndian ? 0 : 1));
17188
0
    return Builder.CreateBitCast(Res, ConvertType(E->getType()));
17189
0
  }
17190
17191
0
  case PPC::BI__builtin_unpack_vector_int128: {
17192
0
    Value *Op0 = EmitScalarExpr(E->getArg(0));
17193
0
    Value *Op1 = EmitScalarExpr(E->getArg(1));
17194
0
    ConstantInt *Index = cast<ConstantInt>(Op1);
17195
0
    Value *Unpacked = Builder.CreateBitCast(
17196
0
        Op0, llvm::FixedVectorType::get(ConvertType(E->getType()), 2));
17197
17198
0
    if (getTarget().isLittleEndian())
17199
0
      Index = ConstantInt::get(Index->getType(), 1 - Index->getZExtValue());
17200
17201
0
    return Builder.CreateExtractElement(Unpacked, Index);
17202
0
  }
17203
17204
0
  case PPC::BI__builtin_ppc_sthcx: {
17205
0
    llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_sthcx);
17206
0
    Value *Op0 = EmitScalarExpr(E->getArg(0));
17207
0
    Value *Op1 = Builder.CreateSExt(EmitScalarExpr(E->getArg(1)), Int32Ty);
17208
0
    return Builder.CreateCall(F, {Op0, Op1});
17209
0
  }
17210
17211
  // The PPC MMA builtins take a pointer to a __vector_quad as an argument.
17212
  // Some of the MMA instructions accumulate their result into an existing
17213
  // accumulator whereas the others generate a new accumulator. So we need to
17214
  // use custom code generation to expand such a builtin call into a load of the
17215
  // accumulator (if the corresponding instruction accumulates its result) followed
17216
  // by the call to the intrinsic and a store of the result.
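  // A minimal sketch of that expansion, assuming a hypothetical helper that is
  // handed the CodeGenFunction, the chosen intrinsic, the accumulator address and
  // the remaining operands (names here are illustrative, not part of this file):
  //
  //   llvm::SmallVector<llvm::Value *, 4> CallOps;
  //   if (Accumulate)                                  // instruction reads the old accumulator
  //     CallOps.push_back(CGF.Builder.CreateLoad(AccAddr));
  //   CallOps.append(Ops.begin(), Ops.end());
  //   llvm::Value *Call = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(ID), CallOps);
  //   // The intrinsic returns the new accumulator; write it back through the pointer.
  //   CGF.Builder.CreateAlignedStore(Call, AccAddr.getPointer(), llvm::MaybeAlign(64));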
17217
0
#define CUSTOM_BUILTIN(Name, Intr, Types, Accumulate, Feature) \
17218
11.0k
  case PPC::BI__builtin_##Name:
17219
11.0k
#include "clang/Basic/BuiltinsPPC.def"
17220
11.0k
  {
17221
11.0k
    SmallVector<Value *, 4> Ops;
17222
11.0k
    for (unsigned i = 0, e = E->getNumArgs(); i != e; i++)
17223
822
      if (E->getArg(i)->getType()->isArrayType())
17224
4
        Ops.push_back(EmitArrayToPointerDecay(E->getArg(i)).getPointer());
17225
818
      else
17226
818
        Ops.push_back(EmitScalarExpr(E->getArg(i)));
17227
    // The first argument of these builtins is a pointer used to store their
17228
    // result. However, the llvm intrinsics return their result in multiple
17229
    // return values. So, here we emit code extracting these values from the
17230
    // intrinsic results and storing them using that pointer.
17231
11.0k
    if (BuiltinID == PPC::BI__builtin_mma_disassemble_acc ||
17232
240
        BuiltinID == PPC::BI__builtin_vsx_disassemble_pair ||
17233
240
        BuiltinID == PPC::BI__builtin_mma_disassemble_pair) {
17234
11
      unsigned NumVecs = 2;
17235
11
      auto Intrinsic = Intrinsic::ppc_vsx_disassemble_pair;
17236
11
      if (BuiltinID == PPC::BI__builtin_mma_disassemble_acc) {
17237
6
        NumVecs = 4;
17238
6
        Intrinsic = Intrinsic::ppc_mma_disassemble_acc;
17239
6
      }
17240
11
      llvm::Function *F = CGM.getIntrinsic(Intrinsic);
17241
11
      Address Addr = EmitPointerWithAlignment(E->getArg(1));
17242
11
      Value *Vec = Builder.CreateLoad(Addr);
17243
11
      Value *Call = Builder.CreateCall(F, {Vec});
17244
11
      llvm::Type *VTy = llvm::FixedVectorType::get(Int8Ty, 16);
17245
11
      Value *Ptr = Ops[0];
17246
45
      for (unsigned i=0; i<NumVecs; 
i++34
) {
17247
34
        Value *Vec = Builder.CreateExtractValue(Call, i);
17248
34
        llvm::ConstantInt* Index = llvm::ConstantInt::get(IntTy, i);
17249
34
        Value *GEP = Builder.CreateInBoundsGEP(VTy, Ptr, Index);
17250
34
        Builder.CreateAlignedStore(Vec, GEP, MaybeAlign(16));
17251
34
      }
17252
11
      return Call;
17253
11
    }
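    // From the caller's side, the disassemble handling above serves builtins whose
    // first argument is the destination buffer, e.g. (a hedged usage sketch; the
    // vector types follow the PPC MMA programming model and are assumptions here):
    //
    //   __vector_quad Acc;                        // 512-bit accumulator
    //   vector unsigned char Out[4];              // receives four 16-byte vectors
    //   __builtin_mma_disassemble_acc(Out, &Acc); // stored through Out as shown above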
17254
229
    if (BuiltinID == PPC::BI__builtin_vsx_build_pair ||
17255
229
        BuiltinID == PPC::BI__builtin_mma_build_acc) {
17256
      // Reverse the order of the operands for LE, so the
17257
      // same builtin call can be used on both LE and BE
17258
      // without the need for the programmer to swap operands.
17259
      // The operands are reversed starting from the second argument,
17260
      // the first operand is the pointer to the pair/accumulator
17261
      // that is being built.
17262
6
      if (getTarget().isLittleEndian())
17263
3
        std::reverse(Ops.begin() + 1, Ops.end());
17264
6
    }
17265
229
    bool Accumulate;
17266
229
    switch (BuiltinID) {
17267
0
  #define CUSTOM_BUILTIN(Name, Intr, Types, Acc, Feature) \
17268
229
    case PPC::BI__builtin_##Name: \
17269
229
      ID = Intrinsic::ppc_##Intr; \
17270
229
      Accumulate = Acc; \
17271
229
      break;
17272
229
  #include "clang/Basic/BuiltinsPPC.def"
17273
229
    }
17274
229
    if (BuiltinID == PPC::BI__builtin_vsx_lxvp ||
17275
229
        BuiltinID == PPC::BI__builtin_vsx_stxvp ||
17276
229
        BuiltinID == PPC::BI__builtin_mma_lxvp ||
17277
229
        BuiltinID == PPC::BI__builtin_mma_stxvp) {
17278
70
      if (BuiltinID == PPC::BI__builtin_vsx_lxvp ||
17279
70
          BuiltinID == PPC::BI__builtin_mma_lxvp) {
17280
41
        Ops[0] = Builder.CreateGEP(Int8Ty, Ops[1], Ops[0]);
17281
41
      } else {
17282
29
        Ops[1] = Builder.CreateGEP(Int8Ty, Ops[2], Ops[1]);
17283
29
      }
17284
70
      Ops.pop_back();
17285
70
      llvm::Function *F = CGM.getIntrinsic(ID);
17286
70
      return Builder.CreateCall(F, Ops, "");
17287
70
    }
17288
159
    SmallVector<Value*, 4> CallOps;
17289
159
    if (Accumulate) {
17290
102
      Address Addr = EmitPointerWithAlignment(E->getArg(0));
17291
102
      Value *Acc = Builder.CreateLoad(Addr);
17292
102
      CallOps.push_back(Acc);
17293
102
    }
17294
631
    for (unsigned i=1; i<Ops.size(); 
i++472
)
17295
472
      CallOps.push_back(Ops[i]);
17296
159
    llvm::Function *F = CGM.getIntrinsic(ID);
17297
159
    Value *Call = Builder.CreateCall(F, CallOps);
17298
159
    return Builder.CreateAlignedStore(Call, Ops[0], MaybeAlign(64));
17299
229
  }
17300
17301
0
  case PPC::BI__builtin_ppc_compare_and_swap:
17302
0
  case PPC::BI__builtin_ppc_compare_and_swaplp: {
17303
0
    Address Addr = EmitPointerWithAlignment(E->getArg(0));
17304
0
    Address OldValAddr = EmitPointerWithAlignment(E->getArg(1));
17305
0
    Value *OldVal = Builder.CreateLoad(OldValAddr);
17306
0
    QualType AtomicTy = E->getArg(0)->getType()->getPointeeType();
17307
0
    LValue LV = MakeAddrLValue(Addr, AtomicTy);
17308
0
    Value *Op2 = EmitScalarExpr(E->getArg(2));
17309
0
    auto Pair = EmitAtomicCompareExchange(
17310
0
        LV, RValue::get(OldVal), RValue::get(Op2), E->getExprLoc(),
17311
0
        llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Monotonic, true);
17312
    // Unlike c11's atomic_compare_exchange, according to
17313
    // https://www.ibm.com/docs/en/xl-c-and-cpp-aix/16.1?topic=functions-compare-swap-compare-swaplp
17314
    // > In either case, the contents of the memory location specified by addr
17315
    // > are copied into the memory location specified by old_val_addr.
17316
    // But it hasn't specified whether storing to OldValAddr is atomic or
17317
    // which ordering to use. Following XL's codegen, treat it as a normal
17318
    // store.
17319
0
    Value *LoadedVal = Pair.first.getScalarVal();
17320
0
    Builder.CreateStore(LoadedVal, OldValAddr);
17321
0
    return Builder.CreateZExt(Pair.second, Builder.getInt32Ty());
17322
0
  }
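  // A rough, non-atomic restatement of the semantics emitted above (a sketch only;
  // the real lowering performs the compare/update with a monotonic cmpxchg and only
  // the write-back through old_val_addr is a plain store):
  //
  //   int expected = *old_val_addr;      // value the caller expects to see
  //   int observed = *addr;              // compared and possibly replaced atomically
  //   bool swapped = (observed == expected);
  //   if (swapped)
  //     *addr = new_val;
  //   *old_val_addr = observed;          // always copied back, per the comment above
  //   return swapped;                    // zero-extended to i32 by the builtin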
17323
0
  case PPC::BI__builtin_ppc_fetch_and_add:
17324
0
  case PPC::BI__builtin_ppc_fetch_and_addlp: {
17325
0
    return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E,
17326
0
                                 llvm::AtomicOrdering::Monotonic);
17327
0
  }
17328
0
  case PPC::BI__builtin_ppc_fetch_and_and:
17329
0
  case PPC::BI__builtin_ppc_fetch_and_andlp: {
17330
0
    return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E,
17331
0
                                 llvm::AtomicOrdering::Monotonic);
17332
0
  }
17333
17334
0
  case PPC::BI__builtin_ppc_fetch_and_or:
17335
0
  case PPC::BI__builtin_ppc_fetch_and_orlp: {
17336
0
    return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,
17337
0
                                 llvm::AtomicOrdering::Monotonic);
17338
0
  }
17339
0
  case PPC::BI__builtin_ppc_fetch_and_swap:
17340
0
  case PPC::BI__builtin_ppc_fetch_and_swaplp: {
17341
0
    return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,
17342
0
                                 llvm::AtomicOrdering::Monotonic);
17343
0
  }
17344
0
  case PPC::BI__builtin_ppc_ldarx:
17345
0
  case PPC::BI__builtin_ppc_lwarx:
17346
0
  case PPC::BI__builtin_ppc_lharx:
17347
0
  case PPC::BI__builtin_ppc_lbarx:
17348
0
    return emitPPCLoadReserveIntrinsic(*this, BuiltinID, E);
17349
4
  case PPC::BI__builtin_ppc_mfspr: {
17350
4
    Value *Op0 = EmitScalarExpr(E->getArg(0));
17351
4
    llvm::Type *RetType = CGM.getDataLayout().getTypeSizeInBits(VoidPtrTy) == 32
17352
4
                              ? Int32Ty
17353
4
                              : Int64Ty;
17354
4
    Function *F = CGM.getIntrinsic(Intrinsic::ppc_mfspr, RetType);
17355
4
    return Builder.CreateCall(F, {Op0});
17356
0
  }
17357
4
  case PPC::BI__builtin_ppc_mtspr: {
17358
4
    Value *Op0 = EmitScalarExpr(E->getArg(0));
17359
4
    Value *Op1 = EmitScalarExpr(E->getArg(1));
17360
4
    llvm::Type *RetType = CGM.getDataLayout().getTypeSizeInBits(VoidPtrTy) == 32
17361
4
                              ? Int32Ty
17362
4
                              : Int64Ty;
17363
4
    Function *F = CGM.getIntrinsic(Intrinsic::ppc_mtspr, RetType);
17364
4
    return Builder.CreateCall(F, {Op0, Op1});
17365
0
  }
17366
0
  case PPC::BI__builtin_ppc_popcntb: {
17367
0
    Value *ArgValue = EmitScalarExpr(E->getArg(0));
17368
0
    llvm::Type *ArgType = ArgValue->getType();
17369
0
    Function *F = CGM.getIntrinsic(Intrinsic::ppc_popcntb, {ArgType, ArgType});
17370
0
    return Builder.CreateCall(F, {ArgValue}, "popcntb");
17371
0
  }
17372
4
  case PPC::BI__builtin_ppc_mtfsf: {
17373
    // The builtin takes a uint32 that needs to be cast to an
17374
    // f64 to be passed to the intrinsic.
17375
4
    Value *Op0 = EmitScalarExpr(E->getArg(0));
17376
4
    Value *Op1 = EmitScalarExpr(E->getArg(1));
17377
4
    Value *Cast = Builder.CreateUIToFP(Op1, DoubleTy);
17378
4
    llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_mtfsf);
17379
4
    return Builder.CreateCall(F, {Op0, Cast}, "");
17380
0
  }
17381
17382
0
  case PPC::BI__builtin_ppc_swdiv_nochk:
17383
0
  case PPC::BI__builtin_ppc_swdivs_nochk: {
17384
0
    Value *Op0 = EmitScalarExpr(E->getArg(0));
17385
0
    Value *Op1 = EmitScalarExpr(E->getArg(1));
17386
0
    FastMathFlags FMF = Builder.getFastMathFlags();
17387
0
    Builder.getFastMathFlags().setFast();
17388
0
    Value *FDiv = Builder.CreateFDiv(Op0, Op1, "swdiv_nochk");
17389
0
    Builder.getFastMathFlags() &= (FMF);
17390
0
    return FDiv;
17391
0
  }
17392
0
  case PPC::BI__builtin_ppc_fric:
17393
0
    return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
17394
0
                           *this, E, Intrinsic::rint,
17395
0
                           Intrinsic::experimental_constrained_rint))
17396
0
        .getScalarVal();
17397
0
  case PPC::BI__builtin_ppc_frim:
17398
0
  case PPC::BI__builtin_ppc_frims:
17399
0
    return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
17400
0
                           *this, E, Intrinsic::floor,
17401
0
                           Intrinsic::experimental_constrained_floor))
17402
0
        .getScalarVal();
17403
0
  case PPC::BI__builtin_ppc_frin:
17404
0
  case PPC::BI__builtin_ppc_frins:
17405
0
    return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
17406
0
                           *this, E, Intrinsic::round,
17407
0
                           Intrinsic::experimental_constrained_round))
17408
0
        .getScalarVal();
17409
0
  case PPC::BI__builtin_ppc_frip:
17410
0
  case PPC::BI__builtin_ppc_frips:
17411
0
    return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
17412
0
                           *this, E, Intrinsic::ceil,
17413
0
                           Intrinsic::experimental_constrained_ceil))
17414
0
        .getScalarVal();
17415
0
  case PPC::BI__builtin_ppc_friz:
17416
0
  case PPC::BI__builtin_ppc_frizs:
17417
0
    return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
17418
0
                           *this, E, Intrinsic::trunc,
17419
0
                           Intrinsic::experimental_constrained_trunc))
17420
0
        .getScalarVal();
17421
0
  case PPC::BI__builtin_ppc_fsqrt:
17422
0
  case PPC::BI__builtin_ppc_fsqrts:
17423
0
    return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
17424
0
                           *this, E, Intrinsic::sqrt,
17425
0
                           Intrinsic::experimental_constrained_sqrt))
17426
0
        .getScalarVal();
17427
1
  case PPC::BI__builtin_ppc_test_data_class: {
17428
1
    Value *Op0 = EmitScalarExpr(E->getArg(0));
17429
1
    Value *Op1 = EmitScalarExpr(E->getArg(1));
17430
1
    return Builder.CreateCall(
17431
1
        CGM.getIntrinsic(Intrinsic::ppc_test_data_class, Op0->getType()),
17432
1
        {Op0, Op1}, "test_data_class");
17433
0
  }
17434
0
  case PPC::BI__builtin_ppc_maxfe: {
17435
0
    Value *Op0 = EmitScalarExpr(E->getArg(0));
17436
0
    Value *Op1 = EmitScalarExpr(E->getArg(1));
17437
0
    Value *Op2 = EmitScalarExpr(E->getArg(2));
17438
0
    Value *Op3 = EmitScalarExpr(E->getArg(3));
17439
0
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_maxfe),
17440
0
                              {Op0, Op1, Op2, Op3});
17441
0
  }
17442
0
  case PPC::BI__builtin_ppc_maxfl: {
17443
0
    Value *Op0 = EmitScalarExpr(E->getArg(0));
17444
0
    Value *Op1 = EmitScalarExpr(E->getArg(1));
17445
0
    Value *Op2 = EmitScalarExpr(E->getArg(2));
17446
0
    Value *Op3 = EmitScalarExpr(E->getArg(3));
17447
0
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_maxfl),
17448
0
                              {Op0, Op1, Op2, Op3});
17449
0
  }
17450
0
  case PPC::BI__builtin_ppc_maxfs: {
17451
0
    Value *Op0 = EmitScalarExpr(E->getArg(0));
17452
0
    Value *Op1 = EmitScalarExpr(E->getArg(1));
17453
0
    Value *Op2 = EmitScalarExpr(E->getArg(2));
17454
0
    Value *Op3 = EmitScalarExpr(E->getArg(3));
17455
0
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_maxfs),
17456
0
                              {Op0, Op1, Op2, Op3});
17457
0
  }
17458
0
  case PPC::BI__builtin_ppc_minfe: {
17459
0
    Value *Op0 = EmitScalarExpr(E->getArg(0));
17460
0
    Value *Op1 = EmitScalarExpr(E->getArg(1));
17461
0
    Value *Op2 = EmitScalarExpr(E->getArg(2));
17462
0
    Value *Op3 = EmitScalarExpr(E->getArg(3));
17463
0
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_minfe),
17464
0
                              {Op0, Op1, Op2, Op3});
17465
0
  }
17466
0
  case PPC::BI__builtin_ppc_minfl: {
17467
0
    Value *Op0 = EmitScalarExpr(E->getArg(0));
17468
0
    Value *Op1 = EmitScalarExpr(E->getArg(1));
17469
0
    Value *Op2 = EmitScalarExpr(E->getArg(2));
17470
0
    Value *Op3 = EmitScalarExpr(E->getArg(3));
17471
0
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_minfl),
17472
0
                              {Op0, Op1, Op2, Op3});
17473
0
  }
17474
0
  case PPC::BI__builtin_ppc_minfs: {
17475
0
    Value *Op0 = EmitScalarExpr(E->getArg(0));
17476
0
    Value *Op1 = EmitScalarExpr(E->getArg(1));
17477
0
    Value *Op2 = EmitScalarExpr(E->getArg(2));
17478
0
    Value *Op3 = EmitScalarExpr(E->getArg(3));
17479
0
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_minfs),
17480
0
                              {Op0, Op1, Op2, Op3});
17481
0
  }
17482
0
  case PPC::BI__builtin_ppc_swdiv:
17483
0
  case PPC::BI__builtin_ppc_swdivs: {
17484
0
    Value *Op0 = EmitScalarExpr(E->getArg(0));
17485
0
    Value *Op1 = EmitScalarExpr(E->getArg(1));
17486
0
    return Builder.CreateFDiv(Op0, Op1, "swdiv");
17487
0
  }
17488
0
  case PPC::BI__builtin_ppc_set_fpscr_rn:
17489
0
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_setrnd),
17490
0
                              {EmitScalarExpr(E->getArg(0))});
17491
0
  case PPC::BI__builtin_ppc_mffs:
17492
0
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_readflm));
17493
269
  }
17494
269
}
17495
17496
namespace {
17497
// If \p E is not a null pointer, insert an address space cast to match the return
17498
// type of \p E if necessary.
17499
Value *EmitAMDGPUDispatchPtr(CodeGenFunction &CGF,
17500
21
                             const CallExpr *E = nullptr) {
17501
21
  auto *F = CGF.CGM.getIntrinsic(Intrinsic::amdgcn_dispatch_ptr);
17502
21
  auto *Call = CGF.Builder.CreateCall(F);
17503
21
  Call->addRetAttr(
17504
21
      Attribute::getWithDereferenceableBytes(Call->getContext(), 64));
17505
21
  Call->addRetAttr(Attribute::getWithAlignment(Call->getContext(), Align(4)));
17506
21
  if (!E)
17507
16
    return Call;
17508
5
  QualType BuiltinRetType = E->getType();
17509
5
  auto *RetTy = cast<llvm::PointerType>(CGF.ConvertType(BuiltinRetType));
17510
5
  if (RetTy == Call->getType())
17511
5
    return Call;
17512
0
  return CGF.Builder.CreateAddrSpaceCast(Call, RetTy);
17513
5
}
17514
17515
10
Value *EmitAMDGPUImplicitArgPtr(CodeGenFunction &CGF) {
17516
10
  auto *F = CGF.CGM.getIntrinsic(Intrinsic::amdgcn_implicitarg_ptr);
17517
10
  auto *Call = CGF.Builder.CreateCall(F);
17518
10
  Call->addRetAttr(
17519
10
      Attribute::getWithDereferenceableBytes(Call->getContext(), 256));
17520
10
  Call->addRetAttr(Attribute::getWithAlignment(Call->getContext(), Align(8)));
17521
10
  return Call;
17522
10
}
17523
17524
// \p Index is 0, 1, and 2 for x, y, and z dimension, respectively.
17525
/// Emit code based on Code Object ABI version.
17526
/// COV_4    : Emit code to use dispatch ptr
17527
/// COV_5    : Emit code to use implicitarg ptr
17528
/// COV_NONE : Emit code to load a global variable "__oclc_ABI_version"
17529
///            and use its value for COV_4 or COV_5 approach. It is used for
17530
///            compiling device libraries in an ABI-agnostic way.
17531
///
17532
/// Note: "__oclc_ABI_version" is supposed to be emitted and intialized by
17533
///       clang during compilation of user code.
17534
16
Value *EmitAMDGPUWorkGroupSize(CodeGenFunction &CGF, unsigned Index) {
17535
16
  llvm::LoadInst *LD;
17536
17537
16
  auto Cov = CGF.getTarget().getTargetOpts().CodeObjectVersion;
17538
17539
16
  if (Cov == clang::TargetOptions::COV_None) {
17540
7
    StringRef Name = "__oclc_ABI_version";
17541
7
    auto *ABIVersionC = CGF.CGM.getModule().getNamedGlobal(Name);
17542
7
    if (!ABIVersionC)
17543
3
      ABIVersionC = new llvm::GlobalVariable(
17544
3
          CGF.CGM.getModule(), CGF.Int32Ty, false,
17545
3
          llvm::GlobalValue::ExternalLinkage, nullptr, Name, nullptr,
17546
3
          llvm::GlobalVariable::NotThreadLocal,
17547
3
          CGF.CGM.getContext().getTargetAddressSpace(LangAS::opencl_constant));
17548
17549
    // This load will be eliminated by the IPSCCP because it is constant
17550
    // weak_odr without externally_initialized. Either changing it to weak or
17551
    // adding externally_initialized will keep the load.
17552
7
    Value *ABIVersion = CGF.Builder.CreateAlignedLoad(CGF.Int32Ty, ABIVersionC,
17553
7
                                                      CGF.CGM.getIntAlign());
17554
17555
7
    Value *IsCOV5 = CGF.Builder.CreateICmpSGE(
17556
7
        ABIVersion,
17557
7
        llvm::ConstantInt::get(CGF.Int32Ty, clang::TargetOptions::COV_5));
17558
17559
    // Indexing the implicit kernarg segment.
17560
7
    Value *ImplicitGEP = CGF.Builder.CreateConstGEP1_32(
17561
7
        CGF.Int8Ty, EmitAMDGPUImplicitArgPtr(CGF), 12 + Index * 2);
17562
17563
    // Indexing the HSA kernel_dispatch_packet struct.
17564
7
    Value *DispatchGEP = CGF.Builder.CreateConstGEP1_32(
17565
7
        CGF.Int8Ty, EmitAMDGPUDispatchPtr(CGF), 4 + Index * 2);
17566
17567
7
    auto Result = CGF.Builder.CreateSelect(IsCOV5, ImplicitGEP, DispatchGEP);
17568
7
    LD = CGF.Builder.CreateLoad(
17569
7
        Address(Result, CGF.Int16Ty, CharUnits::fromQuantity(2)));
17570
9
  } else {
17571
9
    Value *GEP = nullptr;
17572
9
    if (Cov == clang::TargetOptions::COV_5) {
17573
      // Indexing the implicit kernarg segment.
17574
3
      GEP = CGF.Builder.CreateConstGEP1_32(
17575
3
          CGF.Int8Ty, EmitAMDGPUImplicitArgPtr(CGF), 12 + Index * 2);
17576
6
    } else {
17577
      // Indexing the HSA kernel_dispatch_packet struct.
17578
6
      GEP = CGF.Builder.CreateConstGEP1_32(
17579
6
          CGF.Int8Ty, EmitAMDGPUDispatchPtr(CGF), 4 + Index * 2);
17580
6
    }
17581
9
    LD = CGF.Builder.CreateLoad(
17582
9
        Address(GEP, CGF.Int16Ty, CharUnits::fromQuantity(2)));
17583
9
  }
17584
17585
16
  llvm::MDBuilder MDHelper(CGF.getLLVMContext());
17586
16
  llvm::MDNode *RNode = MDHelper.createRange(APInt(16, 1),
17587
16
      APInt(16, CGF.getTarget().getMaxOpenCLWorkGroupSize() + 1));
17588
16
  LD->setMetadata(llvm::LLVMContext::MD_range, RNode);
17589
16
  LD->setMetadata(llvm::LLVMContext::MD_noundef,
17590
16
                  llvm::MDNode::get(CGF.getLLVMContext(), std::nullopt));
17591
16
  LD->setMetadata(llvm::LLVMContext::MD_invariant_load,
17592
16
                  llvm::MDNode::get(CGF.getLLVMContext(), std::nullopt));
17593
16
  return LD;
17594
16
}
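// Byte offsets used above and in EmitAMDGPUGridSize below, collected as a quick
// reference (derived from the code; Index is 0, 1 or 2 for x, y, z):
//   work-group size, COV_5 path: i16 load at 12 + Index * 2 in the implicit kernarg segment
//   work-group size, COV_4 path: i16 load at  4 + Index * 2 in the kernel_dispatch_packet
//   grid size:                   i32 load at 12 + Index * 4 in the kernel_dispatch_packet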
17595
17596
// \p Index is 0, 1, and 2 for x, y, and z dimension, respectively.
17597
3
Value *EmitAMDGPUGridSize(CodeGenFunction &CGF, unsigned Index) {
17598
3
  const unsigned XOffset = 12;
17599
3
  auto *DP = EmitAMDGPUDispatchPtr(CGF);
17600
  // Indexing the HSA kernel_dispatch_packet struct.
17601
3
  auto *Offset = llvm::ConstantInt::get(CGF.Int32Ty, XOffset + Index * 4);
17602
3
  auto *GEP = CGF.Builder.CreateGEP(CGF.Int8Ty, DP, Offset);
17603
3
  auto *LD = CGF.Builder.CreateLoad(
17604
3
      Address(GEP, CGF.Int32Ty, CharUnits::fromQuantity(4)));
17605
3
  LD->setMetadata(llvm::LLVMContext::MD_invariant_load,
17606
3
                  llvm::MDNode::get(CGF.getLLVMContext(), std::nullopt));
17607
3
  return LD;
17608
3
}
17609
} // namespace
17610
17611
// For processing memory ordering and memory scope arguments of various
17612
// amdgcn builtins.
17613
// \p Order takes a C++11 compatible memory-ordering specifier and converts
17614
// it into LLVM's memory ordering specifier using atomic C ABI, and writes
17615
// to \p AO. \p Scope takes a const char * and converts it into AMDGCN
17616
// specific SyncScopeID and writes it to \p SSID.
17617
void CodeGenFunction::ProcessOrderScopeAMDGCN(Value *Order, Value *Scope,
17618
                                              llvm::AtomicOrdering &AO,
17619
51
                                              llvm::SyncScope::ID &SSID) {
17620
51
  int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
17621
17622
  // Map C11/C++11 memory ordering to LLVM memory ordering
17623
51
  assert(llvm::isValidAtomicOrderingCABI(ord));
17624
51
  switch (static_cast<llvm::AtomicOrderingCABI>(ord)) {
17625
3
  case llvm::AtomicOrderingCABI::acquire:
17626
5
  case llvm::AtomicOrderingCABI::consume:
17627
5
    AO = llvm::AtomicOrdering::Acquire;
17628
5
    break;
17629
3
  case llvm::AtomicOrderingCABI::release:
17630
3
    AO = llvm::AtomicOrdering::Release;
17631
3
    break;
17632
3
  case llvm::AtomicOrderingCABI::acq_rel:
17633
3
    AO = llvm::AtomicOrdering::AcquireRelease;
17634
3
    break;
17635
38
  case llvm::AtomicOrderingCABI::seq_cst:
17636
38
    AO = llvm::AtomicOrdering::SequentiallyConsistent;
17637
38
    break;
17638
2
  case llvm::AtomicOrderingCABI::relaxed:
17639
2
    AO = llvm::AtomicOrdering::Monotonic;
17640
2
    break;
17641
51
  }
17642
17643
51
  StringRef scp;
17644
51
  llvm::getConstantStringInfo(Scope, scp);
17645
51
  SSID = getLLVMContext().getOrInsertSyncScopeID(scp);
17646
51
}
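// A hedged caller-side example of a builtin that feeds this helper: the first
// argument is a C11/C++11 memory-order constant and the second a scope string, e.g.
//
//   __builtin_amdgcn_fence(__ATOMIC_RELEASE, "workgroup");
//
// which the fence case below lowers to `fence syncscope("workgroup") release`.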
17647
17648
llvm::Value *CodeGenFunction::EmitScalarOrConstFoldImmArg(unsigned ICEArguments,
17649
                                                          unsigned Idx,
17650
63.9k
                                                          const CallExpr *E) {
17651
63.9k
  llvm::Value *Arg = nullptr;
17652
63.9k
  if ((ICEArguments & (1 << Idx)) == 0) {
17653
53.2k
    Arg = EmitScalarExpr(E->getArg(Idx));
17654
53.2k
  } else {
17655
    // If this is required to be a constant, constant fold it so that we
17656
    // know that the generated intrinsic gets a ConstantInt.
17657
10.6k
    std::optional<llvm::APSInt> Result =
17658
10.6k
        E->getArg(Idx)->getIntegerConstantExpr(getContext());
17659
10.6k
    assert(Result && "Expected argument to be a constant");
17660
10.6k
    Arg = llvm::ConstantInt::get(getLLVMContext(), *Result);
17661
10.6k
  }
17662
63.9k
  return Arg;
17663
63.9k
}
17664
17665
Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
17666
439
                                              const CallExpr *E) {
17667
439
  llvm::AtomicOrdering AO = llvm::AtomicOrdering::SequentiallyConsistent;
17668
439
  llvm::SyncScope::ID SSID;
17669
439
  switch (BuiltinID) {
17670
1
  case AMDGPU::BI__builtin_amdgcn_div_scale:
17671
4
  case AMDGPU::BI__builtin_amdgcn_div_scalef: {
17672
    // Translate from the intrinsic's struct return to the builtin's out
17673
    // argument.
17674
17675
4
    Address FlagOutPtr = EmitPointerWithAlignment(E->getArg(3));
17676
17677
4
    llvm::Value *X = EmitScalarExpr(E->getArg(0));
17678
4
    llvm::Value *Y = EmitScalarExpr(E->getArg(1));
17679
4
    llvm::Value *Z = EmitScalarExpr(E->getArg(2));
17680
17681
4
    llvm::Function *Callee = CGM.getIntrinsic(Intrinsic::amdgcn_div_scale,
17682
4
                                           X->getType());
17683
17684
4
    llvm::Value *Tmp = Builder.CreateCall(Callee, {X, Y, Z});
17685
17686
4
    llvm::Value *Result = Builder.CreateExtractValue(Tmp, 0);
17687
4
    llvm::Value *Flag = Builder.CreateExtractValue(Tmp, 1);
17688
17689
4
    llvm::Type *RealFlagType = FlagOutPtr.getElementType();
17690
17691
4
    llvm::Value *FlagExt = Builder.CreateZExt(Flag, RealFlagType);
17692
4
    Builder.CreateStore(FlagExt, FlagOutPtr);
17693
4
    return Result;
17694
1
  }
17695
1
  case AMDGPU::BI__builtin_amdgcn_div_fmas:
17696
2
  case AMDGPU::BI__builtin_amdgcn_div_fmasf: {
17697
2
    llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
17698
2
    llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
17699
2
    llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
17700
2
    llvm::Value *Src3 = EmitScalarExpr(E->getArg(3));
17701
17702
2
    llvm::Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_div_fmas,
17703
2
                                      Src0->getType());
17704
2
    llvm::Value *Src3ToBool = Builder.CreateIsNotNull(Src3);
17705
2
    return Builder.CreateCall(F, {Src0, Src1, Src2, Src3ToBool});
17706
1
  }
17707
17708
0
  case AMDGPU::BI__builtin_amdgcn_ds_swizzle:
17709
0
    return emitBinaryBuiltin(*this, E, Intrinsic::amdgcn_ds_swizzle);
17710
8
  case AMDGPU::BI__builtin_amdgcn_mov_dpp8:
17711
8
    return emitBinaryBuiltin(*this, E, Intrinsic::amdgcn_mov_dpp8);
17712
5
  case AMDGPU::BI__builtin_amdgcn_mov_dpp:
17713
13
  case AMDGPU::BI__builtin_amdgcn_update_dpp: {
17714
13
    llvm::SmallVector<llvm::Value *, 6> Args;
17715
    // Find out if any arguments are required to be integer constant
17716
    // expressions.
17717
13
    unsigned ICEArguments = 0;
17718
13
    ASTContext::GetBuiltinTypeError Error;
17719
13
    getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
17720
13
    assert(Error == ASTContext::GE_None && "Should not codegen an error");
17721
86
    for (unsigned I = 0; I != E->getNumArgs(); ++I) {
17722
73
      Args.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, I, E));
17723
73
    }
17724
13
    assert(Args.size() == 5 || Args.size() == 6);
17725
13
    if (Args.size() == 5)
17726
5
      Args.insert(Args.begin(), llvm::PoisonValue::get(Args[0]->getType()));
17727
13
    Function *F =
17728
13
        CGM.getIntrinsic(Intrinsic::amdgcn_update_dpp, Args[0]->getType());
17729
13
    return Builder.CreateCall(F, Args);
17730
13
  }
17731
1
  case AMDGPU::BI__builtin_amdgcn_div_fixup:
17732
2
  case AMDGPU::BI__builtin_amdgcn_div_fixupf:
17733
7
  case AMDGPU::BI__builtin_amdgcn_div_fixuph:
17734
7
    return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_div_fixup);
17735
1
  case AMDGPU::BI__builtin_amdgcn_trig_preop:
17736
2
  case AMDGPU::BI__builtin_amdgcn_trig_preopf:
17737
2
    return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_trig_preop);
17738
1
  case AMDGPU::BI__builtin_amdgcn_rcp:
17739
5
  case AMDGPU::BI__builtin_amdgcn_rcpf:
17740
10
  case AMDGPU::BI__builtin_amdgcn_rcph:
17741
10
    return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rcp);
17742
1
  case AMDGPU::BI__builtin_amdgcn_sqrt:
17743
2
  case AMDGPU::BI__builtin_amdgcn_sqrtf:
17744
7
  case AMDGPU::BI__builtin_amdgcn_sqrth:
17745
7
    return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_sqrt);
17746
1
  case AMDGPU::BI__builtin_amdgcn_rsq:
17747
5
  case AMDGPU::BI__builtin_amdgcn_rsqf:
17748
10
  case AMDGPU::BI__builtin_amdgcn_rsqh:
17749
10
    return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq);
17750
1
  case AMDGPU::BI__builtin_amdgcn_rsq_clamp:
17751
2
  case AMDGPU::BI__builtin_amdgcn_rsq_clampf:
17752
2
    return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq_clamp);
17753
1
  case AMDGPU::BI__builtin_amdgcn_sinf:
17754
6
  case AMDGPU::BI__builtin_amdgcn_sinh:
17755
6
    return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_sin);
17756
1
  case AMDGPU::BI__builtin_amdgcn_cosf:
17757
6
  case AMDGPU::BI__builtin_amdgcn_cosh:
17758
6
    return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_cos);
17759
5
  case AMDGPU::BI__builtin_amdgcn_dispatch_ptr:
17760
5
    return EmitAMDGPUDispatchPtr(*this, E);
17761
4
  case AMDGPU::BI__builtin_amdgcn_logf:
17762
4
    return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_log);
17763
7
  case AMDGPU::BI__builtin_amdgcn_exp2f:
17764
7
    return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_exp2);
17765
1
  case AMDGPU::BI__builtin_amdgcn_log_clampf:
17766
1
    return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_log_clamp);
17767
11
  case AMDGPU::BI__builtin_amdgcn_ldexp:
17768
22
  case AMDGPU::BI__builtin_amdgcn_ldexpf: {
17769
22
    llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
17770
22
    llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
17771
22
    llvm::Function *F =
17772
22
        CGM.getIntrinsic(Intrinsic::ldexp, {Src0->getType(), Src1->getType()});
17773
22
    return Builder.CreateCall(F, {Src0, Src1});
17774
11
  }
17775
5
  case AMDGPU::BI__builtin_amdgcn_ldexph: {
17776
    // The raw instruction has a different behavior for out of bounds exponent
17777
    // values (implicit truncation instead of saturate to short_min/short_max).
17778
5
    llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
17779
5
    llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
17780
5
    llvm::Function *F =
17781
5
        CGM.getIntrinsic(Intrinsic::ldexp, {Src0->getType(), Int16Ty});
17782
5
    return Builder.CreateCall(F, {Src0, Builder.CreateTrunc(Src1, Int16Ty)});
17783
11
  }
17784
1
  case AMDGPU::BI__builtin_amdgcn_frexp_mant:
17785
2
  case AMDGPU::BI__builtin_amdgcn_frexp_mantf:
17786
7
  case AMDGPU::BI__builtin_amdgcn_frexp_manth:
17787
7
    return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_frexp_mant);
17788
1
  case AMDGPU::BI__builtin_amdgcn_frexp_exp:
17789
2
  case AMDGPU::BI__builtin_amdgcn_frexp_expf: {
17790
2
    Value *Src0 = EmitScalarExpr(E->getArg(0));
17791
2
    Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
17792
2
                                { Builder.getInt32Ty(), Src0->getType() });
17793
2
    return Builder.CreateCall(F, Src0);
17794
1
  }
17795
5
  case AMDGPU::BI__builtin_amdgcn_frexp_exph: {
17796
5
    Value *Src0 = EmitScalarExpr(E->getArg(0));
17797
5
    Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
17798
5
                                { Builder.getInt16Ty(), Src0->getType() });
17799
5
    return Builder.CreateCall(F, Src0);
17800
1
  }
17801
1
  case AMDGPU::BI__builtin_amdgcn_fract:
17802
2
  case AMDGPU::BI__builtin_amdgcn_fractf:
17803
7
  case AMDGPU::BI__builtin_amdgcn_fracth:
17804
7
    return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_fract);
17805
0
  case AMDGPU::BI__builtin_amdgcn_lerp:
17806
0
    return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_lerp);
17807
1
  case AMDGPU::BI__builtin_amdgcn_ubfe:
17808
1
    return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_ubfe);
17809
1
  case AMDGPU::BI__builtin_amdgcn_sbfe:
17810
1
    return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_sbfe);
17811
16
  case AMDGPU::BI__builtin_amdgcn_ballot_w32:
17812
31
  case AMDGPU::BI__builtin_amdgcn_ballot_w64: {
17813
31
    llvm::Type *ResultType = ConvertType(E->getType());
17814
31
    llvm::Value *Src = EmitScalarExpr(E->getArg(0));
17815
31
    Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, { ResultType });
17816
31
    return Builder.CreateCall(F, { Src });
17817
16
  }
17818
1
  case AMDGPU::BI__builtin_amdgcn_uicmp:
17819
4
  case AMDGPU::BI__builtin_amdgcn_uicmpl:
17820
5
  case AMDGPU::BI__builtin_amdgcn_sicmp:
17821
6
  case AMDGPU::BI__builtin_amdgcn_sicmpl: {
17822
6
    llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
17823
6
    llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
17824
6
    llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
17825
17826
    // FIXME-GFX10: How should 32 bit mask be handled?
17827
6
    Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_icmp,
17828
6
      { Builder.getInt64Ty(), Src0->getType() });
17829
6
    return Builder.CreateCall(F, { Src0, Src1, Src2 });
17830
5
  }
17831
1
  case AMDGPU::BI__builtin_amdgcn_fcmp:
17832
2
  case AMDGPU::BI__builtin_amdgcn_fcmpf: {
17833
2
    llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
17834
2
    llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
17835
2
    llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
17836
17837
    // FIXME-GFX10: How should 32 bit mask be handled?
17838
2
    Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_fcmp,
17839
2
      { Builder.getInt64Ty(), Src0->getType() });
17840
2
    return Builder.CreateCall(F, { Src0, Src1, Src2 });
17841
1
  }
17842
1
  case AMDGPU::BI__builtin_amdgcn_class:
17843
2
  case AMDGPU::BI__builtin_amdgcn_classf:
17844
7
  case AMDGPU::BI__builtin_amdgcn_classh:
17845
7
    return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_class);
17846
1
  case AMDGPU::BI__builtin_amdgcn_fmed3f:
17847
5
  case AMDGPU::BI__builtin_amdgcn_fmed3h:
17848
5
    return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_fmed3);
17849
1
  case AMDGPU::BI__builtin_amdgcn_ds_append:
17850
2
  case AMDGPU::BI__builtin_amdgcn_ds_consume: {
17851
2
    Intrinsic::ID Intrin = BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_append ?
17852
1
      Intrinsic::amdgcn_ds_append : Intrinsic::amdgcn_ds_consume;
17853
2
    Value *Src0 = EmitScalarExpr(E->getArg(0));
17854
2
    Function *F = CGM.getIntrinsic(Intrin, { Src0->getType() });
17855
2
    return Builder.CreateCall(F, { Src0, Builder.getFalse() });
17856
1
  }
17857
7
  case AMDGPU::BI__builtin_amdgcn_ds_faddf:
17858
15
  case AMDGPU::BI__builtin_amdgcn_ds_fminf:
17859
23
  case AMDGPU::BI__builtin_amdgcn_ds_fmaxf: {
17860
23
    Intrinsic::ID Intrin;
17861
23
    switch (BuiltinID) {
17862
7
    case AMDGPU::BI__builtin_amdgcn_ds_faddf:
17863
7
      Intrin = Intrinsic::amdgcn_ds_fadd;
17864
7
      break;
17865
8
    case AMDGPU::BI__builtin_amdgcn_ds_fminf:
17866
8
      Intrin = Intrinsic::amdgcn_ds_fmin;
17867
8
      break;
17868
8
    case AMDGPU::BI__builtin_amdgcn_ds_fmaxf:
17869
8
      Intrin = Intrinsic::amdgcn_ds_fmax;
17870
8
      break;
17871
23
    }
17872
23
    llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
17873
23
    llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
17874
23
    llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
17875
23
    llvm::Value *Src3 = EmitScalarExpr(E->getArg(3));
17876
23
    llvm::Value *Src4 = EmitScalarExpr(E->getArg(4));
17877
23
    llvm::Function *F = CGM.getIntrinsic(Intrin, { Src1->getType() });
17878
23
    llvm::FunctionType *FTy = F->getFunctionType();
17879
23
    llvm::Type *PTy = FTy->getParamType(0);
17880
23
    Src0 = Builder.CreatePointerBitCastOrAddrSpaceCast(Src0, PTy);
17881
23
    return Builder.CreateCall(F, { Src0, Src1, Src2, Src3, Src4 });
17882
23
  }
17883
3
  case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64:
17884
14
  case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32:
17885
18
  case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16:
17886
21
  case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64:
17887
24
  case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64:
17888
29
  case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64:
17889
34
  case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64:
17890
39
  case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64:
17891
42
  case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32:
17892
46
  case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16: {
17893
46
    Intrinsic::ID IID;
17894
46
    llvm::Type *ArgTy = llvm::Type::getDoubleTy(getLLVMContext());
17895
46
    switch (BuiltinID) {
17896
11
    case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32:
17897
11
      ArgTy = llvm::Type::getFloatTy(getLLVMContext());
17898
11
      IID = Intrinsic::amdgcn_global_atomic_fadd;
17899
11
      break;
17900
4
    case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16:
17901
4
      ArgTy = llvm::FixedVectorType::get(
17902
4
          llvm::Type::getHalfTy(getLLVMContext()), 2);
17903
4
      IID = Intrinsic::amdgcn_global_atomic_fadd;
17904
4
      break;
17905
3
    case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64:
17906
3
      IID = Intrinsic::amdgcn_global_atomic_fadd;
17907
3
      break;
17908
3
    case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64:
17909
3
      IID = Intrinsic::amdgcn_global_atomic_fmin;
17910
3
      break;
17911
3
    case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64:
17912
3
      IID = Intrinsic::amdgcn_global_atomic_fmax;
17913
3
      break;
17914
5
    case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64:
17915
5
      IID = Intrinsic::amdgcn_flat_atomic_fadd;
17916
5
      break;
17917
5
    case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64:
17918
5
      IID = Intrinsic::amdgcn_flat_atomic_fmin;
17919
5
      break;
17920
5
    case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64:
17921
5
      IID = Intrinsic::amdgcn_flat_atomic_fmax;
17922
5
      break;
17923
3
    case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32:
17924
3
      ArgTy = llvm::Type::getFloatTy(getLLVMContext());
17925
3
      IID = Intrinsic::amdgcn_flat_atomic_fadd;
17926
3
      break;
17927
4
    case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16:
17928
4
      ArgTy = llvm::FixedVectorType::get(
17929
4
          llvm::Type::getHalfTy(getLLVMContext()), 2);
17930
4
      IID = Intrinsic::amdgcn_flat_atomic_fadd;
17931
4
      break;
17932
46
    }
17933
46
    llvm::Value *Addr = EmitScalarExpr(E->getArg(0));
17934
46
    llvm::Value *Val = EmitScalarExpr(E->getArg(1));
17935
46
    llvm::Function *F =
17936
46
        CGM.getIntrinsic(IID, {ArgTy, Addr->getType(), Val->getType()});
17937
46
    return Builder.CreateCall(F, {Addr, Val});
17938
46
  }
17939
4
  case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16:
17940
8
  case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16: {
17941
8
    Intrinsic::ID IID;
17942
8
    switch (BuiltinID) {
17943
4
    case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16:
17944
4
      IID = Intrinsic::amdgcn_global_atomic_fadd_v2bf16;
17945
4
      break;
17946
4
    case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16:
17947
4
      IID = Intrinsic::amdgcn_flat_atomic_fadd_v2bf16;
17948
4
      break;
17949
8
    }
17950
8
    llvm::Value *Addr = EmitScalarExpr(E->getArg(0));
17951
8
    llvm::Value *Val = EmitScalarExpr(E->getArg(1));
17952
8
    llvm::Function *F = CGM.getIntrinsic(IID, {Addr->getType()});
17953
8
    return Builder.CreateCall(F, {Addr, Val});
17954
8
  }
17955
2
  case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f64:
17956
9
  case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32:
17957
16
  case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2f16: {
17958
16
    Intrinsic::ID IID;
17959
16
    llvm::Type *ArgTy;
17960
16
    switch (BuiltinID) {
17961
7
    case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32:
17962
7
      ArgTy = llvm::Type::getFloatTy(getLLVMContext());
17963
7
      IID = Intrinsic::amdgcn_ds_fadd;
17964
7
      break;
17965
2
    case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f64:
17966
2
      ArgTy = llvm::Type::getDoubleTy(getLLVMContext());
17967
2
      IID = Intrinsic::amdgcn_ds_fadd;
17968
2
      break;
17969
7
    case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2f16:
17970
7
      ArgTy = llvm::FixedVectorType::get(
17971
7
          llvm::Type::getHalfTy(getLLVMContext()), 2);
17972
7
      IID = Intrinsic::amdgcn_ds_fadd;
17973
7
      break;
17974
16
    }
17975
16
    llvm::Value *Addr = EmitScalarExpr(E->getArg(0));
17976
16
    llvm::Value *Val = EmitScalarExpr(E->getArg(1));
17977
16
    llvm::Constant *ZeroI32 = llvm::ConstantInt::getIntegerValue(
17978
16
        llvm::Type::getInt32Ty(getLLVMContext()), APInt(32, 0, true));
17979
16
    llvm::Constant *ZeroI1 = llvm::ConstantInt::getIntegerValue(
17980
16
        llvm::Type::getInt1Ty(getLLVMContext()), APInt(1, 0));
17981
16
    llvm::Function *F = CGM.getIntrinsic(IID, {ArgTy});
17982
16
    return Builder.CreateCall(F, {Addr, Val, ZeroI32, ZeroI32, ZeroI1});
17983
16
  }
17984
10
  case AMDGPU::BI__builtin_amdgcn_read_exec:
17985
10
    return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty, false);
17986
10
  case AMDGPU::BI__builtin_amdgcn_read_exec_lo:
17987
10
    return EmitAMDGCNBallotForExec(*this, E, Int32Ty, Int32Ty, false);
17988
10
  case AMDGPU::BI__builtin_amdgcn_read_exec_hi:
17989
10
    return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty, true);
17990
2
  case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray:
17991
4
  case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_h:
17992
6
  case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_l:
17993
8
  case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_lh: {
17994
8
    llvm::Value *NodePtr = EmitScalarExpr(E->getArg(0));
17995
8
    llvm::Value *RayExtent = EmitScalarExpr(E->getArg(1));
17996
8
    llvm::Value *RayOrigin = EmitScalarExpr(E->getArg(2));
17997
8
    llvm::Value *RayDir = EmitScalarExpr(E->getArg(3));
17998
8
    llvm::Value *RayInverseDir = EmitScalarExpr(E->getArg(4));
17999
8
    llvm::Value *TextureDescr = EmitScalarExpr(E->getArg(5));
18000
18001
    // The builtins take these arguments as vec4 where the last element is
18002
    // ignored. The intrinsic takes them as vec3.
18003
8
    RayOrigin = Builder.CreateShuffleVector(RayOrigin, RayOrigin,
18004
8
                                            ArrayRef<int>{0, 1, 2});
18005
8
    RayDir =
18006
8
        Builder.CreateShuffleVector(RayDir, RayDir, ArrayRef<int>{0, 1, 2});
18007
8
    RayInverseDir = Builder.CreateShuffleVector(RayInverseDir, RayInverseDir,
18008
8
                                                ArrayRef<int>{0, 1, 2});
18009
18010
8
    Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_image_bvh_intersect_ray,
18011
8
                                   {NodePtr->getType(), RayDir->getType()});
18012
8
    return Builder.CreateCall(F, {NodePtr, RayExtent, RayOrigin, RayDir,
18013
8
                                  RayInverseDir, TextureDescr});
18014
6
  }
18015
18016
6
  case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_rtn: {
18017
6
    SmallVector<Value *, 4> Args;
18018
30
    for (int i = 0, e = E->getNumArgs(); i != e; ++i)
18019
24
      Args.push_back(EmitScalarExpr(E->getArg(i)));
18020
18021
6
    Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ds_bvh_stack_rtn);
18022
6
    Value *Call = Builder.CreateCall(F, Args);
18023
6
    Value *Rtn = Builder.CreateExtractValue(Call, 0);
18024
6
    Value *A = Builder.CreateExtractValue(Call, 1);
18025
6
    llvm::Type *RetTy = ConvertType(E->getType());
18026
6
    Value *I0 = Builder.CreateInsertElement(PoisonValue::get(RetTy), Rtn,
18027
6
                                            (uint64_t)0);
18028
6
    return Builder.CreateInsertElement(I0, A, 1);
18029
6
  }
18030
18031
1
  case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32:
18032
2
  case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64:
18033
3
  case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32:
18034
4
  case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64:
18035
5
  case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32:
18036
6
  case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64:
18037
7
  case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32:
18038
8
  case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64:
18039
9
  case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32:
18040
10
  case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64:
18041
11
  case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32:
18042
12
  case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64: {
18043
18044
    // These operations perform a matrix multiplication and accumulation of
18045
    // the form:
18046
    //             D = A * B + C
18047
    // The return type always matches the type of matrix C.
18048
12
    unsigned ArgForMatchingRetType;
18049
12
    unsigned BuiltinWMMAOp;
18050
18051
12
    switch (BuiltinID) {
18052
1
    case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32:
18053
2
    case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64:
18054
2
      ArgForMatchingRetType = 2;
18055
2
      BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_f16;
18056
2
      break;
18057
1
    case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32:
18058
2
    case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64:
18059
2
      ArgForMatchingRetType = 2;
18060
2
      BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf16;
18061
2
      break;
18062
1
    case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32:
18063
2
    case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64:
18064
2
      ArgForMatchingRetType = 2;
18065
2
      BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x16_f16;
18066
2
      break;
18067
1
    case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32:
18068
2
    case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64:
18069
2
      ArgForMatchingRetType = 2;
18070
2
      BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16;
18071
2
      break;
18072
1
    case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32:
18073
2
    case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64:
18074
2
      ArgForMatchingRetType = 4;
18075
2
      BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x16_iu8;
18076
2
      break;
18077
1
    case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32:
18078
2
    case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64:
18079
2
      ArgForMatchingRetType = 4;
18080
2
      BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x16_iu4;
18081
2
      break;
18082
12
    }
18083
18084
12
    SmallVector<Value *, 6> Args;
18085
64
    for (int i = 0, e = E->getNumArgs(); i != e; ++i)
18086
52
      Args.push_back(EmitScalarExpr(E->getArg(i)));
18087
18088
12
    Function *F = CGM.getIntrinsic(BuiltinWMMAOp,
18089
12
                                   {Args[ArgForMatchingRetType]->getType()});
18090
18091
12
    return Builder.CreateCall(F, Args);
18092
12
  }
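A note on the case just closed, restating the dispatch in a standalone form: for the floating-point WMMA builtins the operands are A, B, C in order, so the accumulator whose type fixes the intrinsic overload is operand 2, while the iu8/iu4 integer forms carry extra sign-control operands and the accumulator sits at index 4. The sketch below is illustrative only; the dotted strings are simply the Intrinsic:: identifiers from the switch spelled as intrinsic names.

#include <cstdio>
#include <utility>

enum class WmmaKind { F32_F16, F32_BF16, I32_IU8, I32_IU4 };

// Map a builtin kind to (index of the operand whose type names the overload,
// intrinsic), mirroring the ArgForMatchingRetType/BuiltinWMMAOp switch above.
static std::pair<unsigned, const char *> pickWmma(WmmaKind K) {
  switch (K) {
  case WmmaKind::F32_F16:  return {2, "llvm.amdgcn.wmma.f32.16x16x16.f16"};
  case WmmaKind::F32_BF16: return {2, "llvm.amdgcn.wmma.f32.16x16x16.bf16"};
  case WmmaKind::I32_IU8:  return {4, "llvm.amdgcn.wmma.i32.16x16x16.iu8"};
  case WmmaKind::I32_IU4:  return {4, "llvm.amdgcn.wmma.i32.16x16x16.iu4"};
  }
  return {0, nullptr};
}

int main() {
  auto [ArgIdx, Name] = pickWmma(WmmaKind::I32_IU8);
  std::printf("overload taken from operand %u of %s\n", ArgIdx, Name);
}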
18093
18094
  // amdgcn workitem
18095
1
  case AMDGPU::BI__builtin_amdgcn_workitem_id_x:
18096
1
    return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_x, 0, 1024);
18097
1
  case AMDGPU::BI__builtin_amdgcn_workitem_id_y:
18098
1
    return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_y, 0, 1024);
18099
1
  case AMDGPU::BI__builtin_amdgcn_workitem_id_z:
18100
1
    return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_z, 0, 1024);
18101
18102
  // amdgcn workgroup size
18103
6
  case AMDGPU::BI__builtin_amdgcn_workgroup_size_x:
18104
6
    return EmitAMDGPUWorkGroupSize(*this, 0);
18105
5
  case AMDGPU::BI__builtin_amdgcn_workgroup_size_y:
18106
5
    return EmitAMDGPUWorkGroupSize(*this, 1);
18107
5
  case AMDGPU::BI__builtin_amdgcn_workgroup_size_z:
18108
5
    return EmitAMDGPUWorkGroupSize(*this, 2);
18109
18110
  // amdgcn grid size
18111
1
  case AMDGPU::BI__builtin_amdgcn_grid_size_x:
18112
1
    return EmitAMDGPUGridSize(*this, 0);
18113
1
  case AMDGPU::BI__builtin_amdgcn_grid_size_y:
18114
1
    return EmitAMDGPUGridSize(*this, 1);
18115
1
  case AMDGPU::BI__builtin_amdgcn_grid_size_z:
18116
1
    return EmitAMDGPUGridSize(*this, 2);
18117
18118
  // r600 intrinsics
18119
0
  case AMDGPU::BI__builtin_r600_recipsqrt_ieee:
18120
1
  case AMDGPU::BI__builtin_r600_recipsqrt_ieeef:
18121
1
    return emitUnaryBuiltin(*this, E, Intrinsic::r600_recipsqrt_ieee);
18122
1
  case AMDGPU::BI__builtin_r600_read_tidig_x:
18123
1
    return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_x, 0, 1024);
18124
1
  case AMDGPU::BI__builtin_r600_read_tidig_y:
18125
1
    return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_y, 0, 1024);
18126
1
  case AMDGPU::BI__builtin_r600_read_tidig_z:
18127
1
    return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_z, 0, 1024);
18128
1
  case AMDGPU::BI__builtin_amdgcn_alignbit: {
18129
1
    llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
18130
1
    llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
18131
1
    llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
18132
1
    Function *F = CGM.getIntrinsic(Intrinsic::fshr, Src0->getType());
18133
1
    return Builder.CreateCall(F, { Src0, Src1, Src2 });
18134
0
  }
18135
6
  case AMDGPU::BI__builtin_amdgcn_fence: {
18136
6
    ProcessOrderScopeAMDGCN(EmitScalarExpr(E->getArg(0)),
18137
6
                            EmitScalarExpr(E->getArg(1)), AO, SSID);
18138
6
    return Builder.CreateFence(AO, SSID);
18139
0
  }
18140
11
  case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
18141
19
  case AMDGPU::BI__builtin_amdgcn_atomic_inc64:
18142
33
  case AMDGPU::BI__builtin_amdgcn_atomic_dec32:
18143
45
  case AMDGPU::BI__builtin_amdgcn_atomic_dec64: {
18144
45
    llvm::AtomicRMWInst::BinOp BinOp;
18145
45
    switch (BuiltinID) {
18146
11
    case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
18147
19
    case AMDGPU::BI__builtin_amdgcn_atomic_inc64:
18148
19
      BinOp = llvm::AtomicRMWInst::UIncWrap;
18149
19
      break;
18150
14
    case AMDGPU::BI__builtin_amdgcn_atomic_dec32:
18151
26
    case AMDGPU::BI__builtin_amdgcn_atomic_dec64:
18152
26
      BinOp = llvm::AtomicRMWInst::UDecWrap;
18153
26
      break;
18154
45
    }
18155
18156
45
    Value *Ptr = EmitScalarExpr(E->getArg(0));
18157
45
    Value *Val = EmitScalarExpr(E->getArg(1));
18158
18159
45
    ProcessOrderScopeAMDGCN(EmitScalarExpr(E->getArg(2)),
18160
45
                            EmitScalarExpr(E->getArg(3)), AO, SSID);
18161
18162
45
    QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
18163
45
    bool Volatile =
18164
45
        PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
18165
18166
45
    llvm::AtomicRMWInst *RMW =
18167
45
        Builder.CreateAtomicRMW(BinOp, Ptr, Val, AO, SSID);
18168
45
    if (Volatile)
18169
5
      RMW->setVolatile(true);
18170
45
    return RMW;
18171
45
  }
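The inc32/inc64/dec32/dec64 builtins just above are emitted as atomicrmw uinc_wrap and udec_wrap instructions, with ordering and scope taken from arguments 2 and 3. As a plain-C++ reference for what the two wrapping updates compute (a sketch of the arithmetic the LLVM LangRef describes for these operations, not part of the lowering itself):

#include <cassert>
#include <cstdint>

// New value produced by atomicrmw uinc_wrap: increment, wrapping to 0 once
// the limit Val has been reached or exceeded.
static uint32_t uinc_wrap(uint32_t Old, uint32_t Val) {
  return (Old >= Val) ? 0 : Old + 1;
}

// New value produced by atomicrmw udec_wrap: decrement, wrapping back to Val
// when the old value is 0 or already above Val.
static uint32_t udec_wrap(uint32_t Old, uint32_t Val) {
  return (Old == 0 || Old > Val) ? Val : Old - 1;
}

int main() {
  assert(uinc_wrap(6, 7) == 7);
  assert(uinc_wrap(7, 7) == 0); // hit the limit, wrap to 0
  assert(udec_wrap(0, 7) == 7); // would go below 0, wrap to the limit
  assert(udec_wrap(9, 7) == 7); // above the limit, snap back to it
}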
18172
7
  case AMDGPU::BI__builtin_amdgcn_s_sendmsg_rtn:
18173
15
  case AMDGPU::BI__builtin_amdgcn_s_sendmsg_rtnl: {
18174
15
    llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
18175
15
    llvm::Type *ResultType = ConvertType(E->getType());
18176
    // s_sendmsg_rtn is mangled using return type only.
18177
15
    Function *F =
18178
15
        CGM.getIntrinsic(Intrinsic::amdgcn_s_sendmsg_rtn, {ResultType});
18179
15
    return Builder.CreateCall(F, {Arg});
18180
7
  }
18181
0
  default:
18182
0
    return nullptr;
18183
439
  }
18184
439
}
18185
18186
/// Handle a SystemZ function in which the final argument is a pointer
18187
/// to an int that receives the post-instruction CC value.  At the LLVM level
18188
/// this is represented as a function that returns a {result, cc} pair.
18189
static Value *EmitSystemZIntrinsicWithCC(CodeGenFunction &CGF,
18190
                                         unsigned IntrinsicID,
18191
0
                                         const CallExpr *E) {
18192
0
  unsigned NumArgs = E->getNumArgs() - 1;
18193
0
  SmallVector<Value *, 8> Args(NumArgs);
18194
0
  for (unsigned I = 0; I < NumArgs; ++I)
18195
0
    Args[I] = CGF.EmitScalarExpr(E->getArg(I));
18196
0
  Address CCPtr = CGF.EmitPointerWithAlignment(E->getArg(NumArgs));
18197
0
  Function *F = CGF.CGM.getIntrinsic(IntrinsicID);
18198
0
  Value *Call = CGF.Builder.CreateCall(F, Args);
18199
0
  Value *CC = CGF.Builder.CreateExtractValue(Call, 1);
18200
0
  CGF.Builder.CreateStore(CC, CCPtr);
18201
0
  return CGF.Builder.CreateExtractValue(Call, 0);
18202
0
}
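For readers unfamiliar with the pattern the comment above describes, here is the same data flow restated as ordinary C++ (purely illustrative; the struct and function names are invented for the sketch):

// The intrinsic returns a {result, cc} aggregate; the emitted code stores the
// cc member through the builtin's trailing int* argument and yields the
// result member as the builtin's value.
struct ResultWithCC { int Result; int CC; };

static int callWithCC(ResultWithCC (*IntrinsicCall)(int), int Arg, int *CCOut) {
  ResultWithCC Pair = IntrinsicCall(Arg); // Builder.CreateCall
  *CCOut = Pair.CC;                       // CreateExtractValue(Call, 1) + CreateStore
  return Pair.Result;                     // CreateExtractValue(Call, 0)
}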
18203
18204
Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID,
18205
0
                                               const CallExpr *E) {
18206
0
  switch (BuiltinID) {
18207
0
  case SystemZ::BI__builtin_tbegin: {
18208
0
    Value *TDB = EmitScalarExpr(E->getArg(0));
18209
0
    Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c);
18210
0
    Function *F = CGM.getIntrinsic(Intrinsic::s390_tbegin);
18211
0
    return Builder.CreateCall(F, {TDB, Control});
18212
0
  }
18213
0
  case SystemZ::BI__builtin_tbegin_nofloat: {
18214
0
    Value *TDB = EmitScalarExpr(E->getArg(0));
18215
0
    Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c);
18216
0
    Function *F = CGM.getIntrinsic(Intrinsic::s390_tbegin_nofloat);
18217
0
    return Builder.CreateCall(F, {TDB, Control});
18218
0
  }
18219
0
  case SystemZ::BI__builtin_tbeginc: {
18220
0
    Value *TDB = llvm::ConstantPointerNull::get(Int8PtrTy);
18221
0
    Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff08);
18222
0
    Function *F = CGM.getIntrinsic(Intrinsic::s390_tbeginc);
18223
0
    return Builder.CreateCall(F, {TDB, Control});
18224
0
  }
18225
0
  case SystemZ::BI__builtin_tabort: {
18226
0
    Value *Data = EmitScalarExpr(E->getArg(0));
18227
0
    Function *F = CGM.getIntrinsic(Intrinsic::s390_tabort);
18228
0
    return Builder.CreateCall(F, Builder.CreateSExt(Data, Int64Ty, "tabort"));
18229
0
  }
18230
0
  case SystemZ::BI__builtin_non_tx_store: {
18231
0
    Value *Address = EmitScalarExpr(E->getArg(0));
18232
0
    Value *Data = EmitScalarExpr(E->getArg(1));
18233
0
    Function *F = CGM.getIntrinsic(Intrinsic::s390_ntstg);
18234
0
    return Builder.CreateCall(F, {Data, Address});
18235
0
  }
18236
18237
  // Vector builtins.  Note that most vector builtins are mapped automatically
18238
  // to target-specific LLVM intrinsics.  The ones handled specially here can
18239
  // be represented via standard LLVM IR, which is preferable to enable common
18240
  // LLVM optimizations.
18241
18242
0
  case SystemZ::BI__builtin_s390_vpopctb:
18243
0
  case SystemZ::BI__builtin_s390_vpopcth:
18244
0
  case SystemZ::BI__builtin_s390_vpopctf:
18245
0
  case SystemZ::BI__builtin_s390_vpopctg: {
18246
0
    llvm::Type *ResultType = ConvertType(E->getType());
18247
0
    Value *X = EmitScalarExpr(E->getArg(0));
18248
0
    Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
18249
0
    return Builder.CreateCall(F, X);
18250
0
  }
18251
18252
0
  case SystemZ::BI__builtin_s390_vclzb:
18253
0
  case SystemZ::BI__builtin_s390_vclzh:
18254
0
  case SystemZ::BI__builtin_s390_vclzf:
18255
0
  case SystemZ::BI__builtin_s390_vclzg: {
18256
0
    llvm::Type *ResultType = ConvertType(E->getType());
18257
0
    Value *X = EmitScalarExpr(E->getArg(0));
18258
0
    Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
18259
0
    Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType);
18260
0
    return Builder.CreateCall(F, {X, Undef});
18261
0
  }
18262
18263
0
  case SystemZ::BI__builtin_s390_vctzb:
18264
0
  case SystemZ::BI__builtin_s390_vctzh:
18265
0
  case SystemZ::BI__builtin_s390_vctzf:
18266
0
  case SystemZ::BI__builtin_s390_vctzg: {
18267
0
    llvm::Type *ResultType = ConvertType(E->getType());
18268
0
    Value *X = EmitScalarExpr(E->getArg(0));
18269
0
    Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
18270
0
    Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType);
18271
0
    return Builder.CreateCall(F, {X, Undef});
18272
0
  }
18273
18274
0
  case SystemZ::BI__builtin_s390_vfsqsb:
18275
0
  case SystemZ::BI__builtin_s390_vfsqdb: {
18276
0
    llvm::Type *ResultType = ConvertType(E->getType());
18277
0
    Value *X = EmitScalarExpr(E->getArg(0));
18278
0
    if (Builder.getIsFPConstrained()) {
18279
0
      Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt, ResultType);
18280
0
      return Builder.CreateConstrainedFPCall(F, { X });
18281
0
    } else {
18282
0
      Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType);
18283
0
      return Builder.CreateCall(F, X);
18284
0
    }
18285
0
  }
18286
0
  case SystemZ::BI__builtin_s390_vfmasb:
18287
0
  case SystemZ::BI__builtin_s390_vfmadb: {
18288
0
    llvm::Type *ResultType = ConvertType(E->getType());
18289
0
    Value *X = EmitScalarExpr(E->getArg(0));
18290
0
    Value *Y = EmitScalarExpr(E->getArg(1));
18291
0
    Value *Z = EmitScalarExpr(E->getArg(2));
18292
0
    if (Builder.getIsFPConstrained()) {
18293
0
      Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
18294
0
      return Builder.CreateConstrainedFPCall(F, {X, Y, Z});
18295
0
    } else {
18296
0
      Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
18297
0
      return Builder.CreateCall(F, {X, Y, Z});
18298
0
    }
18299
0
  }
18300
0
  case SystemZ::BI__builtin_s390_vfmssb:
18301
0
  case SystemZ::BI__builtin_s390_vfmsdb: {
18302
0
    llvm::Type *ResultType = ConvertType(E->getType());
18303
0
    Value *X = EmitScalarExpr(E->getArg(0));
18304
0
    Value *Y = EmitScalarExpr(E->getArg(1));
18305
0
    Value *Z = EmitScalarExpr(E->getArg(2));
18306
0
    if (Builder.getIsFPConstrained()) {
18307
0
      Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
18308
0
      return Builder.CreateConstrainedFPCall(F, {X, Y, Builder.CreateFNeg(Z, "neg")});
18309
0
    } else {
18310
0
      Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
18311
0
      return Builder.CreateCall(F, {X, Y, Builder.CreateFNeg(Z, "neg")});
18312
0
    }
18313
0
  }
18314
0
  case SystemZ::BI__builtin_s390_vfnmasb:
18315
0
  case SystemZ::BI__builtin_s390_vfnmadb: {
18316
0
    llvm::Type *ResultType = ConvertType(E->getType());
18317
0
    Value *X = EmitScalarExpr(E->getArg(0));
18318
0
    Value *Y = EmitScalarExpr(E->getArg(1));
18319
0
    Value *Z = EmitScalarExpr(E->getArg(2));
18320
0
    if (Builder.getIsFPConstrained()) {
18321
0
      Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
18322
0
      return Builder.CreateFNeg(Builder.CreateConstrainedFPCall(F, {X, Y,  Z}), "neg");
18323
0
    } else {
18324
0
      Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
18325
0
      return Builder.CreateFNeg(Builder.CreateCall(F, {X, Y, Z}), "neg");
18326
0
    }
18327
0
  }
18328
0
  case SystemZ::BI__builtin_s390_vfnmssb:
18329
0
  case SystemZ::BI__builtin_s390_vfnmsdb: {
18330
0
    llvm::Type *ResultType = ConvertType(E->getType());
18331
0
    Value *X = EmitScalarExpr(E->getArg(0));
18332
0
    Value *Y = EmitScalarExpr(E->getArg(1));
18333
0
    Value *Z = EmitScalarExpr(E->getArg(2));
18334
0
    if (Builder.getIsFPConstrained()) {
18335
0
      Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
18336
0
      Value *NegZ = Builder.CreateFNeg(Z, "sub");
18337
0
      return Builder.CreateFNeg(Builder.CreateConstrainedFPCall(F, {X, Y, NegZ}));
18338
0
    } else {
18339
0
      Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
18340
0
      Value *NegZ = Builder.CreateFNeg(Z, "neg");
18341
0
      return Builder.CreateFNeg(Builder.CreateCall(F, {X, Y, NegZ}));
18342
0
    }
18343
0
  }
18344
0
  case SystemZ::BI__builtin_s390_vflpsb:
18345
0
  case SystemZ::BI__builtin_s390_vflpdb: {
18346
0
    llvm::Type *ResultType = ConvertType(E->getType());
18347
0
    Value *X = EmitScalarExpr(E->getArg(0));
18348
0
    Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
18349
0
    return Builder.CreateCall(F, X);
18350
0
  }
18351
0
  case SystemZ::BI__builtin_s390_vflnsb:
18352
0
  case SystemZ::BI__builtin_s390_vflndb: {
18353
0
    llvm::Type *ResultType = ConvertType(E->getType());
18354
0
    Value *X = EmitScalarExpr(E->getArg(0));
18355
0
    Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
18356
0
    return Builder.CreateFNeg(Builder.CreateCall(F, X), "neg");
18357
0
  }
18358
0
  case SystemZ::BI__builtin_s390_vfisb:
18359
0
  case SystemZ::BI__builtin_s390_vfidb: {
18360
0
    llvm::Type *ResultType = ConvertType(E->getType());
18361
0
    Value *X = EmitScalarExpr(E->getArg(0));
18362
    // Constant-fold the M4 and M5 mask arguments.
18363
0
    llvm::APSInt M4 = *E->getArg(1)->getIntegerConstantExpr(getContext());
18364
0
    llvm::APSInt M5 = *E->getArg(2)->getIntegerConstantExpr(getContext());
18365
    // Check whether this instance can be represented via an LLVM standard
18366
    // intrinsic.  We only support some combinations of M4 and M5.
18367
0
    Intrinsic::ID ID = Intrinsic::not_intrinsic;
18368
0
    Intrinsic::ID CI;
18369
0
    switch (M4.getZExtValue()) {
18370
0
    default: break;
18371
0
    case 0:  // IEEE-inexact exception allowed
18372
0
      switch (M5.getZExtValue()) {
18373
0
      default: break;
18374
0
      case 0: ID = Intrinsic::rint;
18375
0
              CI = Intrinsic::experimental_constrained_rint; break;
18376
0
      }
18377
0
      break;
18378
0
    case 4:  // IEEE-inexact exception suppressed
18379
0
      switch (M5.getZExtValue()) {
18380
0
      default: break;
18381
0
      case 0: ID = Intrinsic::nearbyint;
18382
0
              CI = Intrinsic::experimental_constrained_nearbyint; break;
18383
0
      case 1: ID = Intrinsic::round;
18384
0
              CI = Intrinsic::experimental_constrained_round; break;
18385
0
      case 5: ID = Intrinsic::trunc;
18386
0
              CI = Intrinsic::experimental_constrained_trunc; break;
18387
0
      case 6: ID = Intrinsic::ceil;
18388
0
              CI = Intrinsic::experimental_constrained_ceil; break;
18389
0
      case 7: ID = Intrinsic::floor;
18390
0
              CI = Intrinsic::experimental_constrained_floor; break;
18391
0
      }
18392
0
      break;
18393
0
    }
18394
0
    if (ID != Intrinsic::not_intrinsic) {
18395
0
      if (Builder.getIsFPConstrained()) {
18396
0
        Function *F = CGM.getIntrinsic(CI, ResultType);
18397
0
        return Builder.CreateConstrainedFPCall(F, X);
18398
0
      } else {
18399
0
        Function *F = CGM.getIntrinsic(ID, ResultType);
18400
0
        return Builder.CreateCall(F, X);
18401
0
      }
18402
0
    }
18403
0
    switch (BuiltinID) { // FIXME: constrained version?
18404
0
      case SystemZ::BI__builtin_s390_vfisb: ID = Intrinsic::s390_vfisb; break;
18405
0
      case SystemZ::BI__builtin_s390_vfidb: ID = Intrinsic::s390_vfidb; break;
18406
0
      default: llvm_unreachable("Unknown BuiltinID");
18407
0
    }
18408
0
    Function *F = CGM.getIntrinsic(ID);
18409
0
    Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
18410
0
    Value *M5Value = llvm::ConstantInt::get(getLLVMContext(), M5);
18411
0
    return Builder.CreateCall(F, {X, M4Value, M5Value});
18412
0
  }
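Summarising the mask handling in the case just closed (derived directly from the nested switch, no new behaviour): only a few (M4, M5) combinations map onto generic LLVM rounding intrinsics, and everything else falls back to the target-specific s390.vfisb / s390.vfidb intrinsic with the constant masks passed through unchanged.

  M4 = 0, M5 = 0  ->  llvm.rint       (IEEE-inexact exception allowed)
  M4 = 4, M5 = 0  ->  llvm.nearbyint  (inexact suppressed)
  M4 = 4, M5 = 1  ->  llvm.round
  M4 = 4, M5 = 5  ->  llvm.trunc
  M4 = 4, M5 = 6  ->  llvm.ceil
  M4 = 4, M5 = 7  ->  llvm.floor

Under strict floating point the corresponding experimental.constrained.* intrinsic is emitted instead.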
18413
0
  case SystemZ::BI__builtin_s390_vfmaxsb:
18414
0
  case SystemZ::BI__builtin_s390_vfmaxdb: {
18415
0
    llvm::Type *ResultType = ConvertType(E->getType());
18416
0
    Value *X = EmitScalarExpr(E->getArg(0));
18417
0
    Value *Y = EmitScalarExpr(E->getArg(1));
18418
    // Constant-fold the M4 mask argument.
18419
0
    llvm::APSInt M4 = *E->getArg(2)->getIntegerConstantExpr(getContext());
18420
    // Check whether this instance can be represented via an LLVM standard
18421
    // intrinsic.  We only support some values of M4.
18422
0
    Intrinsic::ID ID = Intrinsic::not_intrinsic;
18423
0
    Intrinsic::ID CI;
18424
0
    switch (M4.getZExtValue()) {
18425
0
    default: break;
18426
0
    case 4: ID = Intrinsic::maxnum;
18427
0
            CI = Intrinsic::experimental_constrained_maxnum; break;
18428
0
    }
18429
0
    if (ID != Intrinsic::not_intrinsic) {
18430
0
      if (Builder.getIsFPConstrained()) {
18431
0
        Function *F = CGM.getIntrinsic(CI, ResultType);
18432
0
        return Builder.CreateConstrainedFPCall(F, {X, Y});
18433
0
      } else {
18434
0
        Function *F = CGM.getIntrinsic(ID, ResultType);
18435
0
        return Builder.CreateCall(F, {X, Y});
18436
0
      }
18437
0
    }
18438
0
    switch (BuiltinID) {
18439
0
      case SystemZ::BI__builtin_s390_vfmaxsb: ID = Intrinsic::s390_vfmaxsb; break;
18440
0
      case SystemZ::BI__builtin_s390_vfmaxdb: ID = Intrinsic::s390_vfmaxdb; break;
18441
0
      default: llvm_unreachable("Unknown BuiltinID");
18442
0
    }
18443
0
    Function *F = CGM.getIntrinsic(ID);
18444
0
    Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
18445
0
    return Builder.CreateCall(F, {X, Y, M4Value});
18446
0
  }
18447
0
  case SystemZ::BI__builtin_s390_vfminsb:
18448
0
  case SystemZ::BI__builtin_s390_vfmindb: {
18449
0
    llvm::Type *ResultType = ConvertType(E->getType());
18450
0
    Value *X = EmitScalarExpr(E->getArg(0));
18451
0
    Value *Y = EmitScalarExpr(E->getArg(1));
18452
    // Constant-fold the M4 mask argument.
18453
0
    llvm::APSInt M4 = *E->getArg(2)->getIntegerConstantExpr(getContext());
18454
    // Check whether this instance can be represented via an LLVM standard
18455
    // intrinsic.  We only support some values of M4.
18456
0
    Intrinsic::ID ID = Intrinsic::not_intrinsic;
18457
0
    Intrinsic::ID CI;
18458
0
    switch (M4.getZExtValue()) {
18459
0
    default: break;
18460
0
    case 4: ID = Intrinsic::minnum;
18461
0
            CI = Intrinsic::experimental_constrained_minnum; break;
18462
0
    }
18463
0
    if (ID != Intrinsic::not_intrinsic) {
18464
0
      if (Builder.getIsFPConstrained()) {
18465
0
        Function *F = CGM.getIntrinsic(CI, ResultType);
18466
0
        return Builder.CreateConstrainedFPCall(F, {X, Y});
18467
0
      } else {
18468
0
        Function *F = CGM.getIntrinsic(ID, ResultType);
18469
0
        return Builder.CreateCall(F, {X, Y});
18470
0
      }
18471
0
    }
18472
0
    switch (BuiltinID) {
18473
0
      case SystemZ::BI__builtin_s390_vfminsb: ID = Intrinsic::s390_vfminsb; break;
18474
0
      case SystemZ::BI__builtin_s390_vfmindb: ID = Intrinsic::s390_vfmindb; break;
18475
0
      default: llvm_unreachable("Unknown BuiltinID");
18476
0
    }
18477
0
    Function *F = CGM.getIntrinsic(ID);
18478
0
    Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
18479
0
    return Builder.CreateCall(F, {X, Y, M4Value});
18480
0
  }
18481
18482
0
  case SystemZ::BI__builtin_s390_vlbrh:
18483
0
  case SystemZ::BI__builtin_s390_vlbrf:
18484
0
  case SystemZ::BI__builtin_s390_vlbrg: {
18485
0
    llvm::Type *ResultType = ConvertType(E->getType());
18486
0
    Value *X = EmitScalarExpr(E->getArg(0));
18487
0
    Function *F = CGM.getIntrinsic(Intrinsic::bswap, ResultType);
18488
0
    return Builder.CreateCall(F, X);
18489
0
  }
18490
18491
  // Vector intrinsics that output the post-instruction CC value.
18492
18493
0
#define INTRINSIC_WITH_CC(NAME) \
18494
0
    case SystemZ::BI__builtin_##NAME: \
18495
0
      return EmitSystemZIntrinsicWithCC(*this, Intrinsic::NAME, E)
18496
18497
0
  INTRINSIC_WITH_CC(s390_vpkshs);
18498
0
  INTRINSIC_WITH_CC(s390_vpksfs);
18499
0
  INTRINSIC_WITH_CC(s390_vpksgs);
18500
18501
0
  INTRINSIC_WITH_CC(s390_vpklshs);
18502
0
  INTRINSIC_WITH_CC(s390_vpklsfs);
18503
0
  INTRINSIC_WITH_CC(s390_vpklsgs);
18504
18505
0
  INTRINSIC_WITH_CC(s390_vceqbs);
18506
0
  INTRINSIC_WITH_CC(s390_vceqhs);
18507
0
  INTRINSIC_WITH_CC(s390_vceqfs);
18508
0
  INTRINSIC_WITH_CC(s390_vceqgs);
18509
18510
0
  INTRINSIC_WITH_CC(s390_vchbs);
18511
0
  INTRINSIC_WITH_CC(s390_vchhs);
18512
0
  INTRINSIC_WITH_CC(s390_vchfs);
18513
0
  INTRINSIC_WITH_CC(s390_vchgs);
18514
18515
0
  INTRINSIC_WITH_CC(s390_vchlbs);
18516
0
  INTRINSIC_WITH_CC(s390_vchlhs);
18517
0
  INTRINSIC_WITH_CC(s390_vchlfs);
18518
0
  INTRINSIC_WITH_CC(s390_vchlgs);
18519
18520
0
  INTRINSIC_WITH_CC(s390_vfaebs);
18521
0
  INTRINSIC_WITH_CC(s390_vfaehs);
18522
0
  INTRINSIC_WITH_CC(s390_vfaefs);
18523
18524
0
  INTRINSIC_WITH_CC(s390_vfaezbs);
18525
0
  INTRINSIC_WITH_CC(s390_vfaezhs);
18526
0
  INTRINSIC_WITH_CC(s390_vfaezfs);
18527
18528
0
  INTRINSIC_WITH_CC(s390_vfeebs);
18529
0
  INTRINSIC_WITH_CC(s390_vfeehs);
18530
0
  INTRINSIC_WITH_CC(s390_vfeefs);
18531
18532
0
  INTRINSIC_WITH_CC(s390_vfeezbs);
18533
0
  INTRINSIC_WITH_CC(s390_vfeezhs);
18534
0
  INTRINSIC_WITH_CC(s390_vfeezfs);
18535
18536
0
  INTRINSIC_WITH_CC(s390_vfenebs);
18537
0
  INTRINSIC_WITH_CC(s390_vfenehs);
18538
0
  INTRINSIC_WITH_CC(s390_vfenefs);
18539
18540
0
  INTRINSIC_WITH_CC(s390_vfenezbs);
18541
0
  INTRINSIC_WITH_CC(s390_vfenezhs);
18542
0
  INTRINSIC_WITH_CC(s390_vfenezfs);
18543
18544
0
  INTRINSIC_WITH_CC(s390_vistrbs);
18545
0
  INTRINSIC_WITH_CC(s390_vistrhs);
18546
0
  INTRINSIC_WITH_CC(s390_vistrfs);
18547
18548
0
  INTRINSIC_WITH_CC(s390_vstrcbs);
18549
0
  INTRINSIC_WITH_CC(s390_vstrchs);
18550
0
  INTRINSIC_WITH_CC(s390_vstrcfs);
18551
18552
0
  INTRINSIC_WITH_CC(s390_vstrczbs);
18553
0
  INTRINSIC_WITH_CC(s390_vstrczhs);
18554
0
  INTRINSIC_WITH_CC(s390_vstrczfs);
18555
18556
0
  INTRINSIC_WITH_CC(s390_vfcesbs);
18557
0
  INTRINSIC_WITH_CC(s390_vfcedbs);
18558
0
  INTRINSIC_WITH_CC(s390_vfchsbs);
18559
0
  INTRINSIC_WITH_CC(s390_vfchdbs);
18560
0
  INTRINSIC_WITH_CC(s390_vfchesbs);
18561
0
  INTRINSIC_WITH_CC(s390_vfchedbs);
18562
18563
0
  INTRINSIC_WITH_CC(s390_vftcisb);
18564
0
  INTRINSIC_WITH_CC(s390_vftcidb);
18565
18566
0
  INTRINSIC_WITH_CC(s390_vstrsb);
18567
0
  INTRINSIC_WITH_CC(s390_vstrsh);
18568
0
  INTRINSIC_WITH_CC(s390_vstrsf);
18569
18570
0
  INTRINSIC_WITH_CC(s390_vstrszb);
18571
0
  INTRINSIC_WITH_CC(s390_vstrszh);
18572
0
  INTRINSIC_WITH_CC(s390_vstrszf);
18573
18574
0
#undef INTRINSIC_WITH_CC
18575
18576
0
  default:
18577
0
    return nullptr;
18578
0
  }
18579
0
}
18580
18581
namespace {
18582
// Helper classes for mapping MMA builtins to particular LLVM intrinsic variant.
18583
struct NVPTXMmaLdstInfo {
18584
  unsigned NumResults;  // Number of elements to load/store
18585
  // Intrinsic IDs for row/col variants. 0 if the particular layout is unsupported.
18586
  unsigned IID_col;
18587
  unsigned IID_row;
18588
};
18589
18590
#define MMA_INTR(geom_op_type, layout) \
18591
708
  Intrinsic::nvvm_wmma_##geom_op_type##_##layout##_stride
18592
#define MMA_LDST(n, geom_op_type)                                              \
18593
348
  { n, MMA_INTR(geom_op_type, col), MMA_INTR(geom_op_type, row) }
18594
18595
360
static NVPTXMmaLdstInfo getNVPTXMmaLdstInfo(unsigned BuiltinID) {
18596
360
  switch (BuiltinID) {
18597
  // FP MMA loads
18598
14
  case NVPTX::BI__hmma_m16n16k16_ld_a:
18599
14
    return MMA_LDST(8, m16n16k16_load_a_f16);
18600
14
  case NVPTX::BI__hmma_m16n16k16_ld_b:
18601
14
    return MMA_LDST(8, m16n16k16_load_b_f16);
18602
14
  case NVPTX::BI__hmma_m16n16k16_ld_c_f16:
18603
14
    return MMA_LDST(4, m16n16k16_load_c_f16);
18604
14
  case NVPTX::BI__hmma_m16n16k16_ld_c_f32:
18605
14
    return MMA_LDST(8, m16n16k16_load_c_f32);
18606
10
  case NVPTX::BI__hmma_m32n8k16_ld_a:
18607
10
    return MMA_LDST(8, m32n8k16_load_a_f16);
18608
10
  case NVPTX::BI__hmma_m32n8k16_ld_b:
18609
10
    return MMA_LDST(8, m32n8k16_load_b_f16);
18610
10
  case NVPTX::BI__hmma_m32n8k16_ld_c_f16:
18611
10
    return MMA_LDST(4, m32n8k16_load_c_f16);
18612
10
  case NVPTX::BI__hmma_m32n8k16_ld_c_f32:
18613
10
    return MMA_LDST(8, m32n8k16_load_c_f32);
18614
10
  case NVPTX::BI__hmma_m8n32k16_ld_a:
18615
10
    return MMA_LDST(8, m8n32k16_load_a_f16);
18616
10
  case NVPTX::BI__hmma_m8n32k16_ld_b:
18617
10
    return MMA_LDST(8, m8n32k16_load_b_f16);
18618
10
  case NVPTX::BI__hmma_m8n32k16_ld_c_f16:
18619
10
    return MMA_LDST(4, m8n32k16_load_c_f16);
18620
10
  case NVPTX::BI__hmma_m8n32k16_ld_c_f32:
18621
10
    return MMA_LDST(8, m8n32k16_load_c_f32);
18622
18623
  // Integer MMA loads
18624
4
  case NVPTX::BI__imma_m16n16k16_ld_a_s8:
18625
4
    return MMA_LDST(2, m16n16k16_load_a_s8);
18626
4
  case NVPTX::BI__imma_m16n16k16_ld_a_u8:
18627
4
    return MMA_LDST(2, m16n16k16_load_a_u8);
18628
4
  case NVPTX::BI__imma_m16n16k16_ld_b_s8:
18629
4
    return MMA_LDST(2, m16n16k16_load_b_s8);
18630
4
  case NVPTX::BI__imma_m16n16k16_ld_b_u8:
18631
4
    return MMA_LDST(2, m16n16k16_load_b_u8);
18632
4
  case NVPTX::BI__imma_m16n16k16_ld_c:
18633
4
    return MMA_LDST(8, m16n16k16_load_c_s32);
18634
4
  case NVPTX::BI__imma_m32n8k16_ld_a_s8:
18635
4
    return MMA_LDST(4, m32n8k16_load_a_s8);
18636
4
  case NVPTX::BI__imma_m32n8k16_ld_a_u8:
18637
4
    return MMA_LDST(4, m32n8k16_load_a_u8);
18638
4
  case NVPTX::BI__imma_m32n8k16_ld_b_s8:
18639
4
    return MMA_LDST(1, m32n8k16_load_b_s8);
18640
4
  case NVPTX::BI__imma_m32n8k16_ld_b_u8:
18641
4
    return MMA_LDST(1, m32n8k16_load_b_u8);
18642
4
  case NVPTX::BI__imma_m32n8k16_ld_c:
18643
4
    return MMA_LDST(8, m32n8k16_load_c_s32);
18644
4
  case NVPTX::BI__imma_m8n32k16_ld_a_s8:
18645
4
    return MMA_LDST(1, m8n32k16_load_a_s8);
18646
4
  case NVPTX::BI__imma_m8n32k16_ld_a_u8:
18647
4
    return MMA_LDST(1, m8n32k16_load_a_u8);
18648
4
  case NVPTX::BI__imma_m8n32k16_ld_b_s8:
18649
4
    return MMA_LDST(4, m8n32k16_load_b_s8);
18650
4
  case NVPTX::BI__imma_m8n32k16_ld_b_u8:
18651
4
    return MMA_LDST(4, m8n32k16_load_b_u8);
18652
4
  case NVPTX::BI__imma_m8n32k16_ld_c:
18653
4
    return MMA_LDST(8, m8n32k16_load_c_s32);
18654
18655
  // Sub-integer MMA loads.
18656
  // Only row/col layout is supported by A/B fragments.
18657
2
  case NVPTX::BI__imma_m8n8k32_ld_a_s4:
18658
2
    return {1, 0, MMA_INTR(m8n8k32_load_a_s4, row)};
18659
2
  case NVPTX::BI__imma_m8n8k32_ld_a_u4:
18660
2
    return {1, 0, MMA_INTR(m8n8k32_load_a_u4, row)};
18661
2
  case NVPTX::BI__imma_m8n8k32_ld_b_s4:
18662
2
    return {1, MMA_INTR(m8n8k32_load_b_s4, col), 0};
18663
2
  case NVPTX::BI__imma_m8n8k32_ld_b_u4:
18664
2
    return {1, MMA_INTR(m8n8k32_load_b_u4, col), 0};
18665
4
  case NVPTX::BI__imma_m8n8k32_ld_c:
18666
4
    return MMA_LDST(2, m8n8k32_load_c_s32);
18667
2
  case NVPTX::BI__bmma_m8n8k128_ld_a_b1:
18668
2
    return {1, 0, MMA_INTR(m8n8k128_load_a_b1, row)};
18669
2
  case NVPTX::BI__bmma_m8n8k128_ld_b_b1:
18670
2
    return {1, MMA_INTR(m8n8k128_load_b_b1, col), 0};
18671
4
  case NVPTX::BI__bmma_m8n8k128_ld_c:
18672
4
    return MMA_LDST(2, m8n8k128_load_c_s32);
18673
18674
  // Double MMA loads
18675
4
  case NVPTX::BI__dmma_m8n8k4_ld_a:
18676
4
    return MMA_LDST(1, m8n8k4_load_a_f64);
18677
4
  case NVPTX::BI__dmma_m8n8k4_ld_b:
18678
4
    return MMA_LDST(1, m8n8k4_load_b_f64);
18679
4
  case NVPTX::BI__dmma_m8n8k4_ld_c:
18680
4
    return MMA_LDST(2, m8n8k4_load_c_f64);
18681
18682
  // Alternate float MMA loads
18683
4
  case NVPTX::BI__mma_bf16_m16n16k16_ld_a:
18684
4
    return MMA_LDST(4, m16n16k16_load_a_bf16);
18685
4
  case NVPTX::BI__mma_bf16_m16n16k16_ld_b:
18686
4
    return MMA_LDST(4, m16n16k16_load_b_bf16);
18687
4
  case NVPTX::BI__mma_bf16_m8n32k16_ld_a:
18688
4
    return MMA_LDST(2, m8n32k16_load_a_bf16);
18689
4
  case NVPTX::BI__mma_bf16_m8n32k16_ld_b:
18690
4
    return MMA_LDST(8, m8n32k16_load_b_bf16);
18691
4
  case NVPTX::BI__mma_bf16_m32n8k16_ld_a:
18692
4
    return MMA_LDST(8, m32n8k16_load_a_bf16);
18693
4
  case NVPTX::BI__mma_bf16_m32n8k16_ld_b:
18694
4
    return MMA_LDST(2, m32n8k16_load_b_bf16);
18695
4
  case NVPTX::BI__mma_tf32_m16n16k8_ld_a:
18696
4
    return MMA_LDST(4, m16n16k8_load_a_tf32);
18697
4
  case NVPTX::BI__mma_tf32_m16n16k8_ld_b:
18698
4
    return MMA_LDST(4, m16n16k8_load_b_tf32);
18699
4
  case NVPTX::BI__mma_tf32_m16n16k8_ld_c:
18700
4
    return MMA_LDST(8, m16n16k8_load_c_f32);
18701
18702
  // NOTE: We need to follow the inconsistent naming scheme used by NVCC.  Unlike
18703
  // PTX and LLVM IR where stores always use fragment D, NVCC builtins always
18704
  // use fragment C for both loads and stores.
18705
  // FP MMA stores.
18706
14
  case NVPTX::BI__hmma_m16n16k16_st_c_f16:
18707
14
    return MMA_LDST(4, m16n16k16_store_d_f16);
18708
14
  case NVPTX::BI__hmma_m16n16k16_st_c_f32:
18709
14
    return MMA_LDST(8, m16n16k16_store_d_f32);
18710
10
  case NVPTX::BI__hmma_m32n8k16_st_c_f16:
18711
10
    return MMA_LDST(4, m32n8k16_store_d_f16);
18712
10
  case NVPTX::BI__hmma_m32n8k16_st_c_f32:
18713
10
    return MMA_LDST(8, m32n8k16_store_d_f32);
18714
10
  case NVPTX::BI__hmma_m8n32k16_st_c_f16:
18715
10
    return MMA_LDST(4, m8n32k16_store_d_f16);
18716
10
  case NVPTX::BI__hmma_m8n32k16_st_c_f32:
18717
10
    return MMA_LDST(8, m8n32k16_store_d_f32);
18718
18719
  // Integer and sub-integer MMA stores.
18720
  // Another naming quirk. Unlike other MMA builtins that use PTX types in the
18721
  // name, integer loads/stores use LLVM's i32.
18722
4
  case NVPTX::BI__imma_m16n16k16_st_c_i32:
18723
4
    return MMA_LDST(8, m16n16k16_store_d_s32);
18724
4
  case NVPTX::BI__imma_m32n8k16_st_c_i32:
18725
4
    return MMA_LDST(8, m32n8k16_store_d_s32);
18726
4
  case NVPTX::BI__imma_m8n32k16_st_c_i32:
18727
4
    return MMA_LDST(8, m8n32k16_store_d_s32);
18728
4
  case NVPTX::BI__imma_m8n8k32_st_c_i32:
18729
4
    return MMA_LDST(2, m8n8k32_store_d_s32);
18730
4
  case NVPTX::BI__bmma_m8n8k128_st_c_i32:
18731
4
    return MMA_LDST(2, m8n8k128_store_d_s32);
18732
18733
  // Double MMA store
18734
4
  case NVPTX::BI__dmma_m8n8k4_st_c_f64:
18735
4
    return MMA_LDST(2, m8n8k4_store_d_f64);
18736
18737
  // Alternate float MMA store
18738
4
  case NVPTX::BI__mma_m16n16k8_st_c_f32:
18739
4
    return MMA_LDST(8, m16n16k8_store_d_f32);
18740
18741
0
  default:
18742
0
    llvm_unreachable("Unknown MMA builtin");
18743
360
  }
18744
360
}
18745
#undef MMA_LDST
18746
#undef MMA_INTR
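For clarity, one mechanical expansion of the helper macros used throughout the table above (following the #define bodies exactly, nothing target-specific added):

  MMA_LDST(8, m16n16k16_load_a_f16)
    => { 8,
         Intrinsic::nvvm_wmma_m16n16k16_load_a_f16_col_stride,
         Intrinsic::nvvm_wmma_m16n16k16_load_a_f16_row_stride }

Each entry therefore records the number of result elements plus the column-major and row-major intrinsic IDs, with 0 standing in for an unsupported layout.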
18747
18748
18749
struct NVPTXMmaInfo {
18750
  unsigned NumEltsA;
18751
  unsigned NumEltsB;
18752
  unsigned NumEltsC;
18753
  unsigned NumEltsD;
18754
18755
  // Variants are ordered by layout-A/layout-B/satf, where 'row' has priority
18756
  // over 'col' for layout. The index of non-satf variants is expected to match
18757
  // the undocumented layout constants used by CUDA's mma.hpp.
18758
  std::array<unsigned, 8> Variants;
18759
18760
692
  unsigned getMMAIntrinsic(int Layout, bool Satf) {
18761
692
    unsigned Index = Layout + 4 * Satf;
18762
692
    if (Index >= Variants.size())
18763
0
      return 0;
18764
692
    return Variants[Index];
18765
692
  }
18766
};
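A few worked values for the variant index computed by getMMAIntrinsic, using the ordering established by the MMA_VARIANTS macros defined just below (row_row, row_col, col_row, col_col for indices 0 through 3):

  Index = Layout + 4 * Satf
    Layout 0 (row_row), Satf = false  ->  0
    Layout 3 (col_col), Satf = false  ->  3
    Layout 1 (row_col), Satf = true   ->  5

Indices 4 through 7 select the *_satfinite variants, and a 0 entry in Variants means the combination is unsupported for that builtin.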
18767
18768
  // Returns an intrinsic that matches Layout and Satf for valid combinations of
18769
  // Layout and Satf, 0 otherwise.
18770
692
static NVPTXMmaInfo getNVPTXMmaInfo(unsigned BuiltinID) {
18771
  // clang-format off
18772
692
#define MMA_VARIANTS(geom, type)                                    \
18773
692
      Intrinsic::nvvm_wmma_##geom##_mma_row_row_##type,             \
18774
680
      Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type,             \
18775
680
      Intrinsic::nvvm_wmma_##geom##_mma_col_row_##type,             \
18776
680
      Intrinsic::nvvm_wmma_##geom##_mma_col_col_##type
18777
692
#define MMA_SATF_VARIANTS(geom, type)                               \
18778
692
      MMA_VARIANTS(geom, type),                                  \
18779
640
      Intrinsic::nvvm_wmma_##geom##_mma_row_row_##type##_satfinite, \
18780
640
      Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type##_satfinite, \
18781
640
      Intrinsic::nvvm_wmma_##geom##_mma_col_row_##type##_satfinite, \
18782
640
      Intrinsic::nvvm_wmma_##geom##_mma_col_col_##type##_satfinite
18783
// Sub-integer MMA only supports row.col layout.
18784
692
#define MMA_VARIANTS_I4(geom, type) \
18785
692
      0, \
18786
8
      Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type,             \
18787
8
      0, \
18788
8
      0, \
18789
8
      0, \
18790
8
      Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type##_satfinite, \
18791
8
      0, \
18792
8
      0
18793
// b1 MMA does not support .satfinite.
18794
692
#define MMA_VARIANTS_B1_XOR(geom, type) \
18795
692
      0, \
18796
2
      Intrinsic::nvvm_wmma_##geom##_mma_xor_popc_row_col_##type,             \
18797
2
      0, \
18798
2
      0, \
18799
2
      0, \
18800
2
      0, \
18801
2
      0, \
18802
2
      0
18803
692
#define MMA_VARIANTS_B1_AND(geom, type) \
18804
692
      0, \
18805
2
      Intrinsic::nvvm_wmma_##geom##_mma_and_popc_row_col_##type,             \
18806
2
      0, \
18807
2
      0, \
18808
2
      0, \
18809
2
      0, \
18810
2
      0, \
18811
2
      0
18812
  // clang-format on
18813
692
  switch (BuiltinID) {
18814
  // FP MMA
18815
  // Note that 'type' argument of MMA_SATF_VARIANTS uses D_C notation, while
18816
  // NumEltsN of return value are ordered as A,B,C,D.
18817
56
  case NVPTX::BI__hmma_m16n16k16_mma_f16f16:
18818
56
    return {8, 8, 4, 4, {{MMA_SATF_VARIANTS(m16n16k16, f16_f16)}}};
18819
56
  case NVPTX::BI__hmma_m16n16k16_mma_f32f16:
18820
56
    return {8, 8, 4, 8, {{MMA_SATF_VARIANTS(m16n16k16, f32_f16)}}};
18821
56
  case NVPTX::BI__hmma_m16n16k16_mma_f16f32:
18822
56
    return {8, 8, 8, 4, {{MMA_SATF_VARIANTS(m16n16k16, f16_f32)}}};
18823
56
  case NVPTX::BI__hmma_m16n16k16_mma_f32f32:
18824
56
    return {8, 8, 8, 8, {{MMA_SATF_VARIANTS(m16n16k16, f32_f32)}}};
18825
40
  case NVPTX::BI__hmma_m32n8k16_mma_f16f16:
18826
40
    return {8, 8, 4, 4, {{MMA_SATF_VARIANTS(m32n8k16, f16_f16)}}};
18827
40
  case NVPTX::BI__hmma_m32n8k16_mma_f32f16:
18828
40
    return {8, 8, 4, 8, {{MMA_SATF_VARIANTS(m32n8k16, f32_f16)}}};
18829
40
  case NVPTX::BI__hmma_m32n8k16_mma_f16f32:
18830
40
    return {8, 8, 8, 4, {{MMA_SATF_VARIANTS(m32n8k16, f16_f32)}}};
18831
40
  case NVPTX::BI__hmma_m32n8k16_mma_f32f32:
18832
40
    return {8, 8, 8, 8, {{MMA_SATF_VARIANTS(m32n8k16, f32_f32)}}};
18833
40
  case NVPTX::BI__hmma_m8n32k16_mma_f16f16:
18834
40
    return {8, 8, 4, 4, {{MMA_SATF_VARIANTS(m8n32k16, f16_f16)}}};
18835
40
  case NVPTX::BI__hmma_m8n32k16_mma_f32f16:
18836
40
    return {8, 8, 4, 8, {{MMA_SATF_VARIANTS(m8n32k16, f32_f16)}}};
18837
40
  case NVPTX::BI__hmma_m8n32k16_mma_f16f32:
18838
40
    return {8, 8, 8, 4, {{MMA_SATF_VARIANTS(m8n32k16, f16_f32)}}};
18839
40
  case NVPTX::BI__hmma_m8n32k16_mma_f32f32:
18840
40
    return {8, 8, 8, 8, {{MMA_SATF_VARIANTS(m8n32k16, f32_f32)}}};
18841
18842
  // Integer MMA
18843
16
  case NVPTX::BI__imma_m16n16k16_mma_s8:
18844
16
    return {2, 2, 8, 8, {{MMA_SATF_VARIANTS(m16n16k16, s8)}}};
18845
16
  case NVPTX::BI__imma_m16n16k16_mma_u8:
18846
16
    return {2, 2, 8, 8, {{MMA_SATF_VARIANTS(m16n16k16, u8)}}};
18847
16
  case NVPTX::BI__imma_m32n8k16_mma_s8:
18848
16
    return {4, 1, 8, 8, {{MMA_SATF_VARIANTS(m32n8k16, s8)}}};
18849
16
  case NVPTX::BI__imma_m32n8k16_mma_u8:
18850
16
    return {4, 1, 8, 8, {{MMA_SATF_VARIANTS(m32n8k16, u8)}}};
18851
16
  case NVPTX::BI__imma_m8n32k16_mma_s8:
18852
16
    return {1, 4, 8, 8, {{MMA_SATF_VARIANTS(m8n32k16, s8)}}};
18853
16
  case NVPTX::BI__imma_m8n32k16_mma_u8:
18854
16
    return {1, 4, 8, 8, {{MMA_SATF_VARIANTS(m8n32k16, u8)}}};
18855
18856
  // Sub-integer MMA
18857
4
  case NVPTX::BI__imma_m8n8k32_mma_s4:
18858
4
    return {1, 1, 2, 2, {{MMA_VARIANTS_I4(m8n8k32, s4)}}};
18859
4
  case NVPTX::BI__imma_m8n8k32_mma_u4:
18860
4
    return {1, 1, 2, 2, {{MMA_VARIANTS_I4(m8n8k32, u4)}}};
18861
2
  case NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1:
18862
2
    return {1, 1, 2, 2, {{MMA_VARIANTS_B1_XOR(m8n8k128, b1)}}};
18863
2
  case NVPTX::BI__bmma_m8n8k128_mma_and_popc_b1:
18864
2
    return {1, 1, 2, 2, {{MMA_VARIANTS_B1_AND(m8n8k128, b1)}}};
18865
18866
  // Double MMA
18867
8
  case NVPTX::BI__dmma_m8n8k4_mma_f64:
18868
8
    return {1, 1, 2, 2, {{MMA_VARIANTS(m8n8k4, f64)}}};
18869
18870
  // Alternate FP MMA
18871
8
  case NVPTX::BI__mma_bf16_m16n16k16_mma_f32:
18872
8
    return {4, 4, 8, 8, {{MMA_VARIANTS(m16n16k16, bf16)}}};
18873
8
  case NVPTX::BI__mma_bf16_m8n32k16_mma_f32:
18874
8
    return {2, 8, 8, 8, {{MMA_VARIANTS(m8n32k16, bf16)}}};
18875
8
  case NVPTX::BI__mma_bf16_m32n8k16_mma_f32:
18876
8
    return {8, 2, 8, 8, {{MMA_VARIANTS(m32n8k16, bf16)}}};
18877
8
  case NVPTX::BI__mma_tf32_m16n16k8_mma_f32:
18878
8
    return {4, 4, 8, 8, {{MMA_VARIANTS(m16n16k8, tf32)}}};
18879
0
  default:
18880
0
    llvm_unreachable("Unexpected builtin ID.");
18881
692
  }
18882
692
#undef MMA_VARIANTS
18883
692
#undef MMA_SATF_VARIANTS
18884
692
#undef MMA_VARIANTS_I4
18885
692
#undef MMA_VARIANTS_B1_AND
18886
692
#undef MMA_VARIANTS_B1_XOR
18887
692
}
18888
18889
static Value *MakeLdgLdu(unsigned IntrinsicID, CodeGenFunction &CGF,
18890
0
                         const CallExpr *E) {
18891
0
  Value *Ptr = CGF.EmitScalarExpr(E->getArg(0));
18892
0
  QualType ArgType = E->getArg(0)->getType();
18893
0
  clang::CharUnits Align = CGF.CGM.getNaturalPointeeTypeAlignment(ArgType);
18894
0
  llvm::Type *ElemTy = CGF.ConvertTypeForMem(ArgType->getPointeeType());
18895
0
  return CGF.Builder.CreateCall(
18896
0
      CGF.CGM.getIntrinsic(IntrinsicID, {ElemTy, Ptr->getType()}),
18897
0
      {Ptr, ConstantInt::get(CGF.Builder.getInt32Ty(), Align.getQuantity())});
18898
0
}
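A short note on the helper above: the second operand MakeLdgLdu passes to the ldg/ldu intrinsic is the natural alignment of the pointee, encoded as a constant i32. A couple of illustrative values, following the "n * alignof(t)" rule quoted in the PTX-interoperability comment further below; the concrete numbers assume the usual 4-byte float and 8-byte double layout:

  float                 -> 4
  float2  (2 x float)   -> 8
  float4  (4 x float)   -> 16
  double2 (2 x double)  -> 16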
18899
18900
static Value *MakeScopedAtomic(unsigned IntrinsicID, CodeGenFunction &CGF,
18901
0
                               const CallExpr *E) {
18902
0
  Value *Ptr = CGF.EmitScalarExpr(E->getArg(0));
18903
0
  llvm::Type *ElemTy =
18904
0
      CGF.ConvertTypeForMem(E->getArg(0)->getType()->getPointeeType());
18905
0
  return CGF.Builder.CreateCall(
18906
0
      CGF.CGM.getIntrinsic(IntrinsicID, {ElemTy, Ptr->getType()}),
18907
0
      {Ptr, CGF.EmitScalarExpr(E->getArg(1))});
18908
0
}
18909
18910
static Value *MakeCpAsync(unsigned IntrinsicID, unsigned IntrinsicIDS,
18911
                          CodeGenFunction &CGF, const CallExpr *E,
18912
0
                          int SrcSize) {
18913
0
  return E->getNumArgs() == 3
18914
0
             ? CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IntrinsicIDS),
18915
0
                                      {CGF.EmitScalarExpr(E->getArg(0)),
18916
0
                                       CGF.EmitScalarExpr(E->getArg(1)),
18917
0
                                       CGF.EmitScalarExpr(E->getArg(2))})
18918
0
             : CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IntrinsicID),
18919
0
                                      {CGF.EmitScalarExpr(E->getArg(0)),
18920
0
                                       CGF.EmitScalarExpr(E->getArg(1))});
18921
0
}
18922
18923
static Value *MakeHalfType(unsigned IntrinsicID, unsigned BuiltinID,
18924
0
                           const CallExpr *E, CodeGenFunction &CGF) {
18925
0
  auto &C = CGF.CGM.getContext();
18926
0
  if (!(C.getLangOpts().NativeHalfType ||
18927
0
        !C.getTargetInfo().useFP16ConversionIntrinsics())) {
18928
0
    CGF.CGM.Error(E->getExprLoc(), C.BuiltinInfo.getName(BuiltinID).str() +
18929
0
                                       " requires native half type support.");
18930
0
    return nullptr;
18931
0
  }
18932
18933
0
  if (IntrinsicID == Intrinsic::nvvm_ldg_global_f ||
18934
0
      IntrinsicID == Intrinsic::nvvm_ldu_global_f)
18935
0
    return MakeLdgLdu(IntrinsicID, CGF, E);
18936
18937
0
  SmallVector<Value *, 16> Args;
18938
0
  auto *F = CGF.CGM.getIntrinsic(IntrinsicID);
18939
0
  auto *FTy = F->getFunctionType();
18940
0
  unsigned ICEArguments = 0;
18941
0
  ASTContext::GetBuiltinTypeError Error;
18942
0
  C.GetBuiltinType(BuiltinID, Error, &ICEArguments);
18943
0
  assert(Error == ASTContext::GE_None && "Should not codegen an error");
18944
0
  for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) {
18945
0
    assert((ICEArguments & (1 << i)) == 0);
18946
0
    auto *ArgValue = CGF.EmitScalarExpr(E->getArg(i));
18947
0
    auto *PTy = FTy->getParamType(i);
18948
0
    if (PTy != ArgValue->getType())
18949
0
      ArgValue = CGF.Builder.CreateBitCast(ArgValue, PTy);
18950
0
    Args.push_back(ArgValue);
18951
0
  }
18952
18953
0
  return CGF.Builder.CreateCall(F, Args);
18954
0
}
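The guard at the top of MakeHalfType is a double negative, so it is worth restating: the builtin is rejected only when the target lacks a native half type and still requires FP16 conversion intrinsics. Derived directly from the condition above:

  NativeHalfType  useFP16ConversionIntrinsics  ->  builtin emitted?
  true            (either)                         yes
  false           false                            yes
  false           true                             no (diagnostic via CGM.Error)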
18955
} // namespace
18956
18957
Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID,
18958
1.09k
                                             const CallExpr *E) {
18959
1.09k
  switch (BuiltinID) {
18960
0
  case NVPTX::BI__nvvm_atom_add_gen_i:
18961
0
  case NVPTX::BI__nvvm_atom_add_gen_l:
18962
0
  case NVPTX::BI__nvvm_atom_add_gen_ll:
18963
0
    return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Add, E);
18964
18965
0
  case NVPTX::BI__nvvm_atom_sub_gen_i:
18966
0
  case NVPTX::BI__nvvm_atom_sub_gen_l:
18967
0
  case NVPTX::BI__nvvm_atom_sub_gen_ll:
18968
0
    return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Sub, E);
18969
18970
0
  case NVPTX::BI__nvvm_atom_and_gen_i:
18971
0
  case NVPTX::BI__nvvm_atom_and_gen_l:
18972
0
  case NVPTX::BI__nvvm_atom_and_gen_ll:
18973
0
    return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::And, E);
18974
18975
0
  case NVPTX::BI__nvvm_atom_or_gen_i:
18976
0
  case NVPTX::BI__nvvm_atom_or_gen_l:
18977
0
  case NVPTX::BI__nvvm_atom_or_gen_ll:
18978
0
    return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Or, E);
18979
18980
0
  case NVPTX::BI__nvvm_atom_xor_gen_i:
18981
0
  case NVPTX::BI__nvvm_atom_xor_gen_l:
18982
0
  case NVPTX::BI__nvvm_atom_xor_gen_ll:
18983
0
    return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xor, E);
18984
18985
0
  case NVPTX::BI__nvvm_atom_xchg_gen_i:
18986
0
  case NVPTX::BI__nvvm_atom_xchg_gen_l:
18987
0
  case NVPTX::BI__nvvm_atom_xchg_gen_ll:
18988
0
    return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xchg, E);
18989
18990
0
  case NVPTX::BI__nvvm_atom_max_gen_i:
18991
0
  case NVPTX::BI__nvvm_atom_max_gen_l:
18992
0
  case NVPTX::BI__nvvm_atom_max_gen_ll:
18993
0
    return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Max, E);
18994
18995
0
  case NVPTX::BI__nvvm_atom_max_gen_ui:
18996
0
  case NVPTX::BI__nvvm_atom_max_gen_ul:
18997
0
  case NVPTX::BI__nvvm_atom_max_gen_ull:
18998
0
    return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMax, E);
18999
19000
0
  case NVPTX::BI__nvvm_atom_min_gen_i:
19001
0
  case NVPTX::BI__nvvm_atom_min_gen_l:
19002
0
  case NVPTX::BI__nvvm_atom_min_gen_ll:
19003
0
    return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Min, E);
19004
19005
0
  case NVPTX::BI__nvvm_atom_min_gen_ui:
19006
0
  case NVPTX::BI__nvvm_atom_min_gen_ul:
19007
0
  case NVPTX::BI__nvvm_atom_min_gen_ull:
19008
0
    return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMin, E);
19009
19010
0
  case NVPTX::BI__nvvm_atom_cas_gen_i:
19011
0
  case NVPTX::BI__nvvm_atom_cas_gen_l:
19012
0
  case NVPTX::BI__nvvm_atom_cas_gen_ll:
19013
    // __nvvm_atom_cas_gen_* should return the old value rather than the
19014
    // success flag.
19015
0
    return MakeAtomicCmpXchgValue(*this, E, /*ReturnBool=*/false);
19016
19017
0
  case NVPTX::BI__nvvm_atom_add_gen_f:
19018
2
  case NVPTX::BI__nvvm_atom_add_gen_d: {
19019
2
    Value *Ptr = EmitScalarExpr(E->getArg(0));
19020
2
    Value *Val = EmitScalarExpr(E->getArg(1));
19021
2
    return Builder.CreateAtomicRMW(llvm::AtomicRMWInst::FAdd, Ptr, Val,
19022
2
                                   AtomicOrdering::SequentiallyConsistent);
19023
0
  }
19024
19025
0
  case NVPTX::BI__nvvm_atom_inc_gen_ui: {
19026
0
    Value *Ptr = EmitScalarExpr(E->getArg(0));
19027
0
    Value *Val = EmitScalarExpr(E->getArg(1));
19028
0
    Function *FnALI32 =
19029
0
        CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_inc_32, Ptr->getType());
19030
0
    return Builder.CreateCall(FnALI32, {Ptr, Val});
19031
0
  }
19032
19033
0
  case NVPTX::BI__nvvm_atom_dec_gen_ui: {
19034
0
    Value *Ptr = EmitScalarExpr(E->getArg(0));
19035
0
    Value *Val = EmitScalarExpr(E->getArg(1));
19036
0
    Function *FnALD32 =
19037
0
        CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_dec_32, Ptr->getType());
19038
0
    return Builder.CreateCall(FnALD32, {Ptr, Val});
19039
0
  }
19040
19041
0
  case NVPTX::BI__nvvm_ldg_c:
19042
0
  case NVPTX::BI__nvvm_ldg_sc:
19043
0
  case NVPTX::BI__nvvm_ldg_c2:
19044
0
  case NVPTX::BI__nvvm_ldg_sc2:
19045
0
  case NVPTX::BI__nvvm_ldg_c4:
19046
0
  case NVPTX::BI__nvvm_ldg_sc4:
19047
0
  case NVPTX::BI__nvvm_ldg_s:
19048
0
  case NVPTX::BI__nvvm_ldg_s2:
19049
0
  case NVPTX::BI__nvvm_ldg_s4:
19050
0
  case NVPTX::BI__nvvm_ldg_i:
19051
0
  case NVPTX::BI__nvvm_ldg_i2:
19052
0
  case NVPTX::BI__nvvm_ldg_i4:
19053
0
  case NVPTX::BI__nvvm_ldg_l:
19054
0
  case NVPTX::BI__nvvm_ldg_l2:
19055
0
  case NVPTX::BI__nvvm_ldg_ll:
19056
0
  case NVPTX::BI__nvvm_ldg_ll2:
19057
0
  case NVPTX::BI__nvvm_ldg_uc:
19058
0
  case NVPTX::BI__nvvm_ldg_uc2:
19059
0
  case NVPTX::BI__nvvm_ldg_uc4:
19060
0
  case NVPTX::BI__nvvm_ldg_us:
19061
0
  case NVPTX::BI__nvvm_ldg_us2:
19062
0
  case NVPTX::BI__nvvm_ldg_us4:
19063
0
  case NVPTX::BI__nvvm_ldg_ui:
19064
0
  case NVPTX::BI__nvvm_ldg_ui2:
19065
0
  case NVPTX::BI__nvvm_ldg_ui4:
19066
0
  case NVPTX::BI__nvvm_ldg_ul:
19067
0
  case NVPTX::BI__nvvm_ldg_ul2:
19068
0
  case NVPTX::BI__nvvm_ldg_ull:
19069
0
  case NVPTX::BI__nvvm_ldg_ull2:
19070
    // PTX Interoperability section 2.2: "For a vector with an even number of
19071
    // elements, its alignment is set to number of elements times the alignment
19072
    // of its member: n*alignof(t)."
19073
0
    return MakeLdgLdu(Intrinsic::nvvm_ldg_global_i, *this, E);
19074
0
  case NVPTX::BI__nvvm_ldg_f:
19075
0
  case NVPTX::BI__nvvm_ldg_f2:
19076
0
  case NVPTX::BI__nvvm_ldg_f4:
19077
0
  case NVPTX::BI__nvvm_ldg_d:
19078
0
  case NVPTX::BI__nvvm_ldg_d2:
19079
0
    return MakeLdgLdu(Intrinsic::nvvm_ldg_global_f, *this, E);
19080
19081
0
  case NVPTX::BI__nvvm_ldu_c:
19082
0
  case NVPTX::BI__nvvm_ldu_sc:
19083
0
  case NVPTX::BI__nvvm_ldu_c2:
19084
0
  case NVPTX::BI__nvvm_ldu_sc2:
19085
0
  case NVPTX::BI__nvvm_ldu_c4:
19086
0
  case NVPTX::BI__nvvm_ldu_sc4:
19087
0
  case NVPTX::BI__nvvm_ldu_s:
19088
0
  case NVPTX::BI__nvvm_ldu_s2:
19089
0
  case NVPTX::BI__nvvm_ldu_s4:
19090
0
  case NVPTX::BI__nvvm_ldu_i:
19091
0
  case NVPTX::BI__nvvm_ldu_i2:
19092
0
  case NVPTX::BI__nvvm_ldu_i4:
19093
0
  case NVPTX::BI__nvvm_ldu_l:
19094
0
  case NVPTX::BI__nvvm_ldu_l2:
19095
0
  case NVPTX::BI__nvvm_ldu_ll:
19096
0
  case NVPTX::BI__nvvm_ldu_ll2:
19097
0
  case NVPTX::BI__nvvm_ldu_uc:
19098
0
  case NVPTX::BI__nvvm_ldu_uc2:
19099
0
  case NVPTX::BI__nvvm_ldu_uc4:
19100
0
  case NVPTX::BI__nvvm_ldu_us:
19101
0
  case NVPTX::BI__nvvm_ldu_us2:
19102
0
  case NVPTX::BI__nvvm_ldu_us4:
19103
0
  case NVPTX::BI__nvvm_ldu_ui:
19104
0
  case NVPTX::BI__nvvm_ldu_ui2:
19105
0
  case NVPTX::BI__nvvm_ldu_ui4:
19106
0
  case NVPTX::BI__nvvm_ldu_ul:
19107
0
  case NVPTX::BI__nvvm_ldu_ul2:
19108
0
  case NVPTX::BI__nvvm_ldu_ull:
19109
0
  case NVPTX::BI__nvvm_ldu_ull2:
19110
0
    return MakeLdgLdu(Intrinsic::nvvm_ldu_global_i, *this, E);
19111
0
  case NVPTX::BI__nvvm_ldu_f:
19112
0
  case NVPTX::BI__nvvm_ldu_f2:
19113
0
  case NVPTX::BI__nvvm_ldu_f4:
19114
0
  case NVPTX::BI__nvvm_ldu_d:
19115
0
  case NVPTX::BI__nvvm_ldu_d2:
19116
0
    return MakeLdgLdu(Intrinsic::nvvm_ldu_global_f, *this, E);
19117
19118
0
  case NVPTX::BI__nvvm_atom_cta_add_gen_i:
19119
0
  case NVPTX::BI__nvvm_atom_cta_add_gen_l:
19120
0
  case NVPTX::BI__nvvm_atom_cta_add_gen_ll:
19121
0
    return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_cta, *this, E);
19122
0
  case NVPTX::BI__nvvm_atom_sys_add_gen_i:
19123
0
  case NVPTX::BI__nvvm_atom_sys_add_gen_l:
19124
0
  case NVPTX::BI__nvvm_atom_sys_add_gen_ll:
19125
0
    return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_sys, *this, E);
19126
0
  case NVPTX::BI__nvvm_atom_cta_add_gen_f:
19127
0
  case NVPTX::BI__nvvm_atom_cta_add_gen_d:
19128
0
    return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_cta, *this, E);
19129
0
  case NVPTX::BI__nvvm_atom_sys_add_gen_f:
19130
0
  case NVPTX::BI__nvvm_atom_sys_add_gen_d:
19131
0
    return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_sys, *this, E);
19132
0
  case NVPTX::BI__nvvm_atom_cta_xchg_gen_i:
19133
0
  case NVPTX::BI__nvvm_atom_cta_xchg_gen_l:
19134
0
  case NVPTX::BI__nvvm_atom_cta_xchg_gen_ll:
19135
0
    return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_cta, *this, E);
19136
0
  case NVPTX::BI__nvvm_atom_sys_xchg_gen_i:
19137
0
  case NVPTX::BI__nvvm_atom_sys_xchg_gen_l:
19138
0
  case NVPTX::BI__nvvm_atom_sys_xchg_gen_ll:
19139
0
    return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_sys, *this, E);
19140
0
  case NVPTX::BI__nvvm_atom_cta_max_gen_i:
19141
0
  case NVPTX::BI__nvvm_atom_cta_max_gen_ui:
19142
0
  case NVPTX::BI__nvvm_atom_cta_max_gen_l:
19143
0
  case NVPTX::BI__nvvm_atom_cta_max_gen_ul:
19144
0
  case NVPTX::BI__nvvm_atom_cta_max_gen_ll:
19145
0
  case NVPTX::BI__nvvm_atom_cta_max_gen_ull:
19146
0
    return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_cta, *this, E);
19147
0
  case NVPTX::BI__nvvm_atom_sys_max_gen_i:
19148
0
  case NVPTX::BI__nvvm_atom_sys_max_gen_ui:
19149
0
  case NVPTX::BI__nvvm_atom_sys_max_gen_l:
19150
0
  case NVPTX::BI__nvvm_atom_sys_max_gen_ul:
19151
0
  case NVPTX::BI__nvvm_atom_sys_max_gen_ll:
19152
0
  case NVPTX::BI__nvvm_atom_sys_max_gen_ull:
19153
0
    return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_sys, *this, E);
19154
0
  case NVPTX::BI__nvvm_atom_cta_min_gen_i:
19155
0
  case NVPTX::BI__nvvm_atom_cta_min_gen_ui:
19156
0
  case NVPTX::BI__nvvm_atom_cta_min_gen_l:
19157
0
  case NVPTX::BI__nvvm_atom_cta_min_gen_ul:
19158
0
  case NVPTX::BI__nvvm_atom_cta_min_gen_ll:
19159
0
  case NVPTX::BI__nvvm_atom_cta_min_gen_ull:
19160
0
    return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_cta, *this, E);
19161
0
  case NVPTX::BI__nvvm_atom_sys_min_gen_i:
19162
0
  case NVPTX::BI__nvvm_atom_sys_min_gen_ui:
19163
0
  case NVPTX::BI__nvvm_atom_sys_min_gen_l:
19164
0
  case NVPTX::BI__nvvm_atom_sys_min_gen_ul:
19165
0
  case NVPTX::BI__nvvm_atom_sys_min_gen_ll:
19166
0
  case NVPTX::BI__nvvm_atom_sys_min_gen_ull:
19167
0
    return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_sys, *this, E);
19168
0
  case NVPTX::BI__nvvm_atom_cta_inc_gen_ui:
19169
0
    return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_cta, *this, E);
19170
0
  case NVPTX::BI__nvvm_atom_cta_dec_gen_ui:
19171
0
    return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_cta, *this, E);
19172
0
  case NVPTX::BI__nvvm_atom_sys_inc_gen_ui:
19173
0
    return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_sys, *this, E);
19174
0
  case NVPTX::BI__nvvm_atom_sys_dec_gen_ui:
19175
0
    return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_sys, *this, E);
19176
0
  case NVPTX::BI__nvvm_atom_cta_and_gen_i:
19177
0
  case NVPTX::BI__nvvm_atom_cta_and_gen_l:
19178
0
  case NVPTX::BI__nvvm_atom_cta_and_gen_ll:
19179
0
    return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_cta, *this, E);
19180
0
  case NVPTX::BI__nvvm_atom_sys_and_gen_i:
19181
0
  case NVPTX::BI__nvvm_atom_sys_and_gen_l:
19182
0
  case NVPTX::BI__nvvm_atom_sys_and_gen_ll:
19183
0
    return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_sys, *this, E);
19184
0
  case NVPTX::BI__nvvm_atom_cta_or_gen_i:
19185
0
  case NVPTX::BI__nvvm_atom_cta_or_gen_l:
19186
0
  case NVPTX::BI__nvvm_atom_cta_or_gen_ll:
19187
0
    return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_cta, *this, E);
19188
0
  case NVPTX::BI__nvvm_atom_sys_or_gen_i:
19189
0
  case NVPTX::BI__nvvm_atom_sys_or_gen_l:
19190
0
  case NVPTX::BI__nvvm_atom_sys_or_gen_ll:
19191
0
    return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_sys, *this, E);
19192
0
  case NVPTX::BI__nvvm_atom_cta_xor_gen_i:
19193
0
  case NVPTX::BI__nvvm_atom_cta_xor_gen_l:
19194
0
  case NVPTX::BI__nvvm_atom_cta_xor_gen_ll:
19195
0
    return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_cta, *this, E);
19196
0
  case NVPTX::BI__nvvm_atom_sys_xor_gen_i:
19197
0
  case NVPTX::BI__nvvm_atom_sys_xor_gen_l:
19198
0
  case NVPTX::BI__nvvm_atom_sys_xor_gen_ll:
19199
0
    return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_sys, *this, E);
19200
0
  case NVPTX::BI__nvvm_atom_cta_cas_gen_i:
19201
0
  case NVPTX::BI__nvvm_atom_cta_cas_gen_l:
19202
0
  case NVPTX::BI__nvvm_atom_cta_cas_gen_ll: {
19203
0
    Value *Ptr = EmitScalarExpr(E->getArg(0));
19204
0
    llvm::Type *ElemTy =
19205
0
        ConvertTypeForMem(E->getArg(0)->getType()->getPointeeType());
19206
0
    return Builder.CreateCall(
19207
0
        CGM.getIntrinsic(
19208
0
            Intrinsic::nvvm_atomic_cas_gen_i_cta, {ElemTy, Ptr->getType()}),
19209
0
        {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))});
19210
0
  }
19211
0
  case NVPTX::BI__nvvm_atom_sys_cas_gen_i:
19212
0
  case NVPTX::BI__nvvm_atom_sys_cas_gen_l:
19213
0
  case NVPTX::BI__nvvm_atom_sys_cas_gen_ll: {
19214
0
    Value *Ptr = EmitScalarExpr(E->getArg(0));
19215
0
    llvm::Type *ElemTy =
19216
0
        ConvertTypeForMem(E->getArg(0)->getType()->getPointeeType());
19217
0
    return Builder.CreateCall(
19218
0
        CGM.getIntrinsic(
19219
0
            Intrinsic::nvvm_atomic_cas_gen_i_sys, {ElemTy, Ptr->getType()}),
19220
0
        {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))});
19221
0
  }
19222
4
  case NVPTX::BI__nvvm_match_all_sync_i32p:
19223
8
  case NVPTX::BI__nvvm_match_all_sync_i64p: {
19224
8
    Value *Mask = EmitScalarExpr(E->getArg(0));
19225
8
    Value *Val = EmitScalarExpr(E->getArg(1));
19226
8
    Address PredOutPtr = EmitPointerWithAlignment(E->getArg(2));
19227
8
    Value *ResultPair = Builder.CreateCall(
19228
8
        CGM.getIntrinsic(BuiltinID == NVPTX::BI__nvvm_match_all_sync_i32p
19229
8
                             ? Intrinsic::nvvm_match_all_sync_i32p
19230
8
                             : Intrinsic::nvvm_match_all_sync_i64p),
19231
8
        {Mask, Val});
19232
8
    Value *Pred = Builder.CreateZExt(Builder.CreateExtractValue(ResultPair, 1),
19233
8
                                     PredOutPtr.getElementType());
19234
8
    Builder.CreateStore(Pred, PredOutPtr);
19235
8
    return Builder.CreateExtractValue(ResultPair, 0);
19236
4
  }
19237
19238
  // FP MMA loads
19239
14
  case NVPTX::BI__hmma_m16n16k16_ld_a:
19240
28
  case NVPTX::BI__hmma_m16n16k16_ld_b:
19241
42
  case NVPTX::BI__hmma_m16n16k16_ld_c_f16:
19242
56
  case NVPTX::BI__hmma_m16n16k16_ld_c_f32:
19243
66
  case NVPTX::BI__hmma_m32n8k16_ld_a:
19244
76
  case NVPTX::BI__hmma_m32n8k16_ld_b:
19245
86
  case NVPTX::BI__hmma_m32n8k16_ld_c_f16:
19246
96
  case NVPTX::BI__hmma_m32n8k16_ld_c_f32:
19247
106
  case NVPTX::BI__hmma_m8n32k16_ld_a:
19248
116
  case NVPTX::BI__hmma_m8n32k16_ld_b:
19249
126
  case NVPTX::BI__hmma_m8n32k16_ld_c_f16:
19250
136
  case NVPTX::BI__hmma_m8n32k16_ld_c_f32:
19251
  // Integer MMA loads.
19252
140
  case NVPTX::BI__imma_m16n16k16_ld_a_s8:
19253
144
  case NVPTX::BI__imma_m16n16k16_ld_a_u8:
19254
148
  case NVPTX::BI__imma_m16n16k16_ld_b_s8:
19255
152
  case NVPTX::BI__imma_m16n16k16_ld_b_u8:
19256
156
  case NVPTX::BI__imma_m16n16k16_ld_c:
19257
160
  case NVPTX::BI__imma_m32n8k16_ld_a_s8:
19258
164
  case NVPTX::BI__imma_m32n8k16_ld_a_u8:
19259
168
  case NVPTX::BI__imma_m32n8k16_ld_b_s8:
19260
172
  case NVPTX::BI__imma_m32n8k16_ld_b_u8:
19261
176
  case NVPTX::BI__imma_m32n8k16_ld_c:
19262
180
  case NVPTX::BI__imma_m8n32k16_ld_a_s8:
19263
184
  case NVPTX::BI__imma_m8n32k16_ld_a_u8:
19264
188
  case NVPTX::BI__imma_m8n32k16_ld_b_s8:
19265
192
  case NVPTX::BI__imma_m8n32k16_ld_b_u8:
19266
196
  case NVPTX::BI__imma_m8n32k16_ld_c:
19267
  // Sub-integer MMA loads.
19268
198
  case NVPTX::BI__imma_m8n8k32_ld_a_s4:
19269
200
  case NVPTX::BI__imma_m8n8k32_ld_a_u4:
19270
202
  case NVPTX::BI__imma_m8n8k32_ld_b_s4:
19271
204
  case NVPTX::BI__imma_m8n8k32_ld_b_u4:
19272
208
  case NVPTX::BI__imma_m8n8k32_ld_c:
19273
210
  case NVPTX::BI__bmma_m8n8k128_ld_a_b1:
19274
212
  case NVPTX::BI__bmma_m8n8k128_ld_b_b1:
19275
216
  case NVPTX::BI__bmma_m8n8k128_ld_c:
19276
  // Double MMA loads.
19277
220
  case NVPTX::BI__dmma_m8n8k4_ld_a:
19278
224
  case NVPTX::BI__dmma_m8n8k4_ld_b:
19279
228
  case NVPTX::BI__dmma_m8n8k4_ld_c:
19280
  // Alternate float MMA loads.
19281
232
  case NVPTX::BI__mma_bf16_m16n16k16_ld_a:
19282
236
  case NVPTX::BI__mma_bf16_m16n16k16_ld_b:
19283
240
  case NVPTX::BI__mma_bf16_m8n32k16_ld_a:
19284
244
  case NVPTX::BI__mma_bf16_m8n32k16_ld_b:
19285
248
  case NVPTX::BI__mma_bf16_m32n8k16_ld_a:
19286
252
  case NVPTX::BI__mma_bf16_m32n8k16_ld_b:
19287
256
  case NVPTX::BI__mma_tf32_m16n16k8_ld_a:
19288
260
  case NVPTX::BI__mma_tf32_m16n16k8_ld_b:
19289
264
  case NVPTX::BI__mma_tf32_m16n16k8_ld_c: {
19290
264
    Address Dst = EmitPointerWithAlignment(E->getArg(0));
19291
264
    Value *Src = EmitScalarExpr(E->getArg(1));
19292
264
    Value *Ldm = EmitScalarExpr(E->getArg(2));
19293
264
    std::optional<llvm::APSInt> isColMajorArg =
19294
264
        E->getArg(3)->getIntegerConstantExpr(getContext());
19295
264
    if (!isColMajorArg)
19296
0
      return nullptr;
19297
264
    bool isColMajor = isColMajorArg->getSExtValue();
19298
264
    NVPTXMmaLdstInfo II = getNVPTXMmaLdstInfo(BuiltinID);
19299
264
    unsigned IID = isColMajor ? II.IID_col : II.IID_row;
19300
264
    if (IID == 0)
19301
0
      return nullptr;
19302
19303
264
    Value *Result =
19304
264
        Builder.CreateCall(CGM.getIntrinsic(IID, Src->getType()), {Src, Ldm});
19305
19306
    // Save returned values.
19307
264
    assert(II.NumResults);
19308
264
    if (II.NumResults == 1) {
19309
36
      Builder.CreateAlignedStore(Result, Dst.getPointer(),
19310
36
                                 CharUnits::fromQuantity(4));
19311
228
    } else {
19312
1.57k
      for (unsigned i = 0; i < II.NumResults; ++i) {
19313
1.34k
        Builder.CreateAlignedStore(
19314
1.34k
            Builder.CreateBitCast(Builder.CreateExtractValue(Result, i),
19315
1.34k
                                  Dst.getElementType()),
19316
1.34k
            Builder.CreateGEP(Dst.getElementType(), Dst.getPointer(),
19317
1.34k
                              llvm::ConstantInt::get(IntTy, i)),
19318
1.34k
            CharUnits::fromQuantity(4));
19319
1.34k
      }
19320
228
    }
19321
264
    return Result;
19322
264
  }
19323
19324
14
  case NVPTX::BI__hmma_m16n16k16_st_c_f16:
19325
28
  case NVPTX::BI__hmma_m16n16k16_st_c_f32:
19326
38
  case NVPTX::BI__hmma_m32n8k16_st_c_f16:
19327
48
  case NVPTX::BI__hmma_m32n8k16_st_c_f32:
19328
58
  case NVPTX::BI__hmma_m8n32k16_st_c_f16:
19329
68
  case NVPTX::BI__hmma_m8n32k16_st_c_f32:
19330
72
  case NVPTX::BI__imma_m16n16k16_st_c_i32:
19331
76
  case NVPTX::BI__imma_m32n8k16_st_c_i32:
19332
80
  case NVPTX::BI__imma_m8n32k16_st_c_i32:
19333
84
  case NVPTX::BI__imma_m8n8k32_st_c_i32:
19334
88
  case NVPTX::BI__bmma_m8n8k128_st_c_i32:
19335
92
  case NVPTX::BI__dmma_m8n8k4_st_c_f64:
19336
96
  case NVPTX::BI__mma_m16n16k8_st_c_f32: {
19337
96
    Value *Dst = EmitScalarExpr(E->getArg(0));
19338
96
    Address Src = EmitPointerWithAlignment(E->getArg(1));
19339
96
    Value *Ldm = EmitScalarExpr(E->getArg(2));
19340
96
    std::optional<llvm::APSInt> isColMajorArg =
19341
96
        E->getArg(3)->getIntegerConstantExpr(getContext());
19342
96
    if (!isColMajorArg)
19343
0
      return nullptr;
19344
96
    bool isColMajor = isColMajorArg->getSExtValue();
19345
96
    NVPTXMmaLdstInfo II = getNVPTXMmaLdstInfo(BuiltinID);
19346
96
    unsigned IID = isColMajor ? II.IID_col : II.IID_row;
19347
96
    if (IID == 0)
19348
0
      return nullptr;
19349
96
    Function *Intrinsic =
19350
96
        CGM.getIntrinsic(IID, Dst->getType());
19351
96
    llvm::Type *ParamType = Intrinsic->getFunctionType()->getParamType(1);
19352
96
    SmallVector<Value *, 10> Values = {Dst};
19353
656
    for (unsigned i = 0; i < II.NumResults; ++i) {
19354
560
      Value *V = Builder.CreateAlignedLoad(
19355
560
          Src.getElementType(),
19356
560
          Builder.CreateGEP(Src.getElementType(), Src.getPointer(),
19357
560
                            llvm::ConstantInt::get(IntTy, i)),
19358
560
          CharUnits::fromQuantity(4));
19359
560
      Values.push_back(Builder.CreateBitCast(V, ParamType));
19360
560
    }
19361
96
    Values.push_back(Ldm);
19362
96
    Value *Result = Builder.CreateCall(Intrinsic, Values);
19363
96
    return Result;
19364
96
  }
19365
19366
  // BI__hmma_m16n16k16_mma_<Dtype><CType>(d, a, b, c, layout, satf) -->
19367
  // Intrinsic::nvvm_wmma_m16n16k16_mma_sync<layout A,B><DType><CType><Satf>
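As the comment above notes, the <Dtype><CType> suffix of the builtin together with the layout and satf arguments selects one concrete wmma intrinsic. A toy sketch of that selection follows; the ordering of the layout/satf variants and the name strings are illustrative assumptions, not the actual table in getNVPTXMmaInfo().

#include <array>
#include <cstdio>

// Illustrative only: eight variants indexed by (layout, satf). The real
// ordering lives in the NVPTXMmaInfo tables; these strings are placeholders.
static const std::array<const char *, 8> Variants = {
    "row.row", "row.row.satfinite", "row.col", "row.col.satfinite",
    "col.row", "col.row.satfinite", "col.col", "col.col.satfinite"};

static const char *pickVariant(int Layout, bool Satf) {
  if (Layout < 0 || Layout > 3)
    return nullptr; // unsupported combination, like the IID == 0 checks
  return Variants[Layout * 2 + (Satf ? 1 : 0)];
}

int main() {
  // e.g. __hmma_m16n16k16_mma_f32f32(d, a, b, c, /*layout=*/0, /*satf=*/0)
  std::printf("llvm.nvvm.wmma.m16n16k16.mma.%s.f32.f32\n",
              pickVariant(0, false));
}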
19368
56
  case NVPTX::BI__hmma_m16n16k16_mma_f16f16:
19369
112
  case NVPTX::BI__hmma_m16n16k16_mma_f32f16:
19370
168
  case NVPTX::BI__hmma_m16n16k16_mma_f32f32:
19371
224
  case NVPTX::BI__hmma_m16n16k16_mma_f16f32:
19372
264
  case NVPTX::BI__hmma_m32n8k16_mma_f16f16:
19373
304
  case NVPTX::BI__hmma_m32n8k16_mma_f32f16:
19374
344
  case NVPTX::BI__hmma_m32n8k16_mma_f32f32:
19375
384
  case NVPTX::BI__hmma_m32n8k16_mma_f16f32:
19376
424
  case NVPTX::BI__hmma_m8n32k16_mma_f16f16:
19377
464
  case NVPTX::BI__hmma_m8n32k16_mma_f32f16:
19378
504
  case NVPTX::BI__hmma_m8n32k16_mma_f32f32:
19379
544
  case NVPTX::BI__hmma_m8n32k16_mma_f16f32:
19380
560
  case NVPTX::BI__imma_m16n16k16_mma_s8:
19381
576
  case NVPTX::BI__imma_m16n16k16_mma_u8:
19382
592
  case NVPTX::BI__imma_m32n8k16_mma_s8:
19383
608
  case NVPTX::BI__imma_m32n8k16_mma_u8:
19384
624
  case NVPTX::BI__imma_m8n32k16_mma_s8:
19385
640
  case NVPTX::BI__imma_m8n32k16_mma_u8:
19386
644
  case NVPTX::BI__imma_m8n8k32_mma_s4:
19387
648
  case NVPTX::BI__imma_m8n8k32_mma_u4:
19388
650
  case NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1:
19389
652
  case NVPTX::BI__bmma_m8n8k128_mma_and_popc_b1:
19390
660
  case NVPTX::BI__dmma_m8n8k4_mma_f64:
19391
668
  case NVPTX::BI__mma_bf16_m16n16k16_mma_f32:
19392
676
  case NVPTX::BI__mma_bf16_m8n32k16_mma_f32:
19393
684
  case NVPTX::BI__mma_bf16_m32n8k16_mma_f32:
19394
692
  case NVPTX::BI__mma_tf32_m16n16k8_mma_f32: {
19395
692
    Address Dst = EmitPointerWithAlignment(E->getArg(0));
19396
692
    Address SrcA = EmitPointerWithAlignment(E->getArg(1));
19397
692
    Address SrcB = EmitPointerWithAlignment(E->getArg(2));
19398
692
    Address SrcC = EmitPointerWithAlignment(E->getArg(3));
19399
692
    std::optional<llvm::APSInt> LayoutArg =
19400
692
        E->getArg(4)->getIntegerConstantExpr(getContext());
19401
692
    if (!LayoutArg)
19402
0
      return nullptr;
19403
692
    int Layout = LayoutArg->getSExtValue();
19404
692
    if (Layout < 0 || Layout > 3)
19405
0
      return nullptr;
19406
692
    llvm::APSInt SatfArg;
19407
692
    if (BuiltinID == NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1 ||
19408
692
        BuiltinID == NVPTX::BI__bmma_m8n8k128_mma_and_popc_b1)
19409
4
      SatfArg = 0;  // .b1 does not have satf argument.
19410
688
    else if (std::optional<llvm::APSInt> OptSatfArg =
19411
688
                 E->getArg(5)->getIntegerConstantExpr(getContext()))
19412
688
      SatfArg = *OptSatfArg;
19413
0
    else
19414
0
      return nullptr;
19415
692
    bool Satf = SatfArg.getSExtValue();
19416
692
    NVPTXMmaInfo MI = getNVPTXMmaInfo(BuiltinID);
19417
692
    unsigned IID = MI.getMMAIntrinsic(Layout, Satf);
19418
692
    if (IID == 0)  // Unsupported combination of Layout/Satf.
19419
0
      return nullptr;
19420
19421
692
    SmallVector<Value *, 24> Values;
19422
692
    Function *Intrinsic = CGM.getIntrinsic(IID);
19423
692
    llvm::Type *AType = Intrinsic->getFunctionType()->getParamType(0);
19424
    // Load A
19425
5.43k
    for (unsigned i = 0; i < MI.NumEltsA; ++i) {
19426
4.74k
      Value *V = Builder.CreateAlignedLoad(
19427
4.74k
          SrcA.getElementType(),
19428
4.74k
          Builder.CreateGEP(SrcA.getElementType(), SrcA.getPointer(),
19429
4.74k
                            llvm::ConstantInt::get(IntTy, i)),
19430
4.74k
          CharUnits::fromQuantity(4));
19431
4.74k
      Values.push_back(Builder.CreateBitCast(V, AType));
19432
4.74k
    }
19433
    // Load B
19434
692
    llvm::Type *BType = Intrinsic->getFunctionType()->getParamType(MI.NumEltsA);
19435
5.43k
    for (unsigned i = 0; i < MI.NumEltsB; ++i) {
19436
4.74k
      Value *V = Builder.CreateAlignedLoad(
19437
4.74k
          SrcB.getElementType(),
19438
4.74k
          Builder.CreateGEP(SrcB.getElementType(), SrcB.getPointer(),
19439
4.74k
                            llvm::ConstantInt::get(IntTy, i)),
19440
4.74k
          CharUnits::fromQuantity(4));
19441
4.74k
      Values.push_back(Builder.CreateBitCast(V, BType));
19442
4.74k
    }
19443
    // Load C
19444
692
    llvm::Type *CType =
19445
692
        Intrinsic->getFunctionType()->getParamType(MI.NumEltsA + MI.NumEltsB);
19446
5.02k
    for (unsigned i = 0; i < MI.NumEltsC; ++i) {
19447
4.32k
      Value *V = Builder.CreateAlignedLoad(
19448
4.32k
          SrcC.getElementType(),
19449
4.32k
          Builder.CreateGEP(SrcC.getElementType(), SrcC.getPointer(),
19450
4.32k
                            llvm::ConstantInt::get(IntTy, i)),
19451
4.32k
          CharUnits::fromQuantity(4));
19452
4.32k
      Values.push_back(Builder.CreateBitCast(V, CType));
19453
4.32k
    }
19454
692
    Value *Result = Builder.CreateCall(Intrinsic, Values);
19455
692
    llvm::Type *DType = Dst.getElementType();
19456
5.02k
    for (unsigned i = 0; i < MI.NumEltsD; ++i)
19457
4.32k
      Builder.CreateAlignedStore(
19458
4.32k
          Builder.CreateBitCast(Builder.CreateExtractValue(Result, i), DType),
19459
4.32k
          Builder.CreateGEP(Dst.getElementType(), Dst.getPointer(),
19460
4.32k
                            llvm::ConstantInt::get(IntTy, i)),
19461
4.32k
          CharUnits::fromQuantity(4));
19462
692
    return Result;
19463
692
  }
19464
  // The following builtins require half type support
19465
0
  case NVPTX::BI__nvvm_ex2_approx_f16:
19466
0
    return MakeHalfType(Intrinsic::nvvm_ex2_approx_f16, BuiltinID, E, *this);
19467
0
  case NVPTX::BI__nvvm_ex2_approx_f16x2:
19468
0
    return MakeHalfType(Intrinsic::nvvm_ex2_approx_f16x2, BuiltinID, E, *this);
19469
0
  case NVPTX::BI__nvvm_ff2f16x2_rn:
19470
0
    return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rn, BuiltinID, E, *this);
19471
0
  case NVPTX::BI__nvvm_ff2f16x2_rn_relu:
19472
0
    return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rn_relu, BuiltinID, E, *this);
19473
0
  case NVPTX::BI__nvvm_ff2f16x2_rz:
19474
0
    return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rz, BuiltinID, E, *this);
19475
0
  case NVPTX::BI__nvvm_ff2f16x2_rz_relu:
19476
0
    return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rz_relu, BuiltinID, E, *this);
19477
0
  case NVPTX::BI__nvvm_fma_rn_f16:
19478
0
    return MakeHalfType(Intrinsic::nvvm_fma_rn_f16, BuiltinID, E, *this);
19479
0
  case NVPTX::BI__nvvm_fma_rn_f16x2:
19480
0
    return MakeHalfType(Intrinsic::nvvm_fma_rn_f16x2, BuiltinID, E, *this);
19481
0
  case NVPTX::BI__nvvm_fma_rn_ftz_f16:
19482
0
    return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_f16, BuiltinID, E, *this);
19483
0
  case NVPTX::BI__nvvm_fma_rn_ftz_f16x2:
19484
0
    return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_f16x2, BuiltinID, E, *this);
19485
0
  case NVPTX::BI__nvvm_fma_rn_ftz_relu_f16:
19486
0
    return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_relu_f16, BuiltinID, E,
19487
0
                        *this);
19488
0
  case NVPTX::BI__nvvm_fma_rn_ftz_relu_f16x2:
19489
0
    return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_relu_f16x2, BuiltinID, E,
19490
0
                        *this);
19491
0
  case NVPTX::BI__nvvm_fma_rn_ftz_sat_f16:
19492
0
    return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_sat_f16, BuiltinID, E,
19493
0
                        *this);
19494
0
  case NVPTX::BI__nvvm_fma_rn_ftz_sat_f16x2:
19495
0
    return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_sat_f16x2, BuiltinID, E,
19496
0
                        *this);
19497
0
  case NVPTX::BI__nvvm_fma_rn_relu_f16:
19498
0
    return MakeHalfType(Intrinsic::nvvm_fma_rn_relu_f16, BuiltinID, E, *this);
19499
0
  case NVPTX::BI__nvvm_fma_rn_relu_f16x2:
19500
0
    return MakeHalfType(Intrinsic::nvvm_fma_rn_relu_f16x2, BuiltinID, E, *this);
19501
0
  case NVPTX::BI__nvvm_fma_rn_sat_f16:
19502
0
    return MakeHalfType(Intrinsic::nvvm_fma_rn_sat_f16, BuiltinID, E, *this);
19503
0
  case NVPTX::BI__nvvm_fma_rn_sat_f16x2:
19504
0
    return MakeHalfType(Intrinsic::nvvm_fma_rn_sat_f16x2, BuiltinID, E, *this);
19505
0
  case NVPTX::BI__nvvm_fmax_f16:
19506
0
    return MakeHalfType(Intrinsic::nvvm_fmax_f16, BuiltinID, E, *this);
19507
0
  case NVPTX::BI__nvvm_fmax_f16x2:
19508
0
    return MakeHalfType(Intrinsic::nvvm_fmax_f16x2, BuiltinID, E, *this);
19509
0
  case NVPTX::BI__nvvm_fmax_ftz_f16:
19510
0
    return MakeHalfType(Intrinsic::nvvm_fmax_ftz_f16, BuiltinID, E, *this);
19511
0
  case NVPTX::BI__nvvm_fmax_ftz_f16x2:
19512
0
    return MakeHalfType(Intrinsic::nvvm_fmax_ftz_f16x2, BuiltinID, E, *this);
19513
0
  case NVPTX::BI__nvvm_fmax_ftz_nan_f16:
19514
0
    return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_f16, BuiltinID, E, *this);
19515
0
  case NVPTX::BI__nvvm_fmax_ftz_nan_f16x2:
19516
0
    return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_f16x2, BuiltinID, E,
19517
0
                        *this);
19518
0
  case NVPTX::BI__nvvm_fmax_ftz_nan_xorsign_abs_f16:
19519
0
    return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f16, BuiltinID,
19520
0
                        E, *this);
19521
0
  case NVPTX::BI__nvvm_fmax_ftz_nan_xorsign_abs_f16x2:
19522
0
    return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f16x2,
19523
0
                        BuiltinID, E, *this);
19524
0
  case NVPTX::BI__nvvm_fmax_ftz_xorsign_abs_f16:
19525
0
    return MakeHalfType(Intrinsic::nvvm_fmax_ftz_xorsign_abs_f16, BuiltinID, E,
19526
0
                        *this);
19527
0
  case NVPTX::BI__nvvm_fmax_ftz_xorsign_abs_f16x2:
19528
0
    return MakeHalfType(Intrinsic::nvvm_fmax_ftz_xorsign_abs_f16x2, BuiltinID,
19529
0
                        E, *this);
19530
0
  case NVPTX::BI__nvvm_fmax_nan_f16:
19531
0
    return MakeHalfType(Intrinsic::nvvm_fmax_nan_f16, BuiltinID, E, *this);
19532
0
  case NVPTX::BI__nvvm_fmax_nan_f16x2:
19533
0
    return MakeHalfType(Intrinsic::nvvm_fmax_nan_f16x2, BuiltinID, E, *this);
19534
0
  case NVPTX::BI__nvvm_fmax_nan_xorsign_abs_f16:
19535
0
    return MakeHalfType(Intrinsic::nvvm_fmax_nan_xorsign_abs_f16, BuiltinID, E,
19536
0
                        *this);
19537
0
  case NVPTX::BI__nvvm_fmax_nan_xorsign_abs_f16x2:
19538
0
    return MakeHalfType(Intrinsic::nvvm_fmax_nan_xorsign_abs_f16x2, BuiltinID,
19539
0
                        E, *this);
19540
0
  case NVPTX::BI__nvvm_fmax_xorsign_abs_f16:
19541
0
    return MakeHalfType(Intrinsic::nvvm_fmax_xorsign_abs_f16, BuiltinID, E,
19542
0
                        *this);
19543
0
  case NVPTX::BI__nvvm_fmax_xorsign_abs_f16x2:
19544
0
    return MakeHalfType(Intrinsic::nvvm_fmax_xorsign_abs_f16x2, BuiltinID, E,
19545
0
                        *this);
19546
0
  case NVPTX::BI__nvvm_fmin_f16:
19547
0
    return MakeHalfType(Intrinsic::nvvm_fmin_f16, BuiltinID, E, *this);
19548
0
  case NVPTX::BI__nvvm_fmin_f16x2:
19549
0
    return MakeHalfType(Intrinsic::nvvm_fmin_f16x2, BuiltinID, E, *this);
19550
0
  case NVPTX::BI__nvvm_fmin_ftz_f16:
19551
0
    return MakeHalfType(Intrinsic::nvvm_fmin_ftz_f16, BuiltinID, E, *this);
19552
0
  case NVPTX::BI__nvvm_fmin_ftz_f16x2:
19553
0
    return MakeHalfType(Intrinsic::nvvm_fmin_ftz_f16x2, BuiltinID, E, *this);
19554
0
  case NVPTX::BI__nvvm_fmin_ftz_nan_f16:
19555
0
    return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_f16, BuiltinID, E, *this);
19556
0
  case NVPTX::BI__nvvm_fmin_ftz_nan_f16x2:
19557
0
    return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_f16x2, BuiltinID, E,
19558
0
                        *this);
19559
0
  case NVPTX::BI__nvvm_fmin_ftz_nan_xorsign_abs_f16:
19560
0
    return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f16, BuiltinID,
19561
0
                        E, *this);
19562
0
  case NVPTX::BI__nvvm_fmin_ftz_nan_xorsign_abs_f16x2:
19563
0
    return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f16x2,
19564
0
                        BuiltinID, E, *this);
19565
0
  case NVPTX::BI__nvvm_fmin_ftz_xorsign_abs_f16:
19566
0
    return MakeHalfType(Intrinsic::nvvm_fmin_ftz_xorsign_abs_f16, BuiltinID, E,
19567
0
                        *this);
19568
0
  case NVPTX::BI__nvvm_fmin_ftz_xorsign_abs_f16x2:
19569
0
    return MakeHalfType(Intrinsic::nvvm_fmin_ftz_xorsign_abs_f16x2, BuiltinID,
19570
0
                        E, *this);
19571
0
  case NVPTX::BI__nvvm_fmin_nan_f16:
19572
0
    return MakeHalfType(Intrinsic::nvvm_fmin_nan_f16, BuiltinID, E, *this);
19573
0
  case NVPTX::BI__nvvm_fmin_nan_f16x2:
19574
0
    return MakeHalfType(Intrinsic::nvvm_fmin_nan_f16x2, BuiltinID, E, *this);
19575
0
  case NVPTX::BI__nvvm_fmin_nan_xorsign_abs_f16:
19576
0
    return MakeHalfType(Intrinsic::nvvm_fmin_nan_xorsign_abs_f16, BuiltinID, E,
19577
0
                        *this);
19578
0
  case NVPTX::BI__nvvm_fmin_nan_xorsign_abs_f16x2:
19579
0
    return MakeHalfType(Intrinsic::nvvm_fmin_nan_xorsign_abs_f16x2, BuiltinID,
19580
0
                        E, *this);
19581
0
  case NVPTX::BI__nvvm_fmin_xorsign_abs_f16:
19582
0
    return MakeHalfType(Intrinsic::nvvm_fmin_xorsign_abs_f16, BuiltinID, E,
19583
0
                        *this);
19584
0
  case NVPTX::BI__nvvm_fmin_xorsign_abs_f16x2:
19585
0
    return MakeHalfType(Intrinsic::nvvm_fmin_xorsign_abs_f16x2, BuiltinID, E,
19586
0
                        *this);
19587
0
  case NVPTX::BI__nvvm_ldg_h:
19588
0
    return MakeHalfType(Intrinsic::nvvm_ldg_global_f, BuiltinID, E, *this);
19589
0
  case NVPTX::BI__nvvm_ldg_h2:
19590
0
    return MakeHalfType(Intrinsic::nvvm_ldg_global_f, BuiltinID, E, *this);
19591
0
  case NVPTX::BI__nvvm_ldu_h:
19592
0
    return MakeHalfType(Intrinsic::nvvm_ldu_global_f, BuiltinID, E, *this);
19593
0
  case NVPTX::BI__nvvm_ldu_h2: {
19594
0
    return MakeHalfType(Intrinsic::nvvm_ldu_global_f, BuiltinID, E, *this);
19595
692
  }
19596
0
  case NVPTX::BI__nvvm_cp_async_ca_shared_global_4:
19597
0
    return MakeCpAsync(Intrinsic::nvvm_cp_async_ca_shared_global_4,
19598
0
                       Intrinsic::nvvm_cp_async_ca_shared_global_4_s, *this, E,
19599
0
                       4);
19600
0
  case NVPTX::BI__nvvm_cp_async_ca_shared_global_8:
19601
0
    return MakeCpAsync(Intrinsic::nvvm_cp_async_ca_shared_global_8,
19602
0
                       Intrinsic::nvvm_cp_async_ca_shared_global_8_s, *this, E,
19603
0
                       8);
19604
0
  case NVPTX::BI__nvvm_cp_async_ca_shared_global_16:
19605
0
    return MakeCpAsync(Intrinsic::nvvm_cp_async_ca_shared_global_16,
19606
0
                       Intrinsic::nvvm_cp_async_ca_shared_global_16_s, *this, E,
19607
0
                       16);
19608
0
  case NVPTX::BI__nvvm_cp_async_cg_shared_global_16:
19609
0
    return MakeCpAsync(Intrinsic::nvvm_cp_async_cg_shared_global_16,
19610
0
                       Intrinsic::nvvm_cp_async_cg_shared_global_16_s, *this, E,
19611
0
                       16);
19612
1
  case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_x:
19613
1
    return Builder.CreateCall(
19614
1
        CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_x));
19615
1
  case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_y:
19616
1
    return Builder.CreateCall(
19617
1
        CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_y));
19618
1
  case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_z:
19619
1
    return Builder.CreateCall(
19620
1
        CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_z));
19621
1
  case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_w:
19622
1
    return Builder.CreateCall(
19623
1
        CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_w));
19624
1
  case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_x:
19625
1
    return Builder.CreateCall(
19626
1
        CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_x));
19627
1
  case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_y:
19628
1
    return Builder.CreateCall(
19629
1
        CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_y));
19630
1
  case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_z:
19631
1
    return Builder.CreateCall(
19632
1
        CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_z));
19633
1
  case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_w:
19634
1
    return Builder.CreateCall(
19635
1
        CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_w));
19636
1
  case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_x:
19637
1
    return Builder.CreateCall(
19638
1
        CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_x));
19639
1
  case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_y:
19640
1
    return Builder.CreateCall(
19641
1
        CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_y));
19642
1
  case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_z:
19643
1
    return Builder.CreateCall(
19644
1
        CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_z));
19645
1
  case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_w:
19646
1
    return Builder.CreateCall(
19647
1
        CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_w));
19648
1
  case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_x:
19649
1
    return Builder.CreateCall(
19650
1
        CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_x));
19651
1
  case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_y:
19652
1
    return Builder.CreateCall(
19653
1
        CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_y));
19654
1
  case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_z:
19655
1
    return Builder.CreateCall(
19656
1
        CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_z));
19657
1
  case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_w:
19658
1
    return Builder.CreateCall(
19659
1
        CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_w));
19660
1
  case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctarank:
19661
1
    return Builder.CreateCall(
19662
1
        CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctarank));
19663
1
  case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctarank:
19664
1
    return Builder.CreateCall(
19665
1
        CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctarank));
19666
1
  case NVPTX::BI__nvvm_is_explicit_cluster:
19667
1
    return Builder.CreateCall(
19668
1
        CGM.getIntrinsic(Intrinsic::nvvm_is_explicit_cluster));
19669
1
  case NVPTX::BI__nvvm_isspacep_shared_cluster:
19670
1
    return Builder.CreateCall(
19671
1
        CGM.getIntrinsic(Intrinsic::nvvm_isspacep_shared_cluster),
19672
1
        EmitScalarExpr(E->getArg(0)));
19673
1
  case NVPTX::BI__nvvm_mapa:
19674
1
    return Builder.CreateCall(
19675
1
        CGM.getIntrinsic(Intrinsic::nvvm_mapa),
19676
1
        {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1))});
19677
1
  case NVPTX::BI__nvvm_mapa_shared_cluster:
19678
1
    return Builder.CreateCall(
19679
1
        CGM.getIntrinsic(Intrinsic::nvvm_mapa_shared_cluster),
19680
1
        {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1))});
19681
1
  case NVPTX::BI__nvvm_getctarank:
19682
1
    return Builder.CreateCall(
19683
1
        CGM.getIntrinsic(Intrinsic::nvvm_getctarank),
19684
1
        EmitScalarExpr(E->getArg(0)));
19685
1
  case NVPTX::BI__nvvm_getctarank_shared_cluster:
19686
1
    return Builder.CreateCall(
19687
1
        CGM.getIntrinsic(Intrinsic::nvvm_getctarank_shared_cluster),
19688
1
        EmitScalarExpr(E->getArg(0)));
19689
1
  case NVPTX::BI__nvvm_barrier_cluster_arrive:
19690
1
    return Builder.CreateCall(
19691
1
        CGM.getIntrinsic(Intrinsic::nvvm_barrier_cluster_arrive));
19692
1
  case NVPTX::BI__nvvm_barrier_cluster_arrive_relaxed:
19693
1
    return Builder.CreateCall(
19694
1
        CGM.getIntrinsic(Intrinsic::nvvm_barrier_cluster_arrive_relaxed));
19695
1
  case NVPTX::BI__nvvm_barrier_cluster_wait:
19696
1
    return Builder.CreateCall(
19697
1
        CGM.getIntrinsic(Intrinsic::nvvm_barrier_cluster_wait));
19698
1
  case NVPTX::BI__nvvm_fence_sc_cluster:
19699
1
    return Builder.CreateCall(
19700
1
        CGM.getIntrinsic(Intrinsic::nvvm_fence_sc_cluster));
19701
0
  default:
19702
0
    return nullptr;
19703
1.09k
  }
19704
1.09k
}
19705
19706
namespace {
19707
struct BuiltinAlignArgs {
19708
  llvm::Value *Src = nullptr;
19709
  llvm::Type *SrcType = nullptr;
19710
  llvm::Value *Alignment = nullptr;
19711
  llvm::Value *Mask = nullptr;
19712
  llvm::IntegerType *IntType = nullptr;
19713
19714
18
  BuiltinAlignArgs(const CallExpr *E, CodeGenFunction &CGF) {
19715
18
    QualType AstType = E->getArg(0)->getType();
19716
18
    if (AstType->isArrayType())
19717
0
      Src = CGF.EmitArrayToPointerDecay(E->getArg(0)).getPointer();
19718
18
    else
19719
18
      Src = CGF.EmitScalarExpr(E->getArg(0));
19720
18
    SrcType = Src->getType();
19721
18
    if (SrcType->isPointerTy()) {
19722
12
      IntType = IntegerType::get(
19723
12
          CGF.getLLVMContext(),
19724
12
          CGF.CGM.getDataLayout().getIndexTypeSizeInBits(SrcType));
19725
12
    } else {
19726
6
      assert(SrcType->isIntegerTy());
19727
6
      IntType = cast<llvm::IntegerType>(SrcType);
19728
6
    }
19729
18
    Alignment = CGF.EmitScalarExpr(E->getArg(1));
19730
18
    Alignment = CGF.Builder.CreateZExtOrTrunc(Alignment, IntType, "alignment");
19731
18
    auto *One = llvm::ConstantInt::get(IntType, 1);
19732
18
    Mask = CGF.Builder.CreateSub(Alignment, One, "mask");
19733
18
  }
19734
};
19735
} // namespace
19736
19737
/// Generate (x & (y-1)) == 0.
19738
5
RValue CodeGenFunction::EmitBuiltinIsAligned(const CallExpr *E) {
19739
5
  BuiltinAlignArgs Args(E, *this);
19740
5
  llvm::Value *SrcAddress = Args.Src;
19741
5
  if (Args.SrcType->isPointerTy())
19742
3
    SrcAddress =
19743
3
        Builder.CreateBitOrPointerCast(Args.Src, Args.IntType, "src_addr");
19744
5
  return RValue::get(Builder.CreateICmpEQ(
19745
5
      Builder.CreateAnd(SrcAddress, Args.Mask, "set_bits"),
19746
5
      llvm::Constant::getNullValue(Args.IntType), "is_aligned"));
19747
5
}
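The check emitted here is the usual power-of-two alignment test: mask off the low bits and compare against zero. A small, self-contained C++ rendering of that arithmetic, with illustrative addresses:

#include <cstdint>
#include <cstdio>

// __builtin_is_aligned(p, N) lowers to (addr & (N - 1)) == 0 for a
// power-of-two N; the mask is alignment - 1, as built in BuiltinAlignArgs.
static bool isAligned(uintptr_t Addr, uintptr_t Align) {
  return (Addr & (Align - 1)) == 0;
}

int main() {
  std::printf("%d %d\n",
              isAligned(0x1000, 16),  // 1: 4096 is 16-byte aligned
              isAligned(0x1004, 16)); // 0: low bits 0x4 are set
}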
19748
19749
/// Generate (x & ~(y-1)) to align down or ((x+(y-1)) & ~(y-1)) to align up.
19750
/// Note: For pointer types we can avoid ptrtoint/inttoptr pairs by using the
19751
/// llvm.ptrmask intrinsic (with a GEP before in the align_up case).
19752
13
RValue CodeGenFunction::EmitBuiltinAlignTo(const CallExpr *E, bool AlignUp) {
19753
13
  BuiltinAlignArgs Args(E, *this);
19754
13
  llvm::Value *SrcForMask = Args.Src;
19755
13
  if (AlignUp) {
19756
    // When aligning up we have to first add the mask to ensure we go over the
19757
    // next alignment value and then align down to the next valid multiple.
19758
    // By adding the mask, we ensure that align_up on an already aligned
19759
    // value will not change the value.
19760
7
    if (Args.Src->getType()->isPointerTy()) {
19761
5
      if (getLangOpts().isSignedOverflowDefined())
19762
0
        SrcForMask =
19763
0
            Builder.CreateGEP(Int8Ty, SrcForMask, Args.Mask, "over_boundary");
19764
5
      else
19765
5
        SrcForMask = EmitCheckedInBoundsGEP(Int8Ty, SrcForMask, Args.Mask,
19766
5
                                            /*SignedIndices=*/true,
19767
5
                                            /*isSubtraction=*/false,
19768
5
                                            E->getExprLoc(), "over_boundary");
19769
5
    } else {
19770
2
      SrcForMask = Builder.CreateAdd(SrcForMask, Args.Mask, "over_boundary");
19771
2
    }
19772
7
  }
19773
  // Invert the mask to only clear the lower bits.
19774
13
  llvm::Value *InvertedMask = Builder.CreateNot(Args.Mask, "inverted_mask");
19775
13
  llvm::Value *Result = nullptr;
19776
13
  if (Args.Src->getType()->isPointerTy()) {
19777
9
    Result = Builder.CreateIntrinsic(
19778
9
        Intrinsic::ptrmask, {Args.SrcType, Args.IntType},
19779
9
        {SrcForMask, InvertedMask}, nullptr, "aligned_result");
19780
9
  } else {
19781
4
    Result = Builder.CreateAnd(SrcForMask, InvertedMask, "aligned_result");
19782
4
  }
19783
13
  assert(Result->getType() == Args.SrcType);
19784
13
  return RValue::get(Result);
19785
13
}
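The corresponding align-down and align-up arithmetic, written out in plain C++ for the integer case (the pointer case goes through llvm.ptrmask as the comment above explains); the inputs are illustrative:

#include <cstdint>
#include <cstdio>

// align_down: x & ~(y-1).  align_up: (x + (y-1)) & ~(y-1); adding the mask
// first means an already-aligned value is left unchanged.
static uintptr_t alignDown(uintptr_t X, uintptr_t Align) {
  return X & ~(Align - 1);
}
static uintptr_t alignUp(uintptr_t X, uintptr_t Align) {
  return (X + (Align - 1)) & ~(Align - 1);
}

int main() {
  std::printf("%#llx %#llx %#llx\n",
              static_cast<unsigned long long>(alignDown(0x1005, 16)), // 0x1000
              static_cast<unsigned long long>(alignUp(0x1005, 16)),   // 0x1010
              static_cast<unsigned long long>(alignUp(0x1000, 16)));  // 0x1000
}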
19786
19787
Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
19788
283
                                                   const CallExpr *E) {
19789
283
  switch (BuiltinID) {
19790
3
  case WebAssembly::BI__builtin_wasm_memory_size: {
19791
3
    llvm::Type *ResultType = ConvertType(E->getType());
19792
3
    Value *I = EmitScalarExpr(E->getArg(0));
19793
3
    Function *Callee =
19794
3
        CGM.getIntrinsic(Intrinsic::wasm_memory_size, ResultType);
19795
3
    return Builder.CreateCall(Callee, I);
19796
0
  }
19797
3
  case WebAssembly::BI__builtin_wasm_memory_grow: {
19798
3
    llvm::Type *ResultType = ConvertType(E->getType());
19799
3
    Value *Args[] = {EmitScalarExpr(E->getArg(0)),
19800
3
                     EmitScalarExpr(E->getArg(1))};
19801
3
    Function *Callee =
19802
3
        CGM.getIntrinsic(Intrinsic::wasm_memory_grow, ResultType);
19803
3
    return Builder.CreateCall(Callee, Args);
19804
0
  }
19805
3
  case WebAssembly::BI__builtin_wasm_tls_size: {
19806
3
    llvm::Type *ResultType = ConvertType(E->getType());
19807
3
    Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_tls_size, ResultType);
19808
3
    return Builder.CreateCall(Callee);
19809
0
  }
19810
3
  case WebAssembly::BI__builtin_wasm_tls_align: {
19811
3
    llvm::Type *ResultType = ConvertType(E->getType());
19812
3
    Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_tls_align, ResultType);
19813
3
    return Builder.CreateCall(Callee);
19814
0
  }
19815
3
  case WebAssembly::BI__builtin_wasm_tls_base: {
19816
3
    Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_tls_base);
19817
3
    return Builder.CreateCall(Callee);
19818
0
  }
19819
3
  case WebAssembly::BI__builtin_wasm_throw: {
19820
3
    Value *Tag = EmitScalarExpr(E->getArg(0));
19821
3
    Value *Obj = EmitScalarExpr(E->getArg(1));
19822
3
    Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_throw);
19823
3
    return Builder.CreateCall(Callee, {Tag, Obj});
19824
0
  }
19825
3
  case WebAssembly::BI__builtin_wasm_rethrow: {
19826
3
    Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_rethrow);
19827
3
    return Builder.CreateCall(Callee);
19828
0
  }
19829
3
  case WebAssembly::BI__builtin_wasm_memory_atomic_wait32: {
19830
3
    Value *Addr = EmitScalarExpr(E->getArg(0));
19831
3
    Value *Expected = EmitScalarExpr(E->getArg(1));
19832
3
    Value *Timeout = EmitScalarExpr(E->getArg(2));
19833
3
    Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_atomic_wait32);
19834
3
    return Builder.CreateCall(Callee, {Addr, Expected, Timeout});
19835
0
  }
19836
3
  case WebAssembly::BI__builtin_wasm_memory_atomic_wait64: {
19837
3
    Value *Addr = EmitScalarExpr(E->getArg(0));
19838
3
    Value *Expected = EmitScalarExpr(E->getArg(1));
19839
3
    Value *Timeout = EmitScalarExpr(E->getArg(2));
19840
3
    Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_atomic_wait64);
19841
3
    return Builder.CreateCall(Callee, {Addr, Expected, Timeout});
19842
0
  }
19843
3
  case WebAssembly::BI__builtin_wasm_memory_atomic_notify: {
19844
3
    Value *Addr = EmitScalarExpr(E->getArg(0));
19845
3
    Value *Count = EmitScalarExpr(E->getArg(1));
19846
3
    Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_atomic_notify);
19847
3
    return Builder.CreateCall(Callee, {Addr, Count});
19848
0
  }
19849
3
  case WebAssembly::BI__builtin_wasm_trunc_s_i32_f32:
19850
6
  case WebAssembly::BI__builtin_wasm_trunc_s_i32_f64:
19851
9
  case WebAssembly::BI__builtin_wasm_trunc_s_i64_f32:
19852
12
  case WebAssembly::BI__builtin_wasm_trunc_s_i64_f64: {
19853
12
    Value *Src = EmitScalarExpr(E->getArg(0));
19854
12
    llvm::Type *ResT = ConvertType(E->getType());
19855
12
    Function *Callee =
19856
12
        CGM.getIntrinsic(Intrinsic::wasm_trunc_signed, {ResT, Src->getType()});
19857
12
    return Builder.CreateCall(Callee, {Src});
19858
9
  }
19859
3
  case WebAssembly::BI__builtin_wasm_trunc_u_i32_f32:
19860
6
  case WebAssembly::BI__builtin_wasm_trunc_u_i32_f64:
19861
9
  case WebAssembly::BI__builtin_wasm_trunc_u_i64_f32:
19862
12
  case WebAssembly::BI__builtin_wasm_trunc_u_i64_f64: {
19863
12
    Value *Src = EmitScalarExpr(E->getArg(0));
19864
12
    llvm::Type *ResT = ConvertType(E->getType());
19865
12
    Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_trunc_unsigned,
19866
12
                                        {ResT, Src->getType()});
19867
12
    return Builder.CreateCall(Callee, {Src});
19868
9
  }
19869
3
  case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32_f32:
19870
6
  case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32_f64:
19871
9
  case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i64_f32:
19872
12
  case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i64_f64:
19873
14
  case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32x4_f32x4: {
19874
14
    Value *Src = EmitScalarExpr(E->getArg(0));
19875
14
    llvm::Type *ResT = ConvertType(E->getType());
19876
14
    Function *Callee =
19877
14
        CGM.getIntrinsic(Intrinsic::fptosi_sat, {ResT, Src->getType()});
19878
14
    return Builder.CreateCall(Callee, {Src});
19879
12
  }
19880
3
  case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32_f32:
19881
6
  case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32_f64:
19882
9
  case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i64_f32:
19883
12
  case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i64_f64:
19884
14
  case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32x4_f32x4: {
19885
14
    Value *Src = EmitScalarExpr(E->getArg(0));
19886
14
    llvm::Type *ResT = ConvertType(E->getType());
19887
14
    Function *Callee =
19888
14
        CGM.getIntrinsic(Intrinsic::fptoui_sat, {ResT, Src->getType()});
19889
14
    return Builder.CreateCall(Callee, {Src});
19890
12
  }
19891
3
  case WebAssembly::BI__builtin_wasm_min_f32:
19892
6
  case WebAssembly::BI__builtin_wasm_min_f64:
19893
8
  case WebAssembly::BI__builtin_wasm_min_f32x4:
19894
10
  case WebAssembly::BI__builtin_wasm_min_f64x2: {
19895
10
    Value *LHS = EmitScalarExpr(E->getArg(0));
19896
10
    Value *RHS = EmitScalarExpr(E->getArg(1));
19897
10
    Function *Callee =
19898
10
        CGM.getIntrinsic(Intrinsic::minimum, ConvertType(E->getType()));
19899
10
    return Builder.CreateCall(Callee, {LHS, RHS});
19900
8
  }
19901
3
  case WebAssembly::BI__builtin_wasm_max_f32:
19902
6
  case WebAssembly::BI__builtin_wasm_max_f64:
19903
8
  case WebAssembly::BI__builtin_wasm_max_f32x4:
19904
10
  case WebAssembly::BI__builtin_wasm_max_f64x2: {
19905
10
    Value *LHS = EmitScalarExpr(E->getArg(0));
19906
10
    Value *RHS = EmitScalarExpr(E->getArg(1));
19907
10
    Function *Callee =
19908
10
        CGM.getIntrinsic(Intrinsic::maximum, ConvertType(E->getType()));
19909
10
    return Builder.CreateCall(Callee, {LHS, RHS});
19910
8
  }
19911
2
  case WebAssembly::BI__builtin_wasm_pmin_f32x4:
19912
4
  case WebAssembly::BI__builtin_wasm_pmin_f64x2: {
19913
4
    Value *LHS = EmitScalarExpr(E->getArg(0));
19914
4
    Value *RHS = EmitScalarExpr(E->getArg(1));
19915
4
    Function *Callee =
19916
4
        CGM.getIntrinsic(Intrinsic::wasm_pmin, ConvertType(E->getType()));
19917
4
    return Builder.CreateCall(Callee, {LHS, RHS});
19918
2
  }
19919
2
  case WebAssembly::BI__builtin_wasm_pmax_f32x4:
19920
4
  case WebAssembly::BI__builtin_wasm_pmax_f64x2: {
19921
4
    Value *LHS = EmitScalarExpr(E->getArg(0));
19922
4
    Value *RHS = EmitScalarExpr(E->getArg(1));
19923
4
    Function *Callee =
19924
4
        CGM.getIntrinsic(Intrinsic::wasm_pmax, ConvertType(E->getType()));
19925
4
    return Builder.CreateCall(Callee, {LHS, RHS});
19926
2
  }
19927
2
  case WebAssembly::BI__builtin_wasm_ceil_f32x4:
19928
4
  case WebAssembly::BI__builtin_wasm_floor_f32x4:
19929
6
  case WebAssembly::BI__builtin_wasm_trunc_f32x4:
19930
8
  case WebAssembly::BI__builtin_wasm_nearest_f32x4:
19931
10
  case WebAssembly::BI__builtin_wasm_ceil_f64x2:
19932
12
  case WebAssembly::BI__builtin_wasm_floor_f64x2:
19933
14
  case WebAssembly::BI__builtin_wasm_trunc_f64x2:
19934
16
  case WebAssembly::BI__builtin_wasm_nearest_f64x2: {
19935
16
    unsigned IntNo;
19936
16
    switch (BuiltinID) {
19937
2
    case WebAssembly::BI__builtin_wasm_ceil_f32x4:
19938
4
    case WebAssembly::BI__builtin_wasm_ceil_f64x2:
19939
4
      IntNo = Intrinsic::ceil;
19940
4
      break;
19941
2
    case WebAssembly::BI__builtin_wasm_floor_f32x4:
19942
4
    case WebAssembly::BI__builtin_wasm_floor_f64x2:
19943
4
      IntNo = Intrinsic::floor;
19944
4
      break;
19945
2
    case WebAssembly::BI__builtin_wasm_trunc_f32x4:
19946
4
    case WebAssembly::BI__builtin_wasm_trunc_f64x2:
19947
4
      IntNo = Intrinsic::trunc;
19948
4
      break;
19949
2
    case WebAssembly::BI__builtin_wasm_nearest_f32x4:
19950
4
    case WebAssembly::BI__builtin_wasm_nearest_f64x2:
19951
4
      IntNo = Intrinsic::nearbyint;
19952
4
      break;
19953
0
    default:
19954
0
      llvm_unreachable("unexpected builtin ID");
19955
16
    }
19956
16
    Value *Value = EmitScalarExpr(E->getArg(0));
19957
16
    Function *Callee = CGM.getIntrinsic(IntNo, ConvertType(E->getType()));
19958
16
    return Builder.CreateCall(Callee, Value);
19959
16
  }
19960
2
  case WebAssembly::BI__builtin_wasm_ref_null_extern: {
19961
2
    Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_ref_null_extern);
19962
2
    return Builder.CreateCall(Callee);
19963
16
  }
19964
2
  case WebAssembly::BI__builtin_wasm_ref_null_func: {
19965
2
    Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_ref_null_func);
19966
2
    return Builder.CreateCall(Callee);
19967
16
  }
19968
2
  case WebAssembly::BI__builtin_wasm_swizzle_i8x16: {
19969
2
    Value *Src = EmitScalarExpr(E->getArg(0));
19970
2
    Value *Indices = EmitScalarExpr(E->getArg(1));
19971
2
    Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_swizzle);
19972
2
    return Builder.CreateCall(Callee, {Src, Indices});
19973
16
  }
19974
3
  case WebAssembly::BI__builtin_wasm_add_sat_s_i8x16:
19975
5
  case WebAssembly::BI__builtin_wasm_add_sat_u_i8x16:
19976
7
  case WebAssembly::BI__builtin_wasm_add_sat_s_i16x8:
19977
9
  case WebAssembly::BI__builtin_wasm_add_sat_u_i16x8:
19978
11
  case WebAssembly::BI__builtin_wasm_sub_sat_s_i8x16:
19979
13
  case WebAssembly::BI__builtin_wasm_sub_sat_u_i8x16:
19980
15
  case WebAssembly::BI__builtin_wasm_sub_sat_s_i16x8:
19981
17
  case WebAssembly::BI__builtin_wasm_sub_sat_u_i16x8: {
19982
17
    unsigned IntNo;
19983
17
    switch (BuiltinID) {
19984
3
    case WebAssembly::BI__builtin_wasm_add_sat_s_i8x16:
19985
5
    case WebAssembly::BI__builtin_wasm_add_sat_s_i16x8:
19986
5
      IntNo = Intrinsic::sadd_sat;
19987
5
      break;
19988
2
    case WebAssembly::BI__builtin_wasm_add_sat_u_i8x16:
19989
4
    case WebAssembly::BI__builtin_wasm_add_sat_u_i16x8:
19990
4
      IntNo = Intrinsic::uadd_sat;
19991
4
      break;
19992
2
    case WebAssembly::BI__builtin_wasm_sub_sat_s_i8x16:
19993
4
    case WebAssembly::BI__builtin_wasm_sub_sat_s_i16x8:
19994
4
      IntNo = Intrinsic::wasm_sub_sat_signed;
19995
4
      break;
19996
2
    case WebAssembly::BI__builtin_wasm_sub_sat_u_i8x16:
19997
4
    case WebAssembly::BI__builtin_wasm_sub_sat_u_i16x8:
19998
4
      IntNo = Intrinsic::wasm_sub_sat_unsigned;
19999
4
      break;
20000
0
    default:
20001
0
      llvm_unreachable("unexpected builtin ID");
20002
17
    }
20003
17
    Value *LHS = EmitScalarExpr(E->getArg(0));
20004
17
    Value *RHS = EmitScalarExpr(E->getArg(1));
20005
17
    Function *Callee = CGM.getIntrinsic(IntNo, ConvertType(E->getType()));
20006
17
    return Builder.CreateCall(Callee, {LHS, RHS});
20007
17
  }
20008
2
  case WebAssembly::BI__builtin_wasm_abs_i8x16:
20009
4
  case WebAssembly::BI__builtin_wasm_abs_i16x8:
20010
6
  case WebAssembly::BI__builtin_wasm_abs_i32x4:
20011
8
  case WebAssembly::BI__builtin_wasm_abs_i64x2: {
20012
8
    Value *Vec = EmitScalarExpr(E->getArg(0));
20013
8
    Value *Neg = Builder.CreateNeg(Vec, "neg");
20014
8
    Constant *Zero = llvm::Constant::getNullValue(Vec->getType());
20015
8
    Value *ICmp = Builder.CreateICmpSLT(Vec, Zero, "abscond");
20016
8
    return Builder.CreateSelect(ICmp, Neg, Vec, "abs");
20017
6
  }
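The abs lowering above is a negate, a signed-less-than-zero compare, and a select, applied lane-wise. A scalarized sketch of the same pattern (one lane, illustrative values; the vector form wraps on the most negative value rather than overflowing):

#include <cstdint>
#include <cstdio>

// Per-lane equivalent of the emitted IR: neg, icmp slt 0, select.
static int32_t laneAbs(int32_t V) {
  int32_t Neg = -V;      // CreateNeg
  bool Cond = V < 0;     // CreateICmpSLT(Vec, Zero)
  return Cond ? Neg : V; // CreateSelect
}

int main() {
  std::printf("%d %d\n", laneAbs(-7), laneAbs(7)); // 7 7
}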
20018
2
  case WebAssembly::BI__builtin_wasm_min_s_i8x16:
20019
4
  case WebAssembly::BI__builtin_wasm_min_u_i8x16:
20020
6
  case WebAssembly::BI__builtin_wasm_max_s_i8x16:
20021
8
  case WebAssembly::BI__builtin_wasm_max_u_i8x16:
20022
10
  case WebAssembly::BI__builtin_wasm_min_s_i16x8:
20023
12
  case WebAssembly::BI__builtin_wasm_min_u_i16x8:
20024
14
  case WebAssembly::BI__builtin_wasm_max_s_i16x8:
20025
16
  case WebAssembly::BI__builtin_wasm_max_u_i16x8:
20026
18
  case WebAssembly::BI__builtin_wasm_min_s_i32x4:
20027
20
  case WebAssembly::BI__builtin_wasm_min_u_i32x4:
20028
22
  case WebAssembly::BI__builtin_wasm_max_s_i32x4:
20029
24
  case WebAssembly::BI__builtin_wasm_max_u_i32x4: {
20030
24
    Value *LHS = EmitScalarExpr(E->getArg(0));
20031
24
    Value *RHS = EmitScalarExpr(E->getArg(1));
20032
24
    Value *ICmp;
20033
24
    switch (BuiltinID) {
20034
2
    case WebAssembly::BI__builtin_wasm_min_s_i8x16:
20035
4
    case WebAssembly::BI__builtin_wasm_min_s_i16x8:
20036
6
    case WebAssembly::BI__builtin_wasm_min_s_i32x4:
20037
6
      ICmp = Builder.CreateICmpSLT(LHS, RHS);
20038
6
      break;
20039
2
    case WebAssembly::BI__builtin_wasm_min_u_i8x16:
20040
4
    case WebAssembly::BI__builtin_wasm_min_u_i16x8:
20041
6
    case WebAssembly::BI__builtin_wasm_min_u_i32x4:
20042
6
      ICmp = Builder.CreateICmpULT(LHS, RHS);
20043
6
      break;
20044
2
    case WebAssembly::BI__builtin_wasm_max_s_i8x16:
20045
4
    case WebAssembly::BI__builtin_wasm_max_s_i16x8:
20046
6
    case WebAssembly::BI__builtin_wasm_max_s_i32x4:
20047
6
      ICmp = Builder.CreateICmpSGT(LHS, RHS);
20048
6
      break;
20049
2
    case WebAssembly::BI__builtin_wasm_max_u_i8x16:
20050
4
    case WebAssembly::BI__builtin_wasm_max_u_i16x8:
20051
6
    case WebAssembly::BI__builtin_wasm_max_u_i32x4:
20052
6
      ICmp = Builder.CreateICmpUGT(LHS, RHS);
20053
6
      break;
20054
0
    default:
20055
0
      llvm_unreachable("unexpected builtin ID");
20056
24
    }
20057
24
    return Builder.CreateSelect(ICmp, LHS, RHS);
20058
24
  }
20059
2
  case WebAssembly::BI__builtin_wasm_avgr_u_i8x16:
20060
4
  case WebAssembly::BI__builtin_wasm_avgr_u_i16x8: {
20061
4
    Value *LHS = EmitScalarExpr(E->getArg(0));
20062
4
    Value *RHS = EmitScalarExpr(E->getArg(1));
20063
4
    Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_avgr_unsigned,
20064
4
                                        ConvertType(E->getType()));
20065
4
    return Builder.CreateCall(Callee, {LHS, RHS});
20066
2
  }
20067
2
  case WebAssembly::BI__builtin_wasm_q15mulr_sat_s_i16x8: {
20068
2
    Value *LHS = EmitScalarExpr(E->getArg(0));
20069
2
    Value *RHS = EmitScalarExpr(E->getArg(1));
20070
2
    Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_q15mulr_sat_signed);
20071
2
    return Builder.CreateCall(Callee, {LHS, RHS});
20072
2
  }
20073
2
  case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_s_i16x8:
20074
4
  case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_u_i16x8:
20075
6
  case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_s_i32x4:
20076
8
  case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_u_i32x4: {
20077
8
    Value *Vec = EmitScalarExpr(E->getArg(0));
20078
8
    unsigned IntNo;
20079
8
    switch (BuiltinID) {
20080
2
    case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_s_i16x8:
20081
4
    case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_s_i32x4:
20082
4
      IntNo = Intrinsic::wasm_extadd_pairwise_signed;
20083
4
      break;
20084
2
    case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_u_i16x8:
20085
4
    case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_u_i32x4:
20086
4
      IntNo = Intrinsic::wasm_extadd_pairwise_unsigned;
20087
4
      break;
20088
0
    default:
20089
0
      llvm_unreachable("unexpected builtin ID");
20090
8
    }
20091
20092
8
    Function *Callee = CGM.getIntrinsic(IntNo, ConvertType(E->getType()));
20093
8
    return Builder.CreateCall(Callee, Vec);
20094
8
  }
20095
2
  case WebAssembly::BI__builtin_wasm_bitselect: {
20096
2
    Value *V1 = EmitScalarExpr(E->getArg(0));
20097
2
    Value *V2 = EmitScalarExpr(E->getArg(1));
20098
2
    Value *C = EmitScalarExpr(E->getArg(2));
20099
2
    Function *Callee =
20100
2
        CGM.getIntrinsic(Intrinsic::wasm_bitselect, ConvertType(E->getType()));
20101
2
    return Builder.CreateCall(Callee, {V1, V2, C});
20102
8
  }
20103
2
  case WebAssembly::BI__builtin_wasm_dot_s_i32x4_i16x8: {
20104
2
    Value *LHS = EmitScalarExpr(E->getArg(0));
20105
2
    Value *RHS = EmitScalarExpr(E->getArg(1));
20106
2
    Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_dot);
20107
2
    return Builder.CreateCall(Callee, {LHS, RHS});
20108
8
  }
20109
2
  case WebAssembly::BI__builtin_wasm_popcnt_i8x16: {
20110
2
    Value *Vec = EmitScalarExpr(E->getArg(0));
20111
2
    Function *Callee =
20112
2
        CGM.getIntrinsic(Intrinsic::ctpop, ConvertType(E->getType()));
20113
2
    return Builder.CreateCall(Callee, {Vec});
20114
8
  }
20115
2
  case WebAssembly::BI__builtin_wasm_any_true_v128:
20116
4
  case WebAssembly::BI__builtin_wasm_all_true_i8x16:
20117
6
  case WebAssembly::BI__builtin_wasm_all_true_i16x8:
20118
8
  case WebAssembly::BI__builtin_wasm_all_true_i32x4:
20119
10
  case WebAssembly::BI__builtin_wasm_all_true_i64x2: {
20120
10
    unsigned IntNo;
20121
10
    switch (BuiltinID) {
20122
2
    case WebAssembly::BI__builtin_wasm_any_true_v128:
20123
2
      IntNo = Intrinsic::wasm_anytrue;
20124
2
      break;
20125
2
    case WebAssembly::BI__builtin_wasm_all_true_i8x16:
20126
4
    case WebAssembly::BI__builtin_wasm_all_true_i16x8:
20127
6
    case WebAssembly::BI__builtin_wasm_all_true_i32x4:
20128
8
    case WebAssembly::BI__builtin_wasm_all_true_i64x2:
20129
8
      IntNo = Intrinsic::wasm_alltrue;
20130
8
      break;
20131
0
    default:
20132
0
      llvm_unreachable("unexpected builtin ID");
20133
10
    }
20134
10
    Value *Vec = EmitScalarExpr(E->getArg(0));
20135
10
    Function *Callee = CGM.getIntrinsic(IntNo, Vec->getType());
20136
10
    return Builder.CreateCall(Callee, {Vec});
20137
10
  }
20138
2
  case WebAssembly::BI__builtin_wasm_bitmask_i8x16:
20139
4
  case WebAssembly::BI__builtin_wasm_bitmask_i16x8:
20140
6
  case WebAssembly::BI__builtin_wasm_bitmask_i32x4:
20141
8
  case WebAssembly::BI__builtin_wasm_bitmask_i64x2: {
20142
8
    Value *Vec = EmitScalarExpr(E->getArg(0));
20143
8
    Function *Callee =
20144
8
        CGM.getIntrinsic(Intrinsic::wasm_bitmask, Vec->getType());
20145
8
    return Builder.CreateCall(Callee, {Vec});
20146
6
  }
20147
2
  case WebAssembly::BI__builtin_wasm_abs_f32x4:
20148
4
  case WebAssembly::BI__builtin_wasm_abs_f64x2: {
20149
4
    Value *Vec = EmitScalarExpr(E->getArg(0));
20150
4
    Function *Callee = CGM.getIntrinsic(Intrinsic::fabs, Vec->getType());
20151
4
    return Builder.CreateCall(Callee, {Vec});
20152
2
  }
20153
2
  case WebAssembly::BI__builtin_wasm_sqrt_f32x4:
20154
4
  case WebAssembly::BI__builtin_wasm_sqrt_f64x2: {
20155
4
    Value *Vec = EmitScalarExpr(E->getArg(0));
20156
4
    Function *Callee = CGM.getIntrinsic(Intrinsic::sqrt, Vec->getType());
20157
4
    return Builder.CreateCall(Callee, {Vec});
20158
2
  }
20159
2
  case WebAssembly::BI__builtin_wasm_narrow_s_i8x16_i16x8:
20160
4
  case WebAssembly::BI__builtin_wasm_narrow_u_i8x16_i16x8:
20161
6
  case WebAssembly::BI__builtin_wasm_narrow_s_i16x8_i32x4:
20162
8
  case WebAssembly::BI__builtin_wasm_narrow_u_i16x8_i32x4: {
20163
8
    Value *Low = EmitScalarExpr(E->getArg(0));
20164
8
    Value *High = EmitScalarExpr(E->getArg(1));
20165
8
    unsigned IntNo;
20166
8
    switch (BuiltinID) {
20167
2
    case WebAssembly::BI__builtin_wasm_narrow_s_i8x16_i16x8:
20168
4
    case WebAssembly::BI__builtin_wasm_narrow_s_i16x8_i32x4:
20169
4
      IntNo = Intrinsic::wasm_narrow_signed;
20170
4
      break;
20171
2
    case WebAssembly::BI__builtin_wasm_narrow_u_i8x16_i16x8:
20172
4
    case WebAssembly::BI__builtin_wasm_narrow_u_i16x8_i32x4:
20173
4
      IntNo = Intrinsic::wasm_narrow_unsigned;
20174
4
      break;
20175
0
    default:
20176
0
      llvm_unreachable("unexpected builtin ID");
20177
8
    }
20178
8
    Function *Callee =
20179
8
        CGM.getIntrinsic(IntNo, {ConvertType(E->getType()), Low->getType()});
20180
8
    return Builder.CreateCall(Callee, {Low, High});
20181
8
  }
20182
2
  case WebAssembly::BI__builtin_wasm_trunc_sat_s_zero_f64x2_i32x4:
20183
4
  case WebAssembly::BI__builtin_wasm_trunc_sat_u_zero_f64x2_i32x4: {
20184
4
    Value *Vec = EmitScalarExpr(E->getArg(0));
20185
4
    unsigned IntNo;
20186
4
    switch (BuiltinID) {
20187
2
    case WebAssembly::BI__builtin_wasm_trunc_sat_s_zero_f64x2_i32x4:
20188
2
      IntNo = Intrinsic::fptosi_sat;
20189
2
      break;
20190
2
    case WebAssembly::BI__builtin_wasm_trunc_sat_u_zero_f64x2_i32x4:
20191
2
      IntNo = Intrinsic::fptoui_sat;
20192
2
      break;
20193
0
    default:
20194
0
      llvm_unreachable("unexpected builtin ID");
20195
4
    }
20196
4
    llvm::Type *SrcT = Vec->getType();
20197
4
    llvm::Type *TruncT = SrcT->getWithNewType(Builder.getInt32Ty());
20198
4
    Function *Callee = CGM.getIntrinsic(IntNo, {TruncT, SrcT});
20199
4
    Value *Trunc = Builder.CreateCall(Callee, Vec);
20200
4
    Value *Splat = Constant::getNullValue(TruncT);
20201
4
    return Builder.CreateShuffleVector(Trunc, Splat, ArrayRef<int>{0, 1, 2, 3});
20202
4
  }
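The trunc_sat_*_zero cases convert the two f64 lanes with saturating semantics and then shuffle in a zero splat so the upper two i32 lanes are zero. Below is a scalar model of the saturating conversion (fptosi.sat semantics: NaN becomes 0, out-of-range values clamp to the integer limits); the sample inputs are illustrative.

#include <cmath>
#include <cstdint>
#include <cstdio>
#include <limits>

// Scalar model of llvm.fptosi.sat.i32.f64.
static int32_t fptosiSat32(double X) {
  if (std::isnan(X))
    return 0;
  if (X <= static_cast<double>(std::numeric_limits<int32_t>::min()))
    return std::numeric_limits<int32_t>::min();
  if (X >= static_cast<double>(std::numeric_limits<int32_t>::max()))
    return std::numeric_limits<int32_t>::max();
  return static_cast<int32_t>(X);
}

int main() {
  double Src[2] = {3.9e9, -1.5}; // two f64 lanes
  int32_t Dst[4] = {fptosiSat32(Src[0]), fptosiSat32(Src[1]), 0, 0}; // upper lanes zeroed
  std::printf("%d %d %d %d\n", Dst[0], Dst[1], Dst[2], Dst[3]); // 2147483647 -1 0 0
}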
20203
2
  case WebAssembly::BI__builtin_wasm_shuffle_i8x16: {
20204
2
    Value *Ops[18];
20205
2
    size_t OpIdx = 0;
20206
2
    Ops[OpIdx++] = EmitScalarExpr(E->getArg(0));
20207
2
    Ops[OpIdx++] = EmitScalarExpr(E->getArg(1));
20208
34
    while (OpIdx < 18) {
20209
32
      std::optional<llvm::APSInt> LaneConst =
20210
32
          E->getArg(OpIdx)->getIntegerConstantExpr(getContext());
20211
32
      assert(LaneConst && "Constant arg isn't actually constant?");
20212
32
      Ops[OpIdx++] = llvm::ConstantInt::get(getLLVMContext(), *LaneConst);
20213
32
    }
20214
2
    Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_shuffle);
20215
2
    return Builder.CreateCall(Callee, Ops);
20216
2
  }
20217
2
  case WebAssembly::BI__builtin_wasm_relaxed_madd_f32x4:
20218
4
  case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f32x4:
20219
6
  case WebAssembly::BI__builtin_wasm_relaxed_madd_f64x2:
20220
8
  case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f64x2: {
20221
8
    Value *A = EmitScalarExpr(E->getArg(0));
20222
8
    Value *B = EmitScalarExpr(E->getArg(1));
20223
8
    Value *C = EmitScalarExpr(E->getArg(2));
20224
8
    unsigned IntNo;
20225
8
    switch (BuiltinID) {
20226
2
    case WebAssembly::BI__builtin_wasm_relaxed_madd_f32x4:
20227
4
    case WebAssembly::BI__builtin_wasm_relaxed_madd_f64x2:
20228
4
      IntNo = Intrinsic::wasm_relaxed_madd;
20229
4
      break;
20230
2
    case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f32x4:
20231
4
    case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f64x2:
20232
4
      IntNo = Intrinsic::wasm_relaxed_nmadd;
20233
4
      break;
20234
0
    default:
20235
0
      llvm_unreachable("unexpected builtin ID");
20236
8
    }
20237
8
    Function *Callee = CGM.getIntrinsic(IntNo, A->getType());
20238
8
    return Builder.CreateCall(Callee, {A, B, C});
20239
8
  }
20240
2
  case WebAssembly::BI__builtin_wasm_relaxed_laneselect_i8x16:
20241
4
  case WebAssembly::BI__builtin_wasm_relaxed_laneselect_i16x8:
20242
6
  case WebAssembly::BI__builtin_wasm_relaxed_laneselect_i32x4:
20243
8
  case WebAssembly::BI__builtin_wasm_relaxed_laneselect_i64x2: {
20244
8
    Value *A = EmitScalarExpr(E->getArg(0));
20245
8
    Value *B = EmitScalarExpr(E->getArg(1));
20246
8
    Value *C = EmitScalarExpr(E->getArg(2));
20247
8
    Function *Callee =
20248
8
        CGM.getIntrinsic(Intrinsic::wasm_relaxed_laneselect, A->getType());
20249
8
    return Builder.CreateCall(Callee, {A, B, C});
20250
6
  }
20251
2
  case WebAssembly::BI__builtin_wasm_relaxed_swizzle_i8x16: {
20252
2
    Value *Src = EmitScalarExpr(E->getArg(0));
20253
2
    Value *Indices = EmitScalarExpr(E->getArg(1));
20254
2
    Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_relaxed_swizzle);
20255
2
    return Builder.CreateCall(Callee, {Src, Indices});
20256
6
  }
20257
2
  case WebAssembly::BI__builtin_wasm_relaxed_min_f32x4:
20258
4
  case WebAssembly::BI__builtin_wasm_relaxed_max_f32x4:
20259
6
  case WebAssembly::BI__builtin_wasm_relaxed_min_f64x2:
20260
8
  case WebAssembly::BI__builtin_wasm_relaxed_max_f64x2: {
20261
8
    Value *LHS = EmitScalarExpr(E->getArg(0));
20262
8
    Value *RHS = EmitScalarExpr(E->getArg(1));
20263
8
    unsigned IntNo;
20264
8
    switch (BuiltinID) {
20265
2
    case WebAssembly::BI__builtin_wasm_relaxed_min_f32x4:
20266
4
    case WebAssembly::BI__builtin_wasm_relaxed_min_f64x2:
20267
4
      IntNo = Intrinsic::wasm_relaxed_min;
20268
4
      break;
20269
2
    case WebAssembly::BI__builtin_wasm_relaxed_max_f32x4:
20270
4
    case WebAssembly::BI__builtin_wasm_relaxed_max_f64x2:
20271
4
      IntNo = Intrinsic::wasm_relaxed_max;
20272
4
      break;
20273
0
    default:
20274
0
      llvm_unreachable("unexpected builtin ID");
20275
8
    }
20276
8
    Function *Callee = CGM.getIntrinsic(IntNo, LHS->getType());
20277
8
    return Builder.CreateCall(Callee, {LHS, RHS});
20278
8
  }
20279
2
  case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_i32x4_f32x4:
20280
4
  case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_i32x4_f32x4:
20281
6
  case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_zero_i32x4_f64x2:
20282
8
  case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_zero_i32x4_f64x2: {
20283
8
    Value *Vec = EmitScalarExpr(E->getArg(0));
20284
8
    unsigned IntNo;
20285
8
    switch (BuiltinID) {
20286
2
    case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_i32x4_f32x4:
20287
2
      IntNo = Intrinsic::wasm_relaxed_trunc_signed;
20288
2
      break;
20289
2
    case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_i32x4_f32x4:
20290
2
      IntNo = Intrinsic::wasm_relaxed_trunc_unsigned;
20291
2
      break;
20292
2
    case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_zero_i32x4_f64x2:
20293
2
      IntNo = Intrinsic::wasm_relaxed_trunc_signed_zero;
20294
2
      break;
20295
2
    case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_zero_i32x4_f64x2:
20296
2
      IntNo = Intrinsic::wasm_relaxed_trunc_unsigned_zero;
20297
2
      break;
20298
0
    default:
20299
0
      llvm_unreachable("unexpected builtin ID");
20300
8
    }
20301
8
    Function *Callee = CGM.getIntrinsic(IntNo);
20302
8
    return Builder.CreateCall(Callee, {Vec});
20303
8
  }
20304
2
  case WebAssembly::BI__builtin_wasm_relaxed_q15mulr_s_i16x8: {
20305
2
    Value *LHS = EmitScalarExpr(E->getArg(0));
20306
2
    Value *RHS = EmitScalarExpr(E->getArg(1));
20307
2
    Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_relaxed_q15mulr_signed);
20308
2
    return Builder.CreateCall(Callee, {LHS, RHS});
20309
8
  }
20310
2
  case WebAssembly::BI__builtin_wasm_relaxed_dot_i8x16_i7x16_s_i16x8: {
20311
2
    Value *LHS = EmitScalarExpr(E->getArg(0));
20312
2
    Value *RHS = EmitScalarExpr(E->getArg(1));
20313
2
    Function *Callee =
20314
2
        CGM.getIntrinsic(Intrinsic::wasm_relaxed_dot_i8x16_i7x16_signed);
20315
2
    return Builder.CreateCall(Callee, {LHS, RHS});
20316
8
  }
20317
2
  case WebAssembly::BI__builtin_wasm_relaxed_dot_i8x16_i7x16_add_s_i32x4: {
20318
2
    Value *LHS = EmitScalarExpr(E->getArg(0));
20319
2
    Value *RHS = EmitScalarExpr(E->getArg(1));
20320
2
    Value *Acc = EmitScalarExpr(E->getArg(2));
20321
2
    Function *Callee =
20322
2
        CGM.getIntrinsic(Intrinsic::wasm_relaxed_dot_i8x16_i7x16_add_signed);
20323
2
    return Builder.CreateCall(Callee, {LHS, RHS, Acc});
20324
8
  }
20325
2
  case WebAssembly::BI__builtin_wasm_relaxed_dot_bf16x8_add_f32_f32x4: {
20326
2
    Value *LHS = EmitScalarExpr(E->getArg(0));
20327
2
    Value *RHS = EmitScalarExpr(E->getArg(1));
20328
2
    Value *Acc = EmitScalarExpr(E->getArg(2));
20329
2
    Function *Callee =
20330
2
        CGM.getIntrinsic(Intrinsic::wasm_relaxed_dot_bf16x8_add_f32);
20331
2
    return Builder.CreateCall(Callee, {LHS, RHS, Acc});
20332
8
  }
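  // The q15mulr and dot-product cases above are direct mappings: every operand
  // is emitted with EmitScalarExpr and forwarded unchanged to the
  // corresponding non-overloaded WebAssembly intrinsic.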
20333
0
  case WebAssembly::BI__builtin_wasm_table_get: {
20334
0
    assert(E->getArg(0)->getType()->isArrayType());
20335
0
    Value *Table = EmitArrayToPointerDecay(E->getArg(0)).getPointer();
20336
0
    Value *Index = EmitScalarExpr(E->getArg(1));
20337
0
    Function *Callee;
20338
0
    if (E->getType().isWebAssemblyExternrefType())
20339
0
      Callee = CGM.getIntrinsic(Intrinsic::wasm_table_get_externref);
20340
0
    else if (E->getType().isWebAssemblyFuncrefType())
20341
0
      Callee = CGM.getIntrinsic(Intrinsic::wasm_table_get_funcref);
20342
0
    else
20343
0
      llvm_unreachable(
20344
0
          "Unexpected reference type for __builtin_wasm_table_get");
20345
0
    return Builder.CreateCall(Callee, {Table, Index});
20346
0
  }
20347
0
  case WebAssembly::BI__builtin_wasm_table_set: {
20348
0
    assert(E->getArg(0)->getType()->isArrayType());
20349
0
    Value *Table = EmitArrayToPointerDecay(E->getArg(0)).getPointer();
20350
0
    Value *Index = EmitScalarExpr(E->getArg(1));
20351
0
    Value *Val = EmitScalarExpr(E->getArg(2));
20352
0
    Function *Callee;
20353
0
    if (E->getArg(2)->getType().isWebAssemblyExternrefType())
20354
0
      Callee = CGM.getIntrinsic(Intrinsic::wasm_table_set_externref);
20355
0
    else if (E->getArg(2)->getType().isWebAssemblyFuncrefType())
20356
0
      Callee = CGM.getIntrinsic(Intrinsic::wasm_table_set_funcref);
20357
0
    else
20358
0
      llvm_unreachable(
20359
0
          "Unexpected reference type for __builtin_wasm_table_set");
20360
0
    return Builder.CreateCall(Callee, {Table, Index, Val});
20361
0
  }
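  // Illustrative usage sketch (assumed source-level form): the table builtins
  // operate on a WebAssembly table declared as a static array of a reference
  // type; its address decays to the table operand of the intrinsic.
#if 0
  static __externref_t table_example[0];
  __externref_t table_get_example(int i) {
    return __builtin_wasm_table_get(table_example, i);
  }
#endif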
20362
0
  case WebAssembly::BI__builtin_wasm_table_size: {
20363
0
    assert(E->getArg(0)->getType()->isArrayType());
20364
0
    Value *Value = EmitArrayToPointerDecay(E->getArg(0)).getPointer();
20365
0
    Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_table_size);
20366
0
    return Builder.CreateCall(Callee, Value);
20367
0
  }
20368
0
  case WebAssembly::BI__builtin_wasm_table_grow: {
20369
0
    assert(E->getArg(0)->getType()->isArrayType());
20370
0
    Value *Table = EmitArrayToPointerDecay(E->getArg(0)).getPointer();
20371
0
    Value *Val = EmitScalarExpr(E->getArg(1));
20372
0
    Value *NElems = EmitScalarExpr(E->getArg(2));
20373
20374
0
    Function *Callee;
20375
0
    if (E->getArg(1)->getType().isWebAssemblyExternrefType())
20376
0
      Callee = CGM.getIntrinsic(Intrinsic::wasm_table_grow_externref);
20377
0
    else if (E->getArg(2)->getType().isWebAssemblyFuncrefType())
20378
0
      Callee = CGM.getIntrinsic(Intrinsic::wasm_table_fill_funcref);
20379
0
    else
20380
0
      llvm_unreachable(
20381
0
          "Unexpected reference type for __builtin_wasm_table_grow");
20382
20383
0
    return Builder.CreateCall(Callee, {Table, Val, NElems});
20384
0
  }
20385
0
  case WebAssembly::BI__builtin_wasm_table_fill: {
20386
0
    assert(E->getArg(0)->getType()->isArrayType());
20387
0
    Value *Table = EmitArrayToPointerDecay(E->getArg(0)).getPointer();
20388
0
    Value *Index = EmitScalarExpr(E->getArg(1));
20389
0
    Value *Val = EmitScalarExpr(E->getArg(2));
20390
0
    Value *NElems = EmitScalarExpr(E->getArg(3));
20391
20392
0
    Function *Callee;
20393
0
    if (E->getArg(2)->getType().isWebAssemblyExternrefType())
20394
0
      Callee = CGM.getIntrinsic(Intrinsic::wasm_table_fill_externref);
20395
0
    else if (E->getArg(2)->getType().isWebAssemblyFuncrefType())
20396
0
      Callee = CGM.getIntrinsic(Intrinsic::wasm_table_fill_funcref);
20397
0
    else
20398
0
      llvm_unreachable(
20399
0
          "Unexpected reference type for __builtin_wasm_table_fill");
20400
20401
0
    return Builder.CreateCall(Callee, {Table, Index, Val, NElems});
20402
0
  }
20403
0
  case WebAssembly::BI__builtin_wasm_table_copy: {
20404
0
    assert(E->getArg(0)->getType()->isArrayType());
20405
0
    Value *TableX = EmitArrayToPointerDecay(E->getArg(0)).getPointer();
20406
0
    Value *TableY = EmitArrayToPointerDecay(E->getArg(1)).getPointer();
20407
0
    Value *DstIdx = EmitScalarExpr(E->getArg(2));
20408
0
    Value *SrcIdx = EmitScalarExpr(E->getArg(3));
20409
0
    Value *NElems = EmitScalarExpr(E->getArg(4));
20410
20411
0
    Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_table_copy);
20412
20413
0
    return Builder.CreateCall(Callee, {TableX, TableY, SrcIdx, DstIdx, NElems});
20414
0
  }
20415
0
  default:
20416
0
    return nullptr;
20417
283
  }
20418
283
}
20419
20420
static std::pair<Intrinsic::ID, unsigned>
20421
0
getIntrinsicForHexagonNonClangBuiltin(unsigned BuiltinID) {
20422
0
  struct Info {
20423
0
    unsigned BuiltinID;
20424
0
    Intrinsic::ID IntrinsicID;
20425
0
    unsigned VecLen;
20426
0
  };
20427
0
  static Info Infos[] = {
20428
0
#define CUSTOM_BUILTIN_MAPPING(x,s) \
20429
0
  { Hexagon::BI__builtin_HEXAGON_##x, Intrinsic::hexagon_##x, s },
20430
0
    CUSTOM_BUILTIN_MAPPING(L2_loadrub_pci, 0)
20431
0
    CUSTOM_BUILTIN_MAPPING(L2_loadrb_pci, 0)
20432
0
    CUSTOM_BUILTIN_MAPPING(L2_loadruh_pci, 0)
20433
0
    CUSTOM_BUILTIN_MAPPING(L2_loadrh_pci, 0)
20434
0
    CUSTOM_BUILTIN_MAPPING(L2_loadri_pci, 0)
20435
0
    CUSTOM_BUILTIN_MAPPING(L2_loadrd_pci, 0)
20436
0
    CUSTOM_BUILTIN_MAPPING(L2_loadrub_pcr, 0)
20437
0
    CUSTOM_BUILTIN_MAPPING(L2_loadrb_pcr, 0)
20438
0
    CUSTOM_BUILTIN_MAPPING(L2_loadruh_pcr, 0)
20439
0
    CUSTOM_BUILTIN_MAPPING(L2_loadrh_pcr, 0)
20440
0
    CUSTOM_BUILTIN_MAPPING(L2_loadri_pcr, 0)
20441
0
    CUSTOM_BUILTIN_MAPPING(L2_loadrd_pcr, 0)
20442
0
    CUSTOM_BUILTIN_MAPPING(S2_storerb_pci, 0)
20443
0
    CUSTOM_BUILTIN_MAPPING(S2_storerh_pci, 0)
20444
0
    CUSTOM_BUILTIN_MAPPING(S2_storerf_pci, 0)
20445
0
    CUSTOM_BUILTIN_MAPPING(S2_storeri_pci, 0)
20446
0
    CUSTOM_BUILTIN_MAPPING(S2_storerd_pci, 0)
20447
0
    CUSTOM_BUILTIN_MAPPING(S2_storerb_pcr, 0)
20448
0
    CUSTOM_BUILTIN_MAPPING(S2_storerh_pcr, 0)
20449
0
    CUSTOM_BUILTIN_MAPPING(S2_storerf_pcr, 0)
20450
0
    CUSTOM_BUILTIN_MAPPING(S2_storeri_pcr, 0)
20451
0
    CUSTOM_BUILTIN_MAPPING(S2_storerd_pcr, 0)
20452
    // Legacy builtins that take a vector in place of a vector predicate.
20453
0
    CUSTOM_BUILTIN_MAPPING(V6_vmaskedstoreq, 64)
20454
0
    CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorenq, 64)
20455
0
    CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentq, 64)
20456
0
    CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentnq, 64)
20457
0
    CUSTOM_BUILTIN_MAPPING(V6_vmaskedstoreq_128B, 128)
20458
0
    CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorenq_128B, 128)
20459
0
    CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentq_128B, 128)
20460
0
    CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentnq_128B, 128)
20461
0
#include "clang/Basic/BuiltinsHexagonMapCustomDep.def"
20462
0
#undef CUSTOM_BUILTIN_MAPPING
20463
0
  };
20464
20465
0
  auto CmpInfo = [] (Info A, Info B) { return A.BuiltinID < B.BuiltinID; };
20466
0
  static const bool SortOnce = (llvm::sort(Infos, CmpInfo), true);
20467
0
  (void)SortOnce;
20468
20469
0
  const Info *F = llvm::lower_bound(Infos, Info{BuiltinID, 0, 0}, CmpInfo);
20470
0
  if (F == std::end(Infos) || F->BuiltinID != BuiltinID)
20471
0
    return {Intrinsic::not_intrinsic, 0};
20472
20473
0
  return {F->IntrinsicID, F->VecLen};
20474
0
}
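// A minimal, self-contained sketch of the same sort-once / binary-search
// lookup idiom used above, written with hypothetical Entry/Table/lookup names:
#if 0
#include <algorithm>
#include <iterator>

struct Entry { unsigned Key; int Payload; };
static Entry Table[] = {{3, 30}, {1, 10}, {2, 20}};

int lookup(unsigned Key) {
  auto Cmp = [](const Entry &A, const Entry &B) { return A.Key < B.Key; };
  // Sort exactly once, on first use, via a function-local static.
  static const bool Sorted =
      (std::sort(std::begin(Table), std::end(Table), Cmp), true);
  (void)Sorted;
  const Entry *F =
      std::lower_bound(std::begin(Table), std::end(Table), Entry{Key, 0}, Cmp);
  if (F == std::end(Table) || F->Key != Key)
    return -1; // not found
  return F->Payload;
}
#endif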
20475
20476
Value *CodeGenFunction::EmitHexagonBuiltinExpr(unsigned BuiltinID,
20477
0
                                               const CallExpr *E) {
20478
0
  Intrinsic::ID ID;
20479
0
  unsigned VecLen;
20480
0
  std::tie(ID, VecLen) = getIntrinsicForHexagonNonClangBuiltin(BuiltinID);
20481
20482
0
  auto MakeCircOp = [this, E](unsigned IntID, bool IsLoad) {
20483
    // The base pointer is passed by address, so it needs to be loaded.
20484
0
    Address A = EmitPointerWithAlignment(E->getArg(0));
20485
0
    Address BP = Address(A.getPointer(), Int8PtrTy, A.getAlignment());
20486
0
    llvm::Value *Base = Builder.CreateLoad(BP);
20487
    // The treatment of both loads and stores is the same: the arguments for
20488
    // the builtin are the same as the arguments for the intrinsic.
20489
    // Load:
20490
    //   builtin(Base, Inc, Mod, Start) -> intr(Base, Inc, Mod, Start)
20491
    //   builtin(Base, Mod, Start)      -> intr(Base, Mod, Start)
20492
    // Store:
20493
    //   builtin(Base, Inc, Mod, Val, Start) -> intr(Base, Inc, Mod, Val, Start)
20494
    //   builtin(Base, Mod, Val, Start)      -> intr(Base, Mod, Val, Start)
20495
0
    SmallVector<llvm::Value*,5> Ops = { Base };
20496
0
    for (unsigned i = 1, e = E->getNumArgs(); i != e; ++i)
20497
0
      Ops.push_back(EmitScalarExpr(E->getArg(i)));
20498
20499
0
    llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(IntID), Ops);
20500
    // The load intrinsics generate two results (Value, NewBase), stores
20501
    // generate one (NewBase). The new base address needs to be stored.
20502
0
    llvm::Value *NewBase = IsLoad ? Builder.CreateExtractValue(Result, 1)
20503
0
                                  : Result;
20504
0
    llvm::Value *LV = EmitScalarExpr(E->getArg(0));
20505
0
    Address Dest = EmitPointerWithAlignment(E->getArg(0));
20506
0
    llvm::Value *RetVal =
20507
0
        Builder.CreateAlignedStore(NewBase, LV, Dest.getAlignment());
20508
0
    if (IsLoad)
20509
0
      RetVal = Builder.CreateExtractValue(Result, 0);
20510
0
    return RetVal;
20511
0
  };
20512
20513
  // Handle the conversion of bit-reverse load intrinsics to bit code.
20514
  // The intrinsic call after this function only reads from memory and the
20515
  // write to memory is handled by the store instruction.
20516
0
  auto MakeBrevLd = [this, E](unsigned IntID, llvm::Type *DestTy) {
20517
    // The intrinsic generates one result, which is the new value for the base
20518
    // pointer. It needs to be returned. The result of the load instruction is
20519
    // passed to the intrinsic by address, so the value needs to be stored.
20520
0
    llvm::Value *BaseAddress = EmitScalarExpr(E->getArg(0));
20521
20522
    // Expressions like &(*pt++) will be incremented per evaluation.
20523
    // EmitPointerWithAlignment and EmitScalarExpr evaluate the expression
20524
    // per call.
20525
0
    Address DestAddr = EmitPointerWithAlignment(E->getArg(1));
20526
0
    DestAddr = Address(DestAddr.getPointer(), Int8Ty, DestAddr.getAlignment());
20527
0
    llvm::Value *DestAddress = DestAddr.getPointer();
20528
20529
    // Operands are Base, Dest, Modifier.
20530
    // The intrinsic format in LLVM IR is defined as
20531
    // { ValueType, i8* } (i8*, i32).
20532
0
    llvm::Value *Result = Builder.CreateCall(
20533
0
        CGM.getIntrinsic(IntID), {BaseAddress, EmitScalarExpr(E->getArg(2))});
20534
20535
    // The value needs to be stored as the variable is passed by reference.
20536
0
    llvm::Value *DestVal = Builder.CreateExtractValue(Result, 0);
20537
20538
    // The store needs to be truncated to fit the destination type.
20539
    // While i32 and i64 are natively supported on Hexagon, i8 and i16 need
20540
    // to be handled with stores of the respective destination type.
20541
0
    DestVal = Builder.CreateTrunc(DestVal, DestTy);
20542
20543
0
    Builder.CreateAlignedStore(DestVal, DestAddress, DestAddr.getAlignment());
20544
    // The updated value of the base pointer is returned.
20545
0
    return Builder.CreateExtractValue(Result, 1);
20546
0
  };
20547
20548
0
  auto V2Q = [this, VecLen] (llvm::Value *Vec) {
20549
0
    Intrinsic::ID ID = VecLen == 128 ? Intrinsic::hexagon_V6_vandvrt_128B
20550
0
                                     : Intrinsic::hexagon_V6_vandvrt;
20551
0
    return Builder.CreateCall(CGM.getIntrinsic(ID),
20552
0
                              {Vec, Builder.getInt32(-1)});
20553
0
  };
20554
0
  auto Q2V = [this, VecLen] (llvm::Value *Pred) {
20555
0
    Intrinsic::ID ID = VecLen == 128 ? Intrinsic::hexagon_V6_vandqrt_128B
20556
0
                                     : Intrinsic::hexagon_V6_vandqrt;
20557
0
    return Builder.CreateCall(CGM.getIntrinsic(ID),
20558
0
                              {Pred, Builder.getInt32(-1)});
20559
0
  };
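  // V2Q and Q2V bridge between HVX byte vectors and vector predicates: both
  // are implemented as an "and" against the all-ones scalar -1 via the
  // vandvrt / vandqrt intrinsics, with the _128B variants selected when
  // VecLen is 128.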
20560
20561
0
  switch (BuiltinID) {
20562
  // These intrinsics return a tuple {Vector, VectorPred} in LLVM IR,
20563
  // and the corresponding C/C++ builtins use loads/stores to update
20564
  // the predicate.
20565
0
  case Hexagon::BI__builtin_HEXAGON_V6_vaddcarry:
20566
0
  case Hexagon::BI__builtin_HEXAGON_V6_vaddcarry_128B:
20567
0
  case Hexagon::BI__builtin_HEXAGON_V6_vsubcarry:
20568
0
  case Hexagon::BI__builtin_HEXAGON_V6_vsubcarry_128B: {
20569
    // Get the type from the 0-th argument.
20570
0
    llvm::Type *VecType = ConvertType(E->getArg(0)->getType());
20571
0
    Address PredAddr =
20572
0
        EmitPointerWithAlignment(E->getArg(2)).withElementType(VecType);
20573
0
    llvm::Value *PredIn = V2Q(Builder.CreateLoad(PredAddr));
20574
0
    llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(ID),
20575
0
        {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)), PredIn});
20576
20577
0
    llvm::Value *PredOut = Builder.CreateExtractValue(Result, 1);
20578
0
    Builder.CreateAlignedStore(Q2V(PredOut), PredAddr.getPointer(),
20579
0
        PredAddr.getAlignment());
20580
0
    return Builder.CreateExtractValue(Result, 0);
20581
0
  }
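  // For the carry builtins above, the carry predicate is passed by address as
  // the third argument: it is loaded and converted with V2Q on the way in, and
  // the carry-out extracted from the intrinsic result is converted back with
  // Q2V and stored to the same address.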
20582
  // These are identical to the builtins above, except they don't consume
20583
  // input carry, only generate carry-out. Since they still produce two
20584
  // outputs, generate the store of the predicate, but no load.
20585
0
  case Hexagon::BI__builtin_HEXAGON_V6_vaddcarryo:
20586
0
  case Hexagon::BI__builtin_HEXAGON_V6_vaddcarryo_128B:
20587
0
  case Hexagon::BI__builtin_HEXAGON_V6_vsubcarryo:
20588
0
  case Hexagon::BI__builtin_HEXAGON_V6_vsubcarryo_128B: {
20589
    // Get the type from the 0-th argument.
20590
0
    llvm::Type *VecType = ConvertType(E->getArg(0)->getType());
20591
0
    Address PredAddr =
20592
0
        EmitPointerWithAlignment(E->getArg(2)).withElementType(VecType);
20593
0
    llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(ID),
20594
0
        {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1))});
20595
20596
0
    llvm::Value *PredOut = Builder.CreateExtractValue(Result, 1);
20597
0
    Builder.CreateAlignedStore(Q2V(PredOut), PredAddr.getPointer(),
20598
0
        PredAddr.getAlignment());
20599
0
    return Builder.CreateExtractValue(Result, 0);
20600
0
  }
20601
20602
0
  case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstoreq:
20603
0
  case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorenq:
20604
0
  case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorentq:
20605
0
  case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorentnq:
20606
0
  case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstoreq_128B:
20607
0
  case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorenq_128B:
20608
0
  case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorentq_128B:
20609
0
  case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorentnq_128B: {
20610
0
    SmallVector<llvm::Value*,4> Ops;
20611
0
    const Expr *PredOp = E->getArg(0);
20612
    // There will be an implicit cast to a boolean vector. Strip it.
20613
0
    if (auto *Cast = dyn_cast<ImplicitCastExpr>(PredOp)) {
20614
0
      if (Cast->getCastKind() == CK_BitCast)
20615
0
        PredOp = Cast->getSubExpr();
20616
0
      Ops.push_back(V2Q(EmitScalarExpr(PredOp)));
20617
0
    }
20618
0
    for (int i = 1, e = E->getNumArgs(); i != e; ++i)
20619
0
      Ops.push_back(EmitScalarExpr(E->getArg(i)));
20620
0
    return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
20621
0
  }
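  // The legacy masked-store builtins take a byte vector where the intrinsic
  // expects a vector predicate, so the implicit bitcast around the first
  // argument is stripped and the vector is converted with V2Q before the call.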
20622
20623
0
  case Hexagon::BI__builtin_HEXAGON_L2_loadrub_pci:
20624
0
  case Hexagon::BI__builtin_HEXAGON_L2_loadrb_pci:
20625
0
  case Hexagon::BI__builtin_HEXAGON_L2_loadruh_pci:
20626
0
  case Hexagon::BI__builtin_HEXAGON_L2_loadrh_pci:
20627
0
  case Hexagon::BI__builtin_HEXAGON_L2_loadri_pci:
20628
0
  case Hexagon::BI__builtin_HEXAGON_L2_loadrd_pci:
20629
0
  case Hexagon::BI__builtin_HEXAGON_L2_loadrub_pcr:
20630
0
  case Hexagon::BI__builtin_HEXAGON_L2_loadrb_pcr:
20631
0
  case Hexagon::BI__builtin_HEXAGON_L2_loadruh_pcr:
20632
0
  case Hexagon::BI__builtin_HEXAGON_L2_loadrh_pcr:
20633
0
  case Hexagon::BI__builtin_HEXAGON_L2_loadri_pcr:
20634
0
  case Hexagon::BI__builtin_HEXAGON_L2_loadrd_pcr:
20635
0
    return MakeCircOp(ID, /*IsLoad=*/true);
20636
0
  case Hexagon::BI__builtin_HEXAGON_S2_storerb_pci:
20637
0
  case Hexagon::BI__builtin_HEXAGON_S2_storerh_pci:
20638
0
  case Hexagon::BI__builtin_HEXAGON_S2_storerf_pci:
20639
0
  case Hexagon::BI__builtin_HEXAGON_S2_storeri_pci:
20640
0
  case Hexagon::BI__builtin_HEXAGON_S2_storerd_pci:
20641
0
  case Hexagon::BI__builtin_HEXAGON_S2_storerb_pcr:
20642
0
  case Hexagon::BI__builtin_HEXAGON_S2_storerh_pcr:
20643
0
  case Hexagon::BI__builtin_HEXAGON_S2_storerf_pcr:
20644
0
  case Hexagon::BI__builtin_HEXAGON_S2_storeri_pcr:
20645
0
  case Hexagon::BI__builtin_HEXAGON_S2_storerd_pcr:
20646
0
    return MakeCircOp(ID, /*IsLoad=*/false);
20647
0
  case Hexagon::BI__builtin_brev_ldub:
20648
0
    return MakeBrevLd(Intrinsic::hexagon_L2_loadrub_pbr, Int8Ty);
20649
0
  case Hexagon::BI__builtin_brev_ldb:
20650
0
    return MakeBrevLd(Intrinsic::hexagon_L2_loadrb_pbr, Int8Ty);
20651
0
  case Hexagon::BI__builtin_brev_lduh:
20652
0
    return MakeBrevLd(Intrinsic::hexagon_L2_loadruh_pbr, Int16Ty);
20653
0
  case Hexagon::BI__builtin_brev_ldh:
20654
0
    return MakeBrevLd(Intrinsic::hexagon_L2_loadrh_pbr, Int16Ty);
20655
0
  case Hexagon::BI__builtin_brev_ldw:
20656
0
    return MakeBrevLd(Intrinsic::hexagon_L2_loadri_pbr, Int32Ty);
20657
0
  case Hexagon::BI__builtin_brev_ldd:
20658
0
    return MakeBrevLd(Intrinsic::hexagon_L2_loadrd_pbr, Int64Ty);
20659
0
  } // switch
20660
20661
0
  return nullptr;
20662
0
}
20663
20664
Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
20665
                                             const CallExpr *E,
20666
52
                                             ReturnValueSlot ReturnValue) {
20667
52
  SmallVector<Value *, 4> Ops;
20668
52
  llvm::Type *ResultType = ConvertType(E->getType());
20669
20670
  // Find out if any arguments are required to be integer constant expressions.
20671
52
  unsigned ICEArguments = 0;
20672
52
  ASTContext::GetBuiltinTypeError Error;
20673
52
  getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
20674
52
  if (Error == ASTContext::GE_Missing_type) {
20675
    // Vector intrinsics don't have a type string.
20676
0
    assert(BuiltinID >= clang::RISCV::FirstRVVBuiltin &&
20677
0
           BuiltinID <= clang::RISCV::LastRVVBuiltin);
20678
0
    ICEArguments = 0;
20679
0
    if (BuiltinID == RISCVVector::BI__builtin_rvv_vget_v ||
20680
0
        BuiltinID == RISCVVector::BI__builtin_rvv_vset_v)
20681
0
      ICEArguments = 1 << 1;
20682
52
  } else {
20683
52
    assert(Error == ASTContext::GE_None && "Unexpected error");
20684
52
  }
20685
20686
52
  if (BuiltinID == RISCV::BI__builtin_riscv_ntl_load)
20687
0
    ICEArguments |= (1 << 1);
20688
52
  if (BuiltinID == RISCV::BI__builtin_riscv_ntl_store)
20689
0
    ICEArguments |= (1 << 2);
20690
20691
128
  for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
20692
    // Handle aggregate argument, namely RVV tuple types in segment load/store
20693
76
    if (hasAggregateEvaluationKind(E->getArg(i)->getType())) {
20694
0
      LValue L = EmitAggExprToLValue(E->getArg(i));
20695
0
      llvm::Value *AggValue = Builder.CreateLoad(L.getAddress(*this));
20696
0
      Ops.push_back(AggValue);
20697
0
      continue;
20698
0
    }
20699
76
    Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
20700
76
  }
20701
20702
52
  Intrinsic::ID ID = Intrinsic::not_intrinsic;
20703
52
  unsigned NF = 1;
20704
  // The 0th bit simulates the `vta` of RVV
20705
  // The 1st bit simulates the `vma` of RVV
20706
52
  constexpr unsigned RVV_VTA = 0x1;
20707
52
  constexpr unsigned RVV_VMA = 0x2;
20708
52
  int PolicyAttrs = 0;
20709
52
  bool IsMasked = false;
20710
20711
  // Required for overloaded intrinsics.
20712
52
  llvm::SmallVector<llvm::Type *, 2> IntrinsicTypes;
20713
52
  switch (BuiltinID) {
20714
0
  default: llvm_unreachable("unexpected builtin ID");
20715
2
  case RISCV::BI__builtin_riscv_orc_b_32:
20716
3
  case RISCV::BI__builtin_riscv_orc_b_64:
20717
9
  case RISCV::BI__builtin_riscv_clz_32:
20718
12
  case RISCV::BI__builtin_riscv_clz_64:
20719
14
  case RISCV::BI__builtin_riscv_ctz_32:
20720
15
  case RISCV::BI__builtin_riscv_ctz_64:
20721
19
  case RISCV::BI__builtin_riscv_clmul_32:
20722
21
  case RISCV::BI__builtin_riscv_clmul_64:
20723
23
  case RISCV::BI__builtin_riscv_clmulh_32:
20724
25
  case RISCV::BI__builtin_riscv_clmulh_64:
20725
26
  case RISCV::BI__builtin_riscv_clmulr_32:
20726
27
  case RISCV::BI__builtin_riscv_clmulr_64:
20727
28
  case RISCV::BI__builtin_riscv_xperm4_32:
20728
29
  case RISCV::BI__builtin_riscv_xperm4_64:
20729
30
  case RISCV::BI__builtin_riscv_xperm8_32:
20730
31
  case RISCV::BI__builtin_riscv_xperm8_64:
20731
33
  case RISCV::BI__builtin_riscv_brev8_32:
20732
34
  case RISCV::BI__builtin_riscv_brev8_64:
20733
35
  case RISCV::BI__builtin_riscv_zip_32:
20734
36
  case RISCV::BI__builtin_riscv_unzip_32: {
20735
36
    switch (BuiltinID) {
20736
0
    default: llvm_unreachable("unexpected builtin ID");
20737
    // Zbb
20738
2
    case RISCV::BI__builtin_riscv_orc_b_32:
20739
3
    case RISCV::BI__builtin_riscv_orc_b_64:
20740
3
      ID = Intrinsic::riscv_orc_b;
20741
3
      break;
20742
6
    case RISCV::BI__builtin_riscv_clz_32:
20743
9
    case RISCV::BI__builtin_riscv_clz_64: {
20744
9
      Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType());
20745
9
      Value *Result = Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});
20746
9
      if (Result->getType() != ResultType)
20747
3
        Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
20748
3
                                       "cast");
20749
9
      return Result;
20750
6
    }
20751
2
    case RISCV::BI__builtin_riscv_ctz_32:
20752
3
    case RISCV::BI__builtin_riscv_ctz_64: {
20753
3
      Function *F = CGM.getIntrinsic(Intrinsic::cttz, Ops[0]->getType());
20754
3
      Value *Result = Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});
20755
3
      if (Result->getType() != ResultType)
20756
1
        Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
20757
1
                                       "cast");
20758
3
      return Result;
20759
2
    }
20760
20761
    // Zbc
20762
4
    case RISCV::BI__builtin_riscv_clmul_32:
20763
6
    case RISCV::BI__builtin_riscv_clmul_64:
20764
6
      ID = Intrinsic::riscv_clmul;
20765
6
      break;
20766
2
    case RISCV::BI__builtin_riscv_clmulh_32:
20767
4
    case RISCV::BI__builtin_riscv_clmulh_64:
20768
4
      ID = Intrinsic::riscv_clmulh;
20769
4
      break;
20770
1
    case RISCV::BI__builtin_riscv_clmulr_32:
20771
2
    case RISCV::BI__builtin_riscv_clmulr_64:
20772
2
      ID = Intrinsic::riscv_clmulr;
20773
2
      break;
20774
20775
    // Zbkx
20776
1
    case RISCV::BI__builtin_riscv_xperm8_32:
20777
2
    case RISCV::BI__builtin_riscv_xperm8_64:
20778
2
      ID = Intrinsic::riscv_xperm8;
20779
2
      break;
20780
1
    case RISCV::BI__builtin_riscv_xperm4_32:
20781
2
    case RISCV::BI__builtin_riscv_xperm4_64:
20782
2
      ID = Intrinsic::riscv_xperm4;
20783
2
      break;
20784
20785
    // Zbkb
20786
2
    case RISCV::BI__builtin_riscv_brev8_32:
20787
3
    case RISCV::BI__builtin_riscv_brev8_64:
20788
3
      ID = Intrinsic::riscv_brev8;
20789
3
      break;
20790
1
    case RISCV::BI__builtin_riscv_zip_32:
20791
1
      ID = Intrinsic::riscv_zip;
20792
1
      break;
20793
1
    case RISCV::BI__builtin_riscv_unzip_32:
20794
1
      ID = Intrinsic::riscv_unzip;
20795
1
      break;
20796
36
    }
20797
20798
24
    IntrinsicTypes = {ResultType};
20799
24
    break;
20800
36
  }
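  // Within the block above, clz/ctz return early with a generic llvm.ctlz /
  // llvm.cttz call (is-zero-poison = false), adding an integer cast when the
  // intrinsic's result width differs from the builtin's result type; the other
  // builtins only select an overloaded intrinsic ID and fall through to the
  // common call emission at the end of the function.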
20801
20802
  // Zk builtins
20803
20804
  // Zknh
20805
2
  case RISCV::BI__builtin_riscv_sha256sig0:
20806
2
    ID = Intrinsic::riscv_sha256sig0;
20807
2
    break;
20808
2
  case RISCV::BI__builtin_riscv_sha256sig1:
20809
2
    ID = Intrinsic::riscv_sha256sig1;
20810
2
    break;
20811
2
  case RISCV::BI__builtin_riscv_sha256sum0:
20812
2
    ID = Intrinsic::riscv_sha256sum0;
20813
2
    break;
20814
2
  case RISCV::BI__builtin_riscv_sha256sum1:
20815
2
    ID = Intrinsic::riscv_sha256sum1;
20816
2
    break;
20817
20818
  // Zksed
20819
2
  case RISCV::BI__builtin_riscv_sm4ks:
20820
2
    ID = Intrinsic::riscv_sm4ks;
20821
2
    break;
20822
2
  case RISCV::BI__builtin_riscv_sm4ed:
20823
2
    ID = Intrinsic::riscv_sm4ed;
20824
2
    break;
20825
20826
  // Zksh
20827
2
  case RISCV::BI__builtin_riscv_sm3p0:
20828
2
    ID = Intrinsic::riscv_sm3p0;
20829
2
    break;
20830
2
  case RISCV::BI__builtin_riscv_sm3p1:
20831
2
    ID = Intrinsic::riscv_sm3p1;
20832
2
    break;
20833
20834
  // Zihintntl
20835
0
  case RISCV::BI__builtin_riscv_ntl_load: {
20836
0
    llvm::Type *ResTy = ConvertType(E->getType());
20837
0
    unsigned DomainVal = 5; // Default __RISCV_NTLH_ALL
20838
0
    if (Ops.size() == 2)
20839
0
      DomainVal = cast<ConstantInt>(Ops[1])->getZExtValue();
20840
20841
0
    llvm::MDNode *RISCVDomainNode = llvm::MDNode::get(
20842
0
        getLLVMContext(),
20843
0
        llvm::ConstantAsMetadata::get(Builder.getInt32(DomainVal)));
20844
0
    llvm::MDNode *NontemporalNode = llvm::MDNode::get(
20845
0
        getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));
20846
20847
0
    int Width;
20848
0
    if(ResTy->isScalableTy()) {
20849
0
      const ScalableVectorType *SVTy = cast<ScalableVectorType>(ResTy);
20850
0
      llvm::Type *ScalarTy = ResTy->getScalarType();
20851
0
      Width = ScalarTy->getPrimitiveSizeInBits() *
20852
0
              SVTy->getElementCount().getKnownMinValue();
20853
0
    } else
20854
0
      Width = ResTy->getPrimitiveSizeInBits();
20855
0
    LoadInst *Load = Builder.CreateLoad(
20856
0
        Address(Ops[0], ResTy, CharUnits::fromQuantity(Width / 8)));
20857
20858
0
    Load->setMetadata(llvm::LLVMContext::MD_nontemporal, NontemporalNode);
20859
0
    Load->setMetadata(CGM.getModule().getMDKindID("riscv-nontemporal-domain"),
20860
0
                      RISCVDomainNode);
20861
20862
0
    return Load;
20863
36
  }
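  // The non-temporal load above is tagged with !nontemporal metadata (i32 1)
  // plus a "riscv-nontemporal-domain" node carrying the requested domain;
  // DomainVal defaults to 5 (__RISCV_NTLH_ALL) when no second argument is
  // given.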
20864
0
  case RISCV::BI__builtin_riscv_ntl_store: {
20865
0
    unsigned DomainVal = 5; // Default __RISCV_NTLH_ALL
20866
0
    if (Ops.size() == 3)
20867
0
      DomainVal = cast<ConstantInt>(Ops[2])->getZExtValue();
20868
20869
0
    llvm::MDNode *RISCVDomainNode = llvm::MDNode::get(
20870
0
        getLLVMContext(),
20871
0
        llvm::ConstantAsMetadata::get(Builder.getInt32(DomainVal)));
20872
0
    llvm::MDNode *NontemporalNode = llvm::MDNode::get(
20873
0
        getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));
20874
20875
0
    StoreInst *Store = Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
20876
0
    Store->setMetadata(llvm::LLVMContext::MD_nontemporal, NontemporalNode);
20877
0
    Store->setMetadata(CGM.getModule().getMDKindID("riscv-nontemporal-domain"),
20878
0
                       RISCVDomainNode);
20879
20880
0
    return Store;
20881
36
  }
20882
20883
  // Vector builtins are handled from here.
20884
0
#include "clang/Basic/riscv_vector_builtin_cg.inc"
20885
  // SiFive Vector builtins are handled from here.
20886
52
#include "clang/Basic/riscv_sifive_vector_builtin_cg.inc"
20887
52
  }
20888
20889
40
  assert(ID != Intrinsic::not_intrinsic);
20890
20891
40
  llvm::Function *F = CGM.getIntrinsic(ID, IntrinsicTypes);
20892
40
  return Builder.CreateCall(F, Ops, "");
20893
40
}