Coverage Report

Created: 2017-10-03 07:32

/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/tools/clang/lib/CodeGen/CGBuiltin.cpp

//===---- CGBuiltin.cpp - Emit LLVM Code for builtins ---------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This contains code to emit Builtin calls as LLVM code.
//
//===----------------------------------------------------------------------===//

#include "CGCXXABI.h"
#include "CGObjCRuntime.h"
#include "CGOpenCLRuntime.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
#include "ConstantEmitter.h"
#include "TargetInfo.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/Decl.h"
#include "clang/Analysis/Analyses/OSLog.h"
#include "clang/Basic/TargetBuiltins.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/CodeGen/CGFunctionInfo.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/Support/ConvertUTF.h"
#include <sstream>

using namespace clang;
using namespace CodeGen;
using namespace llvm;

static
int64_t clamp(int64_t Value, int64_t Low, int64_t High) {
  return std::min(High, std::max(Low, Value));
}
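
// For illustration (added note, not in the original source):
//   clamp(42, 0, 15) == 15, clamp(-3, 0, 15) == 0, clamp(7, 0, 15) == 7.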

/// getBuiltinLibFunction - Given a builtin id for a function like
/// "__builtin_fabsf", return a Function* for "fabsf".
llvm::Constant *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD,
                                                     unsigned BuiltinID) {
  assert(Context.BuiltinInfo.isLibFunction(BuiltinID));

  // Get the name, skip over the __builtin_ prefix (if necessary).
  StringRef Name;
  GlobalDecl D(FD);

  // If the builtin has been declared explicitly with an assembler label,
  // use the mangled name. This differs from the plain label on platforms
  // that prefix labels.
  if (FD->hasAttr<AsmLabelAttr>())
    Name = getMangledName(D);
  else
    Name = Context.BuiltinInfo.getName(BuiltinID) + 10;

  llvm::FunctionType *Ty =
    cast<llvm::FunctionType>(getTypes().ConvertType(FD->getType()));

  return GetOrCreateLLVMFunction(Name, Ty, D, /*ForVTable=*/false);
}
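
// Illustrative example (added note): "__builtin_" is exactly 10 characters, so
// for Builtin::BI__builtin_fabsf the name "__builtin_fabsf" + 10 yields
// "fabsf", which is the library function that gets declared.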

/// Emit the conversions required to turn the given value into an
/// integer of the given size.
static Value *EmitToInt(CodeGenFunction &CGF, llvm::Value *V,
                        QualType T, llvm::IntegerType *IntType) {
  V = CGF.EmitToMemory(V, T);

  if (V->getType()->isPointerTy())
    return CGF.Builder.CreatePtrToInt(V, IntType);

  assert(V->getType() == IntType);
  return V;
}

static Value *EmitFromInt(CodeGenFunction &CGF, llvm::Value *V,
                          QualType T, llvm::Type *ResultType) {
  V = CGF.EmitFromMemory(V, T);

  if (ResultType->isPointerTy())
    return CGF.Builder.CreateIntToPtr(V, ResultType);

  assert(V->getType() == ResultType);
  return V;
}

/// Utility to insert an atomic instruction based on Intrinsic::ID
/// and the expression node.
static Value *MakeBinaryAtomicValue(CodeGenFunction &CGF,
                                    llvm::AtomicRMWInst::BinOp Kind,
                                    const CallExpr *E) {
  QualType T = E->getType();
  assert(E->getArg(0)->getType()->isPointerType());
  assert(CGF.getContext().hasSameUnqualifiedType(T,
                                  E->getArg(0)->getType()->getPointeeType()));
  assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));

  llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
  unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();

  llvm::IntegerType *IntType =
    llvm::IntegerType::get(CGF.getLLVMContext(),
                           CGF.getContext().getTypeSize(T));
  llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);

  llvm::Value *Args[2];
  Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
  Args[1] = CGF.EmitScalarExpr(E->getArg(1));
  llvm::Type *ValueType = Args[1]->getType();
  Args[1] = EmitToInt(CGF, Args[1], T, IntType);

  llvm::Value *Result = CGF.Builder.CreateAtomicRMW(
      Kind, Args[0], Args[1], llvm::AtomicOrdering::SequentiallyConsistent);
  return EmitFromInt(CGF, Result, T, ValueType);
}
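
// Illustrative lowering (added note; IR is approximate and assumes i32):
//   __sync_fetch_and_add(p, v)  ==>
//     %old = atomicrmw add i32* %p, i32 %v seq_cst
// The returned value is the memory's previous contents, converted back to the
// source type by EmitFromInt.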

static Value *EmitNontemporalStore(CodeGenFunction &CGF, const CallExpr *E) {
  Value *Val = CGF.EmitScalarExpr(E->getArg(0));
  Value *Address = CGF.EmitScalarExpr(E->getArg(1));

  // Convert the type of the pointer to a pointer to the stored type.
  Val = CGF.EmitToMemory(Val, E->getArg(0)->getType());
  Value *BC = CGF.Builder.CreateBitCast(
      Address, llvm::PointerType::getUnqual(Val->getType()), "cast");
  LValue LV = CGF.MakeNaturalAlignAddrLValue(BC, E->getArg(0)->getType());
  LV.setNontemporal(true);
  CGF.EmitStoreOfScalar(Val, LV, false);
  return nullptr;
}

static Value *EmitNontemporalLoad(CodeGenFunction &CGF, const CallExpr *E) {
  Value *Address = CGF.EmitScalarExpr(E->getArg(0));

  LValue LV = CGF.MakeNaturalAlignAddrLValue(Address, E->getType());
  LV.setNontemporal(true);
  return CGF.EmitLoadOfScalar(LV, E->getExprLoc());
}
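
// Illustrative lowering (added note): __builtin_nontemporal_store(v, p)
// becomes an ordinary store tagged with !nontemporal metadata, e.g.
//   store i32 %v, i32* %p, align 4, !nontemporal !0
// which backends may turn into streaming stores that bypass the cache.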

static RValue EmitBinaryAtomic(CodeGenFunction &CGF,
                               llvm::AtomicRMWInst::BinOp Kind,
                               const CallExpr *E) {
  return RValue::get(MakeBinaryAtomicValue(CGF, Kind, E));
}

/// Utility to insert an atomic instruction based on Intrinsic::ID and
/// the expression node, where the return value is the result of the
/// operation.
static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF,
                                   llvm::AtomicRMWInst::BinOp Kind,
                                   const CallExpr *E,
                                   Instruction::BinaryOps Op,
                                   bool Invert = false) {
  QualType T = E->getType();
  assert(E->getArg(0)->getType()->isPointerType());
  assert(CGF.getContext().hasSameUnqualifiedType(T,
                                  E->getArg(0)->getType()->getPointeeType()));
  assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));

  llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
  unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();

  llvm::IntegerType *IntType =
    llvm::IntegerType::get(CGF.getLLVMContext(),
                           CGF.getContext().getTypeSize(T));
  llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);

  llvm::Value *Args[2];
  Args[1] = CGF.EmitScalarExpr(E->getArg(1));
  llvm::Type *ValueType = Args[1]->getType();
  Args[1] = EmitToInt(CGF, Args[1], T, IntType);
  Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);

  llvm::Value *Result = CGF.Builder.CreateAtomicRMW(
      Kind, Args[0], Args[1], llvm::AtomicOrdering::SequentiallyConsistent);
  Result = CGF.Builder.CreateBinOp(Op, Result, Args[1]);
  if (Invert)
    Result = CGF.Builder.CreateBinOp(llvm::Instruction::Xor, Result,
                                     llvm::ConstantInt::get(IntType, -1));
  Result = EmitFromInt(CGF, Result, T, ValueType);
  return RValue::get(Result);
}
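
// Illustrative example (added note): for __sync_add_and_fetch(p, v) the
// atomicrmw yields the *old* value, so CreateBinOp re-applies Op to produce
// old+v; for the __sync_nand_and_fetch family, Invert additionally flips the
// bits (x ^ -1) to match GCC's nand semantics.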

/// @brief Utility to insert an atomic cmpxchg instruction.
///
/// @param CGF The current codegen function.
/// @param E   Builtin call expression to convert to cmpxchg.
///            arg0 - address to operate on
///            arg1 - value to compare with
///            arg2 - new value
/// @param ReturnBool Specifies whether to return success flag of
///                   cmpxchg result or the old value.
///
/// @returns result of cmpxchg, according to ReturnBool
static Value *MakeAtomicCmpXchgValue(CodeGenFunction &CGF, const CallExpr *E,
                                     bool ReturnBool) {
  QualType T = ReturnBool ? E->getArg(1)->getType() : E->getType();
  llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
  unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();

  llvm::IntegerType *IntType = llvm::IntegerType::get(
      CGF.getLLVMContext(), CGF.getContext().getTypeSize(T));
  llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);

  Value *Args[3];
  Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
  Args[1] = CGF.EmitScalarExpr(E->getArg(1));
  llvm::Type *ValueType = Args[1]->getType();
  Args[1] = EmitToInt(CGF, Args[1], T, IntType);
  Args[2] = EmitToInt(CGF, CGF.EmitScalarExpr(E->getArg(2)), T, IntType);

  Value *Pair = CGF.Builder.CreateAtomicCmpXchg(
      Args[0], Args[1], Args[2], llvm::AtomicOrdering::SequentiallyConsistent,
      llvm::AtomicOrdering::SequentiallyConsistent);
  if (ReturnBool)
    // Extract boolean success flag and zext it to int.
    return CGF.Builder.CreateZExt(CGF.Builder.CreateExtractValue(Pair, 1),
                                  CGF.ConvertType(E->getType()));
  else
    // Extract old value and emit it using the same type as compare value.
    return EmitFromInt(CGF, CGF.Builder.CreateExtractValue(Pair, 0), T,
                       ValueType);
}
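
// Illustrative lowering (added note; approximate i32 IR):
//   __sync_bool_compare_and_swap(p, cmp, new):
//     %pair = cmpxchg i32* %p, i32 %cmp, i32 %new seq_cst seq_cst
//     %ok   = extractvalue { i32, i1 } %pair, 1
//   __sync_val_compare_and_swap returns extractvalue %pair, 0 instead.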

// Emit a simple mangled intrinsic that has 1 argument and a return type
// matching the argument type.
static Value *emitUnaryBuiltin(CodeGenFunction &CGF,
                               const CallExpr *E,
                               unsigned IntrinsicID) {
  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));

  Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
  return CGF.Builder.CreateCall(F, Src0);
}

// Emit an intrinsic that has 2 operands of the same type as its result.
static Value *emitBinaryBuiltin(CodeGenFunction &CGF,
                                const CallExpr *E,
                                unsigned IntrinsicID) {
  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
  llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));

  Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
  return CGF.Builder.CreateCall(F, { Src0, Src1 });
}

// Emit an intrinsic that has 3 operands of the same type as its result.
static Value *emitTernaryBuiltin(CodeGenFunction &CGF,
                                 const CallExpr *E,
                                 unsigned IntrinsicID) {
  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
  llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
  llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2));

  Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
  return CGF.Builder.CreateCall(F, { Src0, Src1, Src2 });
}

// Emit an intrinsic that has 1 float or double operand, and 1 integer.
static Value *emitFPIntBuiltin(CodeGenFunction &CGF,
                               const CallExpr *E,
                               unsigned IntrinsicID) {
  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
  llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));

  Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
  return CGF.Builder.CreateCall(F, {Src0, Src1});
}
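
// Illustrative example (added note): emitUnaryBuiltin(*this, E,
// Intrinsic::fabs) on a float argument produces
//   %0 = call float @llvm.fabs.f32(float %x)
// i.e. the intrinsic is mangled on the operand type.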

/// EmitFAbs - Emit a call to @llvm.fabs().
static Value *EmitFAbs(CodeGenFunction &CGF, Value *V) {
  Value *F = CGF.CGM.getIntrinsic(Intrinsic::fabs, V->getType());
  llvm::CallInst *Call = CGF.Builder.CreateCall(F, V);
  Call->setDoesNotAccessMemory();
  return Call;
}

/// Emit the computation of the sign bit for a floating point value. Returns
/// the i1 sign bit value.
static Value *EmitSignBit(CodeGenFunction &CGF, Value *V) {
  LLVMContext &C = CGF.CGM.getLLVMContext();

  llvm::Type *Ty = V->getType();
  int Width = Ty->getPrimitiveSizeInBits();
  llvm::Type *IntTy = llvm::IntegerType::get(C, Width);
  V = CGF.Builder.CreateBitCast(V, IntTy);
  if (Ty->isPPC_FP128Ty()) {
    // We want the sign bit of the higher-order double. The bitcast we just
    // did works as if the double-double was stored to memory and then
    // read as an i128. The "store" will put the higher-order double in the
    // lower address in both little- and big-Endian modes, but the "load"
    // will treat those bits as a different part of the i128: the low bits in
    // little-Endian, the high bits in big-Endian. Therefore, on big-Endian
    // we need to shift the high bits down to the low before truncating.
    Width >>= 1;
    if (CGF.getTarget().isBigEndian()) {
      Value *ShiftCst = llvm::ConstantInt::get(IntTy, Width);
      V = CGF.Builder.CreateLShr(V, ShiftCst);
    }
    // We are truncating the value in order to extract the higher-order
    // double, which we will be using to extract the sign from.
    IntTy = llvm::IntegerType::get(C, Width);
    V = CGF.Builder.CreateTrunc(V, IntTy);
  }
  Value *Zero = llvm::Constant::getNullValue(IntTy);
  return CGF.Builder.CreateICmpSLT(V, Zero);
}

static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *FD,
                              const CallExpr *E, llvm::Constant *calleeValue) {
  CGCallee callee = CGCallee::forDirect(calleeValue, FD);
  return CGF.EmitCall(E->getCallee()->getType(), callee, E, ReturnValueSlot());
}

/// \brief Emit a call to llvm.{sadd,uadd,ssub,usub,smul,umul}.with.overflow.*
/// depending on IntrinsicID.
///
/// \arg CGF The current codegen function.
/// \arg IntrinsicID The ID for the Intrinsic we wish to generate.
/// \arg X The first argument to the llvm.*.with.overflow.*.
/// \arg Y The second argument to the llvm.*.with.overflow.*.
/// \arg Carry The carry returned by the llvm.*.with.overflow.*.
/// \returns The result (i.e. sum/product) returned by the intrinsic.
static llvm::Value *EmitOverflowIntrinsic(CodeGenFunction &CGF,
                                          const llvm::Intrinsic::ID IntrinsicID,
                                          llvm::Value *X, llvm::Value *Y,
                                          llvm::Value *&Carry) {
  // Make sure we have integers of the same width.
  assert(X->getType() == Y->getType() &&
         "Arguments must be the same type. (Did you forget to make sure both "
         "arguments have the same integer width?)");

  llvm::Value *Callee = CGF.CGM.getIntrinsic(IntrinsicID, X->getType());
  llvm::Value *Tmp = CGF.Builder.CreateCall(Callee, {X, Y});
  Carry = CGF.Builder.CreateExtractValue(Tmp, 1);
  return CGF.Builder.CreateExtractValue(Tmp, 0);
}
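
// Illustrative lowering (added note; approximate i32 IR):
//   %tmp   = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %x, i32 %y)
//   %sum   = extractvalue { i32, i1 } %tmp, 0
//   %carry = extractvalue { i32, i1 } %tmp, 1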

static Value *emitRangedBuiltin(CodeGenFunction &CGF,
                                unsigned IntrinsicID,
                                int low, int high) {
    llvm::MDBuilder MDHelper(CGF.getLLVMContext());
    llvm::MDNode *RNode = MDHelper.createRange(APInt(32, low), APInt(32, high));
    Value *F = CGF.CGM.getIntrinsic(IntrinsicID, {});
    llvm::Instruction *Call = CGF.Builder.CreateCall(F);
    Call->setMetadata(llvm::LLVMContext::MD_range, RNode);
    return Call;
}

namespace {
  struct WidthAndSignedness {
    unsigned Width;
    bool Signed;
  };
}

static WidthAndSignedness
getIntegerWidthAndSignedness(const clang::ASTContext &context,
                             const clang::QualType Type) {
  assert(Type->isIntegerType() && "Given type is not an integer.");
  unsigned Width = Type->isBooleanType() ? 1 : context.getTypeInfo(Type).Width;
  bool Signed = Type->isSignedIntegerType();
  return {Width, Signed};
}
// Given one or more integer types, this function produces an integer type that
// encompasses them: any value in one of the given types could be expressed in
// the encompassing type.
static struct WidthAndSignedness
EncompassingIntegerType(ArrayRef<struct WidthAndSignedness> Types) {
  assert(Types.size() > 0 && "Empty list of types.");

  // If any of the given types is signed, we must return a signed type.
  bool Signed = false;
  for (const auto &Type : Types) {
    Signed |= Type.Signed;
  }

  // The encompassing type must have a width greater than or equal to the width
  // of the specified types.  Additionally, if the encompassing type is signed,
  // its width must be strictly greater than the width of any unsigned types
  // given.
  unsigned Width = 0;
  for (const auto &Type : Types) {
    unsigned MinWidth = Type.Width + (Signed && !Type.Signed);
    if (Width < MinWidth) {
      Width = MinWidth;
    }
  }

  return {Width, Signed};
}
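
// Worked example (added note): for {i32 signed, i32 unsigned} the result must
// be signed, and the unsigned member needs one extra bit, so the encompassing
// type is {Width = 33, Signed = true}.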

Value *CodeGenFunction::EmitVAStartEnd(Value *ArgValue, bool IsStart) {
  llvm::Type *DestType = Int8PtrTy;
  if (ArgValue->getType() != DestType)
    ArgValue =
        Builder.CreateBitCast(ArgValue, DestType, ArgValue->getName().data());

  Intrinsic::ID inst = IsStart ? Intrinsic::vastart : Intrinsic::vaend;
  return Builder.CreateCall(CGM.getIntrinsic(inst), ArgValue);
}
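
// Illustrative lowering (added note): __builtin_va_start(ap, last) becomes
//   call void @llvm.va_start(i8* %ap1)
// after the va_list pointer has been bitcast to i8*.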

/// Checks if using the result of __builtin_object_size(p, @p From) in place of
/// __builtin_object_size(p, @p To) is correct
static bool areBOSTypesCompatible(int From, int To) {
  // Note: Our __builtin_object_size implementation currently treats Type=0 and
  // Type=2 identically. Encoding this implementation detail here may make
  // improving __builtin_object_size difficult in the future, so it's omitted.
  return From == To || (From == 0 && To == 1) || (From == 3 && To == 2);
}

static llvm::Value *
getDefaultBuiltinObjectSizeResult(unsigned Type, llvm::IntegerType *ResType) {
  return ConstantInt::get(ResType, (Type & 2) ? 0 : -1, /*isSigned=*/true);
}

llvm::Value *
CodeGenFunction::evaluateOrEmitBuiltinObjectSize(const Expr *E, unsigned Type,
                                                 llvm::IntegerType *ResType,
                                                 llvm::Value *EmittedE) {
  uint64_t ObjectSize;
  if (!E->tryEvaluateObjectSize(ObjectSize, getContext(), Type))
    return emitBuiltinObjectSize(E, Type, ResType, EmittedE);
  return ConstantInt::get(ResType, ObjectSize, /*isSigned=*/true);
}

/// Returns a Value corresponding to the size of the given expression.
/// This Value may be either of the following:
///   - An llvm::Argument (if E is a param with the pass_object_size attribute
///     on it)
///   - A call to the @llvm.objectsize intrinsic
///
/// EmittedE is the result of emitting `E` as a scalar expr. If it's non-null
/// and we wouldn't otherwise try to reference a pass_object_size parameter,
/// we'll call @llvm.objectsize on EmittedE, rather than emitting E.
llvm::Value *
CodeGenFunction::emitBuiltinObjectSize(const Expr *E, unsigned Type,
                                       llvm::IntegerType *ResType,
                                       llvm::Value *EmittedE) {
  // We need to reference an argument if the pointer is a parameter with the
  // pass_object_size attribute.
  if (auto *D = dyn_cast<DeclRefExpr>(E->IgnoreParenImpCasts())) {
    auto *Param = dyn_cast<ParmVarDecl>(D->getDecl());
    auto *PS = D->getDecl()->getAttr<PassObjectSizeAttr>();
    if (Param != nullptr && PS != nullptr &&
        areBOSTypesCompatible(PS->getType(), Type)) {
      auto Iter = SizeArguments.find(Param);
      assert(Iter != SizeArguments.end());

      const ImplicitParamDecl *D = Iter->second;
      auto DIter = LocalDeclMap.find(D);
      assert(DIter != LocalDeclMap.end());

      return EmitLoadOfScalar(DIter->second, /*volatile=*/false,
                              getContext().getSizeType(), E->getLocStart());
    }
  }

  // LLVM can't handle Type=3 appropriately, and __builtin_object_size
  // shouldn't evaluate E for side-effects. In either case, we shouldn't lower
  // to @llvm.objectsize.
  if (Type == 3 || (!EmittedE && E->HasSideEffects(getContext())))
    return getDefaultBuiltinObjectSizeResult(Type, ResType);

  Value *Ptr = EmittedE ? EmittedE : EmitScalarExpr(E);
  assert(Ptr->getType()->isPointerTy() &&
         "Non-pointer passed to __builtin_object_size?");

  Value *F = CGM.getIntrinsic(Intrinsic::objectsize, {ResType, Ptr->getType()});

  // LLVM only supports 0 and 2, make sure that we pass along that as a boolean.
  Value *Min = Builder.getInt1((Type & 2) != 0);
  // For GCC compatibility, __builtin_object_size treats NULL as unknown size.
  Value *NullIsUnknown = Builder.getTrue();
  return Builder.CreateCall(F, {Ptr, Min, NullIsUnknown});
}
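
// Illustrative lowering (added note; approximate IR for a 64-bit target):
//   __builtin_object_size(p, 0) ==>
//     %sz = call i64 @llvm.objectsize.i64.p0i8(i8* %p, i1 false, i1 true)
// where the i1 arguments correspond to Min = (Type & 2) and NullIsUnknown.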

// Many of the MSVC builtins are on both x64 and ARM; to avoid repeating code,
// we handle them here.
enum class CodeGenFunction::MSVCIntrin {
  _BitScanForward,
  _BitScanReverse,
  _InterlockedAnd,
  _InterlockedDecrement,
  _InterlockedExchange,
  _InterlockedExchangeAdd,
  _InterlockedExchangeSub,
  _InterlockedIncrement,
  _InterlockedOr,
  _InterlockedXor,
  _interlockedbittestandset,
  __fastfail,
};

Value *CodeGenFunction::EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID,
                                            const CallExpr *E) {
  switch (BuiltinID) {
  case MSVCIntrin::_BitScanForward:
  case MSVCIntrin::_BitScanReverse: {
    Value *ArgValue = EmitScalarExpr(E->getArg(1));

    llvm::Type *ArgType = ArgValue->getType();
    llvm::Type *IndexType =
      EmitScalarExpr(E->getArg(0))->getType()->getPointerElementType();
    llvm::Type *ResultType = ConvertType(E->getType());

    Value *ArgZero = llvm::Constant::getNullValue(ArgType);
    Value *ResZero = llvm::Constant::getNullValue(ResultType);
    Value *ResOne = llvm::ConstantInt::get(ResultType, 1);

    BasicBlock *Begin = Builder.GetInsertBlock();
    BasicBlock *End = createBasicBlock("bitscan_end", this->CurFn);
    Builder.SetInsertPoint(End);
    PHINode *Result = Builder.CreatePHI(ResultType, 2, "bitscan_result");

    Builder.SetInsertPoint(Begin);
    Value *IsZero = Builder.CreateICmpEQ(ArgValue, ArgZero);
    BasicBlock *NotZero = createBasicBlock("bitscan_not_zero", this->CurFn);
    Builder.CreateCondBr(IsZero, End, NotZero);
    Result->addIncoming(ResZero, Begin);

    Builder.SetInsertPoint(NotZero);
    Address IndexAddress = EmitPointerWithAlignment(E->getArg(0));

    if (BuiltinID == MSVCIntrin::_BitScanForward) {
      Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
      Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()});
      ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false);
      Builder.CreateStore(ZeroCount, IndexAddress, false);
    } else {
      unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth();
      Value *ArgTypeLastIndex = llvm::ConstantInt::get(IndexType, ArgWidth - 1);

      Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
      Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()});
      ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false);
      Value *Index = Builder.CreateNSWSub(ArgTypeLastIndex, ZeroCount);
      Builder.CreateStore(Index, IndexAddress, false);
    }
    Builder.CreateBr(End);
    Result->addIncoming(ResOne, NotZero);

    Builder.SetInsertPoint(End);
    return Result;
  }
  case MSVCIntrin::_InterlockedAnd:
    return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E);
  case MSVCIntrin::_InterlockedExchange:
    return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E);
  case MSVCIntrin::_InterlockedExchangeAdd:
    return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E);
  case MSVCIntrin::_InterlockedExchangeSub:
    return MakeBinaryAtomicValue(*this, AtomicRMWInst::Sub, E);
  case MSVCIntrin::_InterlockedOr:
    return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E);
  case MSVCIntrin::_InterlockedXor:
    return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E);

  case MSVCIntrin::_interlockedbittestandset: {
    llvm::Value *Addr = EmitScalarExpr(E->getArg(0));
    llvm::Value *Bit = EmitScalarExpr(E->getArg(1));
    AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
        AtomicRMWInst::Or, Addr,
        Builder.CreateShl(ConstantInt::get(Bit->getType(), 1), Bit),
        llvm::AtomicOrdering::SequentiallyConsistent);
    // Shift the relevant bit to the least significant position, truncate to
    // the result type, and test the low bit.
    llvm::Value *Shifted = Builder.CreateLShr(RMWI, Bit);
    llvm::Value *Truncated =
        Builder.CreateTrunc(Shifted, ConvertType(E->getType()));
    return Builder.CreateAnd(Truncated,
                             ConstantInt::get(Truncated->getType(), 1));
  }

  case MSVCIntrin::_InterlockedDecrement: {
    llvm::Type *IntTy = ConvertType(E->getType());
    AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
      AtomicRMWInst::Sub,
      EmitScalarExpr(E->getArg(0)),
      ConstantInt::get(IntTy, 1),
      llvm::AtomicOrdering::SequentiallyConsistent);
    return Builder.CreateSub(RMWI, ConstantInt::get(IntTy, 1));
  }
  case MSVCIntrin::_InterlockedIncrement: {
    llvm::Type *IntTy = ConvertType(E->getType());
    AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
      AtomicRMWInst::Add,
      EmitScalarExpr(E->getArg(0)),
      ConstantInt::get(IntTy, 1),
      llvm::AtomicOrdering::SequentiallyConsistent);
    return Builder.CreateAdd(RMWI, ConstantInt::get(IntTy, 1));
  }

  case MSVCIntrin::__fastfail: {
    // Request immediate process termination from the kernel. The instruction
    // sequences to do this are documented on MSDN:
    // https://msdn.microsoft.com/en-us/library/dn774154.aspx
    llvm::Triple::ArchType ISA = getTarget().getTriple().getArch();
    StringRef Asm, Constraints;
    switch (ISA) {
    default:
      ErrorUnsupported(E, "__fastfail call for this architecture");
      break;
    case llvm::Triple::x86:
    case llvm::Triple::x86_64:
      Asm = "int $$0x29";
      Constraints = "{cx}";
      break;
    case llvm::Triple::thumb:
      Asm = "udf #251";
      Constraints = "{r0}";
      break;
    }
    llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, {Int32Ty}, false);
    llvm::InlineAsm *IA =
        llvm::InlineAsm::get(FTy, Asm, Constraints, /*SideEffects=*/true);
    llvm::AttributeList NoReturnAttr = llvm::AttributeList::get(
        getLLVMContext(), llvm::AttributeList::FunctionIndex,
        llvm::Attribute::NoReturn);
    CallSite CS = Builder.CreateCall(IA, EmitScalarExpr(E->getArg(0)));
    CS.setAttributes(NoReturnAttr);
    return CS.getInstruction();
  }
  }
  llvm_unreachable("Incorrect MSVC intrinsic!");
}
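
// Illustrative example (added note): _InterlockedIncrement(p) lowers to
//   %old = atomicrmw add i32* %p, i32 1 seq_cst
//   %res = add i32 %old, 1
// because atomicrmw returns the pre-increment value while the MSVC intrinsic
// is specified to return the incremented one.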

namespace {
// ARC cleanup for __builtin_os_log_format
struct CallObjCArcUse final : EHScopeStack::Cleanup {
  CallObjCArcUse(llvm::Value *object) : object(object) {}
  llvm::Value *object;

  void Emit(CodeGenFunction &CGF, Flags flags) override {
    CGF.EmitARCIntrinsicUse(object);
  }
};
}

Value *CodeGenFunction::EmitCheckedArgForBuiltin(const Expr *E,
                                                 BuiltinCheckKind Kind) {
  assert((Kind == BCK_CLZPassedZero || Kind == BCK_CTZPassedZero)
          && "Unsupported builtin check kind");

  Value *ArgValue = EmitScalarExpr(E);
  if (!SanOpts.has(SanitizerKind::Builtin) || !getTarget().isCLZForZeroUndef())
    return ArgValue;

  SanitizerScope SanScope(this);
  Value *Cond = Builder.CreateICmpNE(
      ArgValue, llvm::Constant::getNullValue(ArgValue->getType()));
  EmitCheck(std::make_pair(Cond, SanitizerKind::Builtin),
            SanitizerHandler::InvalidBuiltin,
            {EmitCheckSourceLocation(E->getExprLoc()),
             llvm::ConstantInt::get(Builder.getInt8Ty(), Kind)},
            None);
  return ArgValue;
}
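
// Illustrative example (added note): with -fsanitize=builtin on a target where
// the zero input is undefined, __builtin_clz(x) is preceded by a check of the
// form (x != 0) that branches to the invalid-builtin diagnostic handler.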

RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
                                        unsigned BuiltinID, const CallExpr *E,
                                        ReturnValueSlot ReturnValue) {
  // See if we can constant fold this builtin.  If so, don't emit it at all.
  Expr::EvalResult Result;
  if (E->EvaluateAsRValue(Result, CGM.getContext()) &&
      !Result.hasSideEffects()) {
    if (Result.Val.isInt())
      return RValue::get(llvm::ConstantInt::get(getLLVMContext(),
                                                Result.Val.getInt()));
    if (Result.Val.isFloat())
      return RValue::get(llvm::ConstantFP::get(getLLVMContext(),
                                               Result.Val.getFloat()));
  }

  switch (BuiltinID) {
  default: break;  // Handle intrinsics and libm functions below.
  case Builtin::BI__builtin___CFStringMakeConstantString:
  case Builtin::BI__builtin___NSStringMakeConstantString:
    return RValue::get(ConstantEmitter(*this).emitAbstract(E, E->getType()));
  case Builtin::BI__builtin_stdarg_start:
  case Builtin::BI__builtin_va_start:
  case Builtin::BI__va_start:
  case Builtin::BI__builtin_va_end:
    return RValue::get(
        EmitVAStartEnd(BuiltinID == Builtin::BI__va_start
                           ? EmitScalarExpr(E->getArg(0))
                           : EmitVAListRef(E->getArg(0)).getPointer(),
                       BuiltinID != Builtin::BI__builtin_va_end));
  case Builtin::BI__builtin_va_copy: {
    Value *DstPtr = EmitVAListRef(E->getArg(0)).getPointer();
    Value *SrcPtr = EmitVAListRef(E->getArg(1)).getPointer();

    llvm::Type *Type = Int8PtrTy;

    DstPtr = Builder.CreateBitCast(DstPtr, Type);
    SrcPtr = Builder.CreateBitCast(SrcPtr, Type);
    return RValue::get(Builder.CreateCall(CGM.getIntrinsic(Intrinsic::vacopy),
                                          {DstPtr, SrcPtr}));
  }
  case Builtin::BI__builtin_abs:
  case Builtin::BI__builtin_labs:
  case Builtin::BI__builtin_llabs: {
    Value *ArgValue = EmitScalarExpr(E->getArg(0));

    Value *NegOp = Builder.CreateNeg(ArgValue, "neg");
    Value *CmpResult =
    Builder.CreateICmpSGE(ArgValue,
                          llvm::Constant::getNullValue(ArgValue->getType()),
                                                            "abscond");
    Value *Result =
      Builder.CreateSelect(CmpResult, ArgValue, NegOp, "abs");

    return RValue::get(Result);
  }
  case Builtin::BI__builtin_fabs:
  case Builtin::BI__builtin_fabsf:
  case Builtin::BI__builtin_fabsl: {
    return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::fabs));
  }
  case Builtin::BI__builtin_fmod:
  case Builtin::BI__builtin_fmodf:
  case Builtin::BI__builtin_fmodl: {
    Value *Arg1 = EmitScalarExpr(E->getArg(0));
    Value *Arg2 = EmitScalarExpr(E->getArg(1));
    Value *Result = Builder.CreateFRem(Arg1, Arg2, "fmod");
    return RValue::get(Result);
  }
  case Builtin::BI__builtin_copysign:
  case Builtin::BI__builtin_copysignf:
  case Builtin::BI__builtin_copysignl: {
    return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::copysign));
  }
  case Builtin::BI__builtin_ceil:
  case Builtin::BI__builtin_ceilf:
  case Builtin::BI__builtin_ceill: {
    return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::ceil));
  }
  case Builtin::BI__builtin_floor:
  case Builtin::BI__builtin_floorf:
  case Builtin::BI__builtin_floorl: {
    return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::floor));
  }
  case Builtin::BI__builtin_trunc:
  case Builtin::BI__builtin_truncf:
  case Builtin::BI__builtin_truncl: {
    return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::trunc));
  }
  case Builtin::BI__builtin_rint:
  case Builtin::BI__builtin_rintf:
  case Builtin::BI__builtin_rintl: {
    return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::rint));
  }
  case Builtin::BI__builtin_nearbyint:
  case Builtin::BI__builtin_nearbyintf:
  case Builtin::BI__builtin_nearbyintl: {
    return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::nearbyint));
  }
  case Builtin::BI__builtin_round:
  case Builtin::BI__builtin_roundf:
  case Builtin::BI__builtin_roundl: {
    return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::round));
  }
  case Builtin::BI__builtin_fmin:
  case Builtin::BI__builtin_fminf:
  case Builtin::BI__builtin_fminl: {
    return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::minnum));
  }
  case Builtin::BI__builtin_fmax:
  case Builtin::BI__builtin_fmaxf:
  case Builtin::BI__builtin_fmaxl: {
    return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::maxnum));
  }
  case Builtin::BI__builtin_conj:
  case Builtin::BI__builtin_conjf:
  case Builtin::BI__builtin_conjl: {
    ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
    Value *Real = ComplexVal.first;
    Value *Imag = ComplexVal.second;
    Value *Zero =
      Imag->getType()->isFPOrFPVectorTy()
        ? llvm::ConstantFP::getZeroValueForNegation(Imag->getType())
        : llvm::Constant::getNullValue(Imag->getType());

    Imag = Builder.CreateFSub(Zero, Imag, "sub");
    return RValue::getComplex(std::make_pair(Real, Imag));
  }
  case Builtin::BI__builtin_creal:
  case Builtin::BI__builtin_crealf:
  case Builtin::BI__builtin_creall:
  case Builtin::BIcreal:
  case Builtin::BIcrealf:
  case Builtin::BIcreall: {
    ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
    return RValue::get(ComplexVal.first);
  }

  case Builtin::BI__builtin_cimag:
  case Builtin::BI__builtin_cimagf:
  case Builtin::BI__builtin_cimagl:
  case Builtin::BIcimag:
  case Builtin::BIcimagf:
  case Builtin::BIcimagl: {
    ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
    return RValue::get(ComplexVal.second);
  }

  case Builtin::BI__builtin_ctzs:
  case Builtin::BI__builtin_ctz:
  case Builtin::BI__builtin_ctzl:
  case Builtin::BI__builtin_ctzll: {
    Value *ArgValue = EmitCheckedArgForBuiltin(E->getArg(0), BCK_CTZPassedZero);

    llvm::Type *ArgType = ArgValue->getType();
    Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);

    llvm::Type *ResultType = ConvertType(E->getType());
    Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef());
    Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
    if (Result->getType() != ResultType)
      Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
                                     "cast");
    return RValue::get(Result);
  }
  case Builtin::BI__builtin_clzs:
  case Builtin::BI__builtin_clz:
  case Builtin::BI__builtin_clzl:
  case Builtin::BI__builtin_clzll: {
    Value *ArgValue = EmitCheckedArgForBuiltin(E->getArg(0), BCK_CLZPassedZero);

    llvm::Type *ArgType = ArgValue->getType();
    Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);

    llvm::Type *ResultType = ConvertType(E->getType());
    Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef());
    Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
    if (Result->getType() != ResultType)
      Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
                                     "cast");
    return RValue::get(Result);
  }
  case Builtin::BI__builtin_ffs:
  case Builtin::BI__builtin_ffsl:
  case Builtin::BI__builtin_ffsll: {
    // ffs(x) -> x ? cttz(x) + 1 : 0
    Value *ArgValue = EmitScalarExpr(E->getArg(0));

    llvm::Type *ArgType = ArgValue->getType();
    Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);

    llvm::Type *ResultType = ConvertType(E->getType());
    Value *Tmp =
        Builder.CreateAdd(Builder.CreateCall(F, {ArgValue, Builder.getTrue()}),
                          llvm::ConstantInt::get(ArgType, 1));
    Value *Zero = llvm::Constant::getNullValue(ArgType);
    Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero");
    Value *Result = Builder.CreateSelect(IsZero, Zero, Tmp, "ffs");
    if (Result->getType() != ResultType)
      Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
                                     "cast");
    return RValue::get(Result);
  }
  case Builtin::BI__builtin_parity:
  case Builtin::BI__builtin_parityl:
  case Builtin::BI__builtin_parityll: {
    // parity(x) -> ctpop(x) & 1
    Value *ArgValue = EmitScalarExpr(E->getArg(0));

    llvm::Type *ArgType = ArgValue->getType();
    Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);

    llvm::Type *ResultType = ConvertType(E->getType());
    Value *Tmp = Builder.CreateCall(F, ArgValue);
    Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1));
    if (Result->getType() != ResultType)
      Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
                                     "cast");
    return RValue::get(Result);
  }
  case Builtin::BI__popcnt16:
  case Builtin::BI__popcnt:
  case Builtin::BI__popcnt64:
  case Builtin::BI__builtin_popcount:
  case Builtin::BI__builtin_popcountl:
  case Builtin::BI__builtin_popcountll: {
    Value *ArgValue = EmitScalarExpr(E->getArg(0));

    llvm::Type *ArgType = ArgValue->getType();
    Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);

    llvm::Type *ResultType = ConvertType(E->getType());
    Value *Result = Builder.CreateCall(F, ArgValue);
    if (Result->getType() != ResultType)
      Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
                                     "cast");
    return RValue::get(Result);
  }
  case Builtin::BI_rotr8:
  case Builtin::BI_rotr16:
  case Builtin::BI_rotr:
  case Builtin::BI_lrotr:
  case Builtin::BI_rotr64: {
    Value *Val = EmitScalarExpr(E->getArg(0));
    Value *Shift = EmitScalarExpr(E->getArg(1));

    llvm::Type *ArgType = Val->getType();
    Shift = Builder.CreateIntCast(Shift, ArgType, false);
    unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth();
    Value *ArgTypeSize = llvm::ConstantInt::get(ArgType, ArgWidth);
    Value *ArgZero = llvm::Constant::getNullValue(ArgType);

    Value *Mask = llvm::ConstantInt::get(ArgType, ArgWidth - 1);
    Shift = Builder.CreateAnd(Shift, Mask);
    Value *LeftShift = Builder.CreateSub(ArgTypeSize, Shift);

    Value *RightShifted = Builder.CreateLShr(Val, Shift);
    Value *LeftShifted = Builder.CreateShl(Val, LeftShift);
    Value *Rotated = Builder.CreateOr(LeftShifted, RightShifted);

    Value *ShiftIsZero = Builder.CreateICmpEQ(Shift, ArgZero);
    Value *Result = Builder.CreateSelect(ShiftIsZero, Val, Rotated);
    return RValue::get(Result);
  }
  case Builtin::BI_rotl8:
  case Builtin::BI_rotl16:
  case Builtin::BI_rotl:
  case Builtin::BI_lrotl:
  case Builtin::BI_rotl64: {
    Value *Val = EmitScalarExpr(E->getArg(0));
    Value *Shift = EmitScalarExpr(E->getArg(1));

    llvm::Type *ArgType = Val->getType();
    Shift = Builder.CreateIntCast(Shift, ArgType, false);
    unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth();
    Value *ArgTypeSize = llvm::ConstantInt::get(ArgType, ArgWidth);
    Value *ArgZero = llvm::Constant::getNullValue(ArgType);

    Value *Mask = llvm::ConstantInt::get(ArgType, ArgWidth - 1);
    Shift = Builder.CreateAnd(Shift, Mask);
    Value *RightShift = Builder.CreateSub(ArgTypeSize, Shift);

    Value *LeftShifted = Builder.CreateShl(Val, Shift);
    Value *RightShifted = Builder.CreateLShr(Val, RightShift);
    Value *Rotated = Builder.CreateOr(LeftShifted, RightShifted);

    Value *ShiftIsZero = Builder.CreateICmpEQ(Shift, ArgZero);
    Value *Result = Builder.CreateSelect(ShiftIsZero, Val, Rotated);
    return RValue::get(Result);
  }
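
  // Note (added for exposition): the mask/select sequence above implements the
  // rotates without undefined behavior. Shift is reduced modulo the bit width,
  // and a shift amount of zero selects Val directly, because the complementary
  // shift would otherwise equal the full bit width.
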
  case Builtin::BI__builtin_unpredictable: {
    // Always return the argument of __builtin_unpredictable. LLVM does not
    // handle this builtin. Metadata for this builtin should be added directly
    // to instructions such as branches or switches that use it.
    return RValue::get(EmitScalarExpr(E->getArg(0)));
  }
  case Builtin::BI__builtin_expect: {
    Value *ArgValue = EmitScalarExpr(E->getArg(0));
    llvm::Type *ArgType = ArgValue->getType();

    Value *ExpectedValue = EmitScalarExpr(E->getArg(1));
    // Don't generate llvm.expect on -O0 as the backend won't use it for
    // anything.
    // Note, we still IRGen ExpectedValue because it could have side-effects.
    if (CGM.getCodeGenOpts().OptimizationLevel == 0)
      return RValue::get(ArgValue);

    Value *FnExpect = CGM.getIntrinsic(Intrinsic::expect, ArgType);
    Value *Result =
        Builder.CreateCall(FnExpect, {ArgValue, ExpectedValue}, "expval");
    return RValue::get(Result);
  }
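
  // Illustrative lowering (added note): at -O1 and above,
  //   __builtin_expect(x, 1) ==> %expval = call i64 @llvm.expect.i64(i64 %x, i64 1)
  // which branch-probability analysis consumes; at -O0 the argument is
  // returned unchanged, as handled above.
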
  case Builtin::BI__builtin_assume_aligned: {
    Value *PtrValue = EmitScalarExpr(E->getArg(0));
    Value *OffsetValue =
      (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) : nullptr;

    Value *AlignmentValue = EmitScalarExpr(E->getArg(1));
    ConstantInt *AlignmentCI = cast<ConstantInt>(AlignmentValue);
    unsigned Alignment = (unsigned) AlignmentCI->getZExtValue();

    EmitAlignmentAssumption(PtrValue, Alignment, OffsetValue);
    return RValue::get(PtrValue);
  }
  case Builtin::BI__assume:
  case Builtin::BI__builtin_assume: {
    if (E->getArg(0)->HasSideEffects(getContext()))
      return RValue::get(nullptr);

    Value *ArgValue = EmitScalarExpr(E->getArg(0));
    Value *FnAssume = CGM.getIntrinsic(Intrinsic::assume);
    return RValue::get(Builder.CreateCall(FnAssume, ArgValue));
  }
  case Builtin::BI__builtin_bswap16:
  case Builtin::BI__builtin_bswap32:
  case Builtin::BI__builtin_bswap64: {
    return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bswap));
  }
  case Builtin::BI__builtin_bitreverse8:
  case Builtin::BI__builtin_bitreverse16:
  case Builtin::BI__builtin_bitreverse32:
  case Builtin::BI__builtin_bitreverse64: {
    return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bitreverse));
  }
  case Builtin::BI__builtin_object_size: {
    unsigned Type =
        E->getArg(1)->EvaluateKnownConstInt(getContext()).getZExtValue();
    auto *ResType = cast<llvm::IntegerType>(ConvertType(E->getType()));

    // We pass this builtin onto the optimizer so that it can figure out the
    // object size in more complex cases.
    return RValue::get(emitBuiltinObjectSize(E->getArg(0), Type, ResType,
                                             /*EmittedE=*/nullptr));
  }
  case Builtin::BI__builtin_prefetch: {
    Value *Locality, *RW, *Address = EmitScalarExpr(E->getArg(0));
    // FIXME: Technically these constants should be of type 'int', yes?
    RW = (E->getNumArgs() > 1) ? EmitScalarExpr(E->getArg(1)) :
      llvm::ConstantInt::get(Int32Ty, 0);
    Locality = (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) :
      llvm::ConstantInt::get(Int32Ty, 3);
    Value *Data = llvm::ConstantInt::get(Int32Ty, 1);
    Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
    return RValue::get(Builder.CreateCall(F, {Address, RW, Locality, Data}));
  }
  case Builtin::BI__builtin_readcyclecounter: {
    Value *F = CGM.getIntrinsic(Intrinsic::readcyclecounter);
    return RValue::get(Builder.CreateCall(F));
  }
  case Builtin::BI__builtin___clear_cache: {
    Value *Begin = EmitScalarExpr(E->getArg(0));
    Value *End = EmitScalarExpr(E->getArg(1));
    Value *F = CGM.getIntrinsic(Intrinsic::clear_cache);
    return RValue::get(Builder.CreateCall(F, {Begin, End}));
  }
  case Builtin::BI__builtin_trap:
    return RValue::get(EmitTrapCall(Intrinsic::trap));
  case Builtin::BI__debugbreak:
    return RValue::get(EmitTrapCall(Intrinsic::debugtrap));
  case Builtin::BI__builtin_unreachable: {
    if (SanOpts.has(SanitizerKind::Unreachable)) {
      SanitizerScope SanScope(this);
      EmitCheck(std::make_pair(static_cast<llvm::Value *>(Builder.getFalse()),
                               SanitizerKind::Unreachable),
                SanitizerHandler::BuiltinUnreachable,
                EmitCheckSourceLocation(E->getExprLoc()), None);
    } else
      Builder.CreateUnreachable();

    // We do need to preserve an insertion point.
    EmitBlock(createBasicBlock("unreachable.cont"));

    return RValue::get(nullptr);
  }

  case Builtin::BI__builtin_powi:
  case Builtin::BI__builtin_powif:
  case Builtin::BI__builtin_powil: {
    Value *Base = EmitScalarExpr(E->getArg(0));
    Value *Exponent = EmitScalarExpr(E->getArg(1));
    llvm::Type *ArgType = Base->getType();
    Value *F = CGM.getIntrinsic(Intrinsic::powi, ArgType);
    return RValue::get(Builder.CreateCall(F, {Base, Exponent}));
  }

  case Builtin::BI__builtin_isgreater:
  case Builtin::BI__builtin_isgreaterequal:
  case Builtin::BI__builtin_isless:
  case Builtin::BI__builtin_islessequal:
  case Builtin::BI__builtin_islessgreater:
  case Builtin::BI__builtin_isunordered: {
    // Ordered comparisons: we know the arguments to these are matching scalar
    // floating point values.
    Value *LHS = EmitScalarExpr(E->getArg(0));
    Value *RHS = EmitScalarExpr(E->getArg(1));

    switch (BuiltinID) {
    default: llvm_unreachable("Unknown ordered comparison");
    case Builtin::BI__builtin_isgreater:
      LHS = Builder.CreateFCmpOGT(LHS, RHS, "cmp");
      break;
    case Builtin::BI__builtin_isgreaterequal:
      LHS = Builder.CreateFCmpOGE(LHS, RHS, "cmp");
      break;
    case Builtin::BI__builtin_isless:
      LHS = Builder.CreateFCmpOLT(LHS, RHS, "cmp");
      break;
    case Builtin::BI__builtin_islessequal:
      LHS = Builder.CreateFCmpOLE(LHS, RHS, "cmp");
      break;
    case Builtin::BI__builtin_islessgreater:
      LHS = Builder.CreateFCmpONE(LHS, RHS, "cmp");
      break;
    case Builtin::BI__builtin_isunordered:
      LHS = Builder.CreateFCmpUNO(LHS, RHS, "cmp");
      break;
    }
    // ZExt bool to int type.
    return RValue::get(Builder.CreateZExt(LHS, ConvertType(E->getType())));
  }
  case Builtin::BI__builtin_isnan: {
    Value *V = EmitScalarExpr(E->getArg(0));
    V = Builder.CreateFCmpUNO(V, V, "cmp");
    return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
  }

  case Builtin::BIfinite:
  case Builtin::BI__finite:
  case Builtin::BIfinitef:
  case Builtin::BI__finitef:
  case Builtin::BIfinitel:
  case Builtin::BI__finitel:
  case Builtin::BI__builtin_isinf:
  case Builtin::BI__builtin_isfinite: {
    // isinf(x)    --> fabs(x) == infinity
    // isfinite(x) --> fabs(x) != infinity
    // x != NaN via the ordered compare in either case.
    Value *V = EmitScalarExpr(E->getArg(0));
    Value *Fabs = EmitFAbs(*this, V);
    Constant *Infinity = ConstantFP::getInfinity(V->getType());
    CmpInst::Predicate Pred = (BuiltinID == Builtin::BI__builtin_isinf)
                                  ? CmpInst::FCMP_OEQ
                                  : CmpInst::FCMP_ONE;
    Value *FCmp = Builder.CreateFCmp(Pred, Fabs, Infinity, "cmpinf");
    return RValue::get(Builder.CreateZExt(FCmp, ConvertType(E->getType())));
  }
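
  // Illustrative lowering (added note; approximate IR for double):
  //   __builtin_isinf(x) ==>
  //     %fabs = call double @llvm.fabs.f64(double %x)
  //     %cmp  = fcmp oeq double %fabs, 0x7FF0000000000000
  // isfinite uses fcmp one, so a NaN input yields false in both cases.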

  case Builtin::BI__builtin_isinf_sign: {
    // isinf_sign(x) -> fabs(x) == infinity ? (signbit(x) ? -1 : 1) : 0
    Value *Arg = EmitScalarExpr(E->getArg(0));
    Value *AbsArg = EmitFAbs(*this, Arg);
    Value *IsInf = Builder.CreateFCmpOEQ(
        AbsArg, ConstantFP::getInfinity(Arg->getType()), "isinf");
    Value *IsNeg = EmitSignBit(*this, Arg);

    llvm::Type *IntTy = ConvertType(E->getType());
    Value *Zero = Constant::getNullValue(IntTy);
    Value *One = ConstantInt::get(IntTy, 1);
    Value *NegativeOne = ConstantInt::get(IntTy, -1);
    Value *SignResult = Builder.CreateSelect(IsNeg, NegativeOne, One);
    Value *Result = Builder.CreateSelect(IsInf, SignResult, Zero);
    return RValue::get(Result);
  }

  case Builtin::BI__builtin_isnormal: {
    // isnormal(x) --> x == x && fabsf(x) < infinity && fabsf(x) >= float_min
    Value *V = EmitScalarExpr(E->getArg(0));
    Value *Eq = Builder.CreateFCmpOEQ(V, V, "iseq");

    Value *Abs = EmitFAbs(*this, V);
    Value *IsLessThanInf =
      Builder.CreateFCmpULT(Abs, ConstantFP::getInfinity(V->getType()),"isinf");
    APFloat Smallest = APFloat::getSmallestNormalized(
                   getContext().getFloatTypeSemantics(E->getArg(0)->getType()));
    Value *IsNormal =
      Builder.CreateFCmpUGE(Abs, ConstantFP::get(V->getContext(), Smallest),
                            "isnormal");
    V = Builder.CreateAnd(Eq, IsLessThanInf, "and");
    V = Builder.CreateAnd(V, IsNormal, "and");
    return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
  }

  case Builtin::BI__builtin_fpclassify: {
    Value *V = EmitScalarExpr(E->getArg(5));
    llvm::Type *Ty = ConvertType(E->getArg(5)->getType());

    // Create Result
    BasicBlock *Begin = Builder.GetInsertBlock();
    BasicBlock *End = createBasicBlock("fpclassify_end", this->CurFn);
    Builder.SetInsertPoint(End);
    PHINode *Result =
      Builder.CreatePHI(ConvertType(E->getArg(0)->getType()), 4,
                        "fpclassify_result");

    // if (V==0) return FP_ZERO
    Builder.SetInsertPoint(Begin);
    Value *IsZero = Builder.CreateFCmpOEQ(V, Constant::getNullValue(Ty),
                                          "iszero");
    Value *ZeroLiteral = EmitScalarExpr(E->getArg(4));
    BasicBlock *NotZero = createBasicBlock("fpclassify_not_zero", this->CurFn);
    Builder.CreateCondBr(IsZero, End, NotZero);
    Result->addIncoming(ZeroLiteral, Begin);

    // if (V != V) return FP_NAN
    Builder.SetInsertPoint(NotZero);
    Value *IsNan = Builder.CreateFCmpUNO(V, V, "cmp");
    Value *NanLiteral = EmitScalarExpr(E->getArg(0));
    BasicBlock *NotNan = createBasicBlock("fpclassify_not_nan", this->CurFn);
    Builder.CreateCondBr(IsNan, End, NotNan);
    Result->addIncoming(NanLiteral, NotZero);

    // if (fabs(V) == infinity) return FP_INFINITY
    Builder.SetInsertPoint(NotNan);
    Value *VAbs = EmitFAbs(*this, V);
    Value *IsInf =
      Builder.CreateFCmpOEQ(VAbs, ConstantFP::getInfinity(V->getType()),
                            "isinf");
    Value *InfLiteral = EmitScalarExpr(E->getArg(1));
    BasicBlock *NotInf = createBasicBlock("fpclassify_not_inf", this->CurFn);
    Builder.CreateCondBr(IsInf, End, NotInf);
    Result->addIncoming(InfLiteral, NotNan);

    // if (fabs(V) >= MIN_NORMAL) return FP_NORMAL else FP_SUBNORMAL
    Builder.SetInsertPoint(NotInf);
    APFloat Smallest = APFloat::getSmallestNormalized(
        getContext().getFloatTypeSemantics(E->getArg(5)->getType()));
    Value *IsNormal =
      Builder.CreateFCmpUGE(VAbs, ConstantFP::get(V->getContext(), Smallest),
                            "isnormal");
    Value *NormalResult =
      Builder.CreateSelect(IsNormal, EmitScalarExpr(E->getArg(2)),
                           EmitScalarExpr(E->getArg(3)));
    Builder.CreateBr(End);
    Result->addIncoming(NormalResult, NotInf);

    // return Result
    Builder.SetInsertPoint(End);
    return RValue::get(Result);
  }
1224
2.07k
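
The argument order is the key to the block chain above: args 0-4 are the
values to return for NaN, infinite, normal, subnormal, and zero, and arg 5 is
the operand being classified, which is why getArg(5) is evaluated first. An
illustrative caller (not part of the listing):

    #include <math.h>  // FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL, FP_ZERO
    int classify(double x) {
      return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL,
                                  FP_SUBNORMAL, FP_ZERO, x);
    }
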
1225
175
  case Builtin::BIalloca:
1226
175
  case Builtin::BI_alloca:
1227
175
  case Builtin::BI__builtin_alloca: {
1228
175
    Value *Size = EmitScalarExpr(E->getArg(0));
1229
175
    const TargetInfo &TI = getContext().getTargetInfo();
1230
175
    // The alignment of the alloca should correspond to __BIGGEST_ALIGNMENT__.
1231
175
    unsigned SuitableAlignmentInBytes =
1232
175
        CGM.getContext()
1233
175
            .toCharUnitsFromBits(TI.getSuitableAlign())
1234
175
            .getQuantity();
1235
175
    AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size);
1236
175
    AI->setAlignment(SuitableAlignmentInBytes);
1237
175
    return RValue::get(AI);
1238
175
  }
1239
175
1240
1
  case Builtin::BI__builtin_alloca_with_align: {
1241
1
    Value *Size = EmitScalarExpr(E->getArg(0));
1242
1
    Value *AlignmentInBitsValue = EmitScalarExpr(E->getArg(1));
1243
1
    auto *AlignmentInBitsCI = cast<ConstantInt>(AlignmentInBitsValue);
1244
1
    unsigned AlignmentInBits = AlignmentInBitsCI->getZExtValue();
1245
1
    unsigned AlignmentInBytes =
1246
1
        CGM.getContext().toCharUnitsFromBits(AlignmentInBits).getQuantity();
1247
1
    AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size);
1248
1
    AI->setAlignment(AlignmentInBytes);
1249
1
    return RValue::get(AI);
1250
175
  }
1251
175
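
Note the unit difference between the two forms: plain __builtin_alloca is
aligned to __BIGGEST_ALIGNMENT__, while __builtin_alloca_with_align takes its
alignment argument in bits. A sketch (not part of the listing):

    void alloca_demo(void) {
      void *p = __builtin_alloca(64);                  // __BIGGEST_ALIGNMENT__
      void *q = __builtin_alloca_with_align(64, 256);  // 256 bits = 32 bytes
      (void)p; (void)q;
    }
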
1252
42
  case Builtin::BIbzero:
1253
42
  case Builtin::BI__builtin_bzero: {
1254
42
    Address Dest = EmitPointerWithAlignment(E->getArg(0));
1255
42
    Value *SizeVal = EmitScalarExpr(E->getArg(1));
1256
42
    EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
1257
42
                        E->getArg(0)->getExprLoc(), FD, 0);
1258
42
    Builder.CreateMemSet(Dest, Builder.getInt8(0), SizeVal, false);
1259
42
    return RValue::get(Dest.getPointer());
1260
42
  }
1261
138
  case Builtin::BImemcpy:
1262
138
  case Builtin::BI__builtin_memcpy: {
1263
138
    Address Dest = EmitPointerWithAlignment(E->getArg(0));
1264
138
    Address Src = EmitPointerWithAlignment(E->getArg(1));
1265
138
    Value *SizeVal = EmitScalarExpr(E->getArg(2));
1266
138
    EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
1267
138
                        E->getArg(0)->getExprLoc(), FD, 0);
1268
138
    EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),
1269
138
                        E->getArg(1)->getExprLoc(), FD, 1);
1270
138
    Builder.CreateMemCpy(Dest, Src, SizeVal, false);
1271
138
    return RValue::get(Dest.getPointer());
1272
138
  }
1273
138
1274
1
  case Builtin::BI__builtin_char_memchr:
1275
1
    BuiltinID = Builtin::BI__builtin_memchr;
1276
1
    break;
1277
138
1278
1.76k
  case Builtin::BI__builtin___memcpy_chk: {
1279
1.76k
    // fold __builtin_memcpy_chk(x, y, cst1, cst2) to memcpy iff cst1<=cst2.
1280
1.76k
    llvm::APSInt Size, DstSize;
1281
1.76k
    if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
1282
760
        !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
1283
1.63k
      break;
1284
130
    if (Size.ugt(DstSize))
1285
0
      break;
1286
130
    Address Dest = EmitPointerWithAlignment(E->getArg(0));
1287
130
    Address Src = EmitPointerWithAlignment(E->getArg(1));
1288
130
    Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
1289
130
    Builder.CreateMemCpy(Dest, Src, SizeVal, false);
1290
130
    return RValue::get(Dest.getPointer());
1291
130
  }
1292
130
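
The fold only fires when both size arguments evaluate to constants and the
copy provably fits; otherwise the breaks above fall through to the library
call. An illustrative caller (not part of the listing; in practice the last
argument usually comes from __builtin_object_size):

    void chk_demo(char *src) {
      char dst[16];
      // 8 <= sizeof dst and both sizes are constant: lowered to plain memcpy.
      __builtin___memcpy_chk(dst, src, 8, sizeof dst);
    }
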
1293
1
  case Builtin::BI__builtin_objc_memmove_collectable: {
1294
1
    Address DestAddr = EmitPointerWithAlignment(E->getArg(0));
1295
1
    Address SrcAddr = EmitPointerWithAlignment(E->getArg(1));
1296
1
    Value *SizeVal = EmitScalarExpr(E->getArg(2));
1297
1
    CGM.getObjCRuntime().EmitGCMemmoveCollectable(*this,
1298
1
                                                  DestAddr, SrcAddr, SizeVal);
1299
1
    return RValue::get(DestAddr.getPointer());
1300
130
  }
1301
130
1302
143
  case Builtin::BI__builtin___memmove_chk: {
1303
143
    // fold __builtin_memmove_chk(x, y, cst1, cst2) to memmove iff cst1<=cst2.
1304
143
    llvm::APSInt Size, DstSize;
1305
143
    if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
1306
39
        !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
1307
134
      break;
1308
9
    if (Size.ugt(DstSize))
1309
0
      break;
1310
9
    Address Dest = EmitPointerWithAlignment(E->getArg(0));
1311
9
    Address Src = EmitPointerWithAlignment(E->getArg(1));
1312
9
    Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
1313
9
    Builder.CreateMemMove(Dest, Src, SizeVal, false);
1314
9
    return RValue::get(Dest.getPointer());
1315
9
  }
1316
9
1317
109
  case Builtin::BImemmove:
1318
109
  case Builtin::BI__builtin_memmove: {
1319
109
    Address Dest = EmitPointerWithAlignment(E->getArg(0));
1320
109
    Address Src = EmitPointerWithAlignment(E->getArg(1));
1321
109
    Value *SizeVal = EmitScalarExpr(E->getArg(2));
1322
109
    EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
1323
109
                        E->getArg(0)->getExprLoc(), FD, 0);
1324
109
    EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),
1325
109
                        E->getArg(1)->getExprLoc(), FD, 1);
1326
109
    Builder.CreateMemMove(Dest, Src, SizeVal, false);
1327
109
    return RValue::get(Dest.getPointer());
1328
109
  }
1329
59
  case Builtin::BImemset:
1330
59
  case Builtin::BI__builtin_memset: {
1331
59
    Address Dest = EmitPointerWithAlignment(E->getArg(0));
1332
59
    Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
1333
59
                                         Builder.getInt8Ty());
1334
59
    Value *SizeVal = EmitScalarExpr(E->getArg(2));
1335
59
    EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
1336
59
                        E->getArg(0)->getExprLoc(), FD, 0);
1337
59
    Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
1338
59
    return RValue::get(Dest.getPointer());
1339
59
  }
1340
1.21k
  case Builtin::BI__builtin___memset_chk: {
1341
1.21k
    // fold __builtin_memset_chk(x, y, cst1, cst2) to memset iff cst1<=cst2.
1342
1.21k
    llvm::APSInt Size, DstSize;
1343
1.21k
    if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
1344
831
        !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
1345
938
      break;
1346
273
    if (Size.ugt(DstSize))
1347
0
      break;
1348
273
    Address Dest = EmitPointerWithAlignment(E->getArg(0));
1349
273
    Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
1350
273
                                         Builder.getInt8Ty());
1351
273
    Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
1352
273
    Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
1353
273
    return RValue::get(Dest.getPointer());
1354
273
  }
1355
0
  case Builtin::BI__builtin_dwarf_cfa: {
1356
0
    // The offset in bytes from the first argument to the CFA.
1357
0
    //
1358
0
    // Why on earth is this in the frontend?  Is there any reason at
1359
0
    // all that the backend can't reasonably determine this while
1360
0
    // lowering llvm.eh.dwarf.cfa()?
1361
0
    //
1362
0
    // TODO: If there's a satisfactory reason, add a target hook for
1363
0
    // this instead of hard-coding 0, which is correct for most targets.
1364
0
    int32_t Offset = 0;
1365
0
1366
0
    Value *F = CGM.getIntrinsic(Intrinsic::eh_dwarf_cfa);
1367
0
    return RValue::get(Builder.CreateCall(F,
1368
0
                                      llvm::ConstantInt::get(Int32Ty, Offset)));
1369
273
  }
1370
30
  case Builtin::BI__builtin_return_address: {
1371
30
    Value *Depth = ConstantEmitter(*this).emitAbstract(E->getArg(0),
1372
30
                                                   getContext().UnsignedIntTy);
1373
30
    Value *F = CGM.getIntrinsic(Intrinsic::returnaddress);
1374
30
    return RValue::get(Builder.CreateCall(F, Depth));
1375
273
  }
1376
3
  case Builtin::BI_ReturnAddress: {
1377
3
    Value *F = CGM.getIntrinsic(Intrinsic::returnaddress);
1378
3
    return RValue::get(Builder.CreateCall(F, Builder.getInt32(0)));
1379
273
  }
1380
8
  case Builtin::BI__builtin_frame_address: {
1381
8
    Value *Depth = ConstantEmitter(*this).emitAbstract(E->getArg(0),
1382
8
                                                   getContext().UnsignedIntTy);
1383
8
    Value *F = CGM.getIntrinsic(Intrinsic::frameaddress);
1384
8
    return RValue::get(Builder.CreateCall(F, Depth));
1385
273
  }
1386
2
  case Builtin::BI__builtin_extract_return_addr: {
1387
2
    Value *Address = EmitScalarExpr(E->getArg(0));
1388
2
    Value *Result = getTargetHooks().decodeReturnAddress(*this, Address);
1389
2
    return RValue::get(Result);
1390
273
  }
1391
0
  case Builtin::BI__builtin_frob_return_addr: {
1392
0
    Value *Address = EmitScalarExpr(E->getArg(0));
1393
0
    Value *Result = getTargetHooks().encodeReturnAddress(*this, Address);
1394
0
    return RValue::get(Result);
1395
273
  }
1396
2
  case Builtin::BI__builtin_dwarf_sp_column: {
1397
2
    llvm::IntegerType *Ty
1398
2
      = cast<llvm::IntegerType>(ConvertType(E->getType()));
1399
2
    int Column = getTargetHooks().getDwarfEHStackPointer(CGM);
1400
2
    if (Column == -1) {
1401
0
      CGM.ErrorUnsupported(E, "__builtin_dwarf_sp_column");
1402
0
      return RValue::get(llvm::UndefValue::get(Ty));
1403
0
    }
1404
2
    return RValue::get(llvm::ConstantInt::get(Ty, Column, true));
1405
2
  }
1406
2
  case Builtin::BI__builtin_init_dwarf_reg_size_table: {
1407
2
    Value *Address = EmitScalarExpr(E->getArg(0));
1408
2
    if (getTargetHooks().initDwarfEHRegSizeTable(*this, Address))
1409
0
      CGM.ErrorUnsupported(E, "__builtin_init_dwarf_reg_size_table");
1410
2
    return RValue::get(llvm::UndefValue::get(ConvertType(E->getType())));
1411
2
  }
1412
0
  case Builtin::BI__builtin_eh_return: {
1413
0
    Value *Int = EmitScalarExpr(E->getArg(0));
1414
0
    Value *Ptr = EmitScalarExpr(E->getArg(1));
1415
0
1416
0
    llvm::IntegerType *IntTy = cast<llvm::IntegerType>(Int->getType());
1417
0
    assert((IntTy->getBitWidth() == 32 || IntTy->getBitWidth() == 64) &&
1418
0
           "LLVM's __builtin_eh_return only supports 32- and 64-bit variants");
1419
0
    Value *F = CGM.getIntrinsic(IntTy->getBitWidth() == 32
1420
0
                                  ? Intrinsic::eh_return_i32
1421
0
                                  : Intrinsic::eh_return_i64);
1422
0
    Builder.CreateCall(F, {Int, Ptr});
1423
0
    Builder.CreateUnreachable();
1424
0
1425
0
    // We do need to preserve an insertion point.
1426
0
    EmitBlock(createBasicBlock("builtin_eh_return.cont"));
1427
0
1428
0
    return RValue::get(nullptr);
1429
2
  }
1430
1
  case Builtin::BI__builtin_unwind_init: {
1431
1
    Value *F = CGM.getIntrinsic(Intrinsic::eh_unwind_init);
1432
1
    return RValue::get(Builder.CreateCall(F));
1433
2
  }
1434
0
  case Builtin::BI__builtin_extend_pointer: {
1435
0
    // Extends a pointer to the size of an _Unwind_Word, which is
1436
0
    // uint64_t on all platforms.  Generally this gets poked into a
1437
0
    // register and eventually used as an address, so if the
1438
0
    // addressing registers are wider than pointers and the platform
1439
0
    // doesn't implicitly ignore high-order bits when doing
1440
0
    // addressing, we need to make sure we zext / sext based on
1441
0
    // the platform's expectations.
1442
0
    //
1443
0
    // See: http://gcc.gnu.org/ml/gcc-bugs/2002-02/msg00237.html
1444
0
1445
0
    // Cast the pointer to intptr_t.
1446
0
    Value *Ptr = EmitScalarExpr(E->getArg(0));
1447
0
    Value *Result = Builder.CreatePtrToInt(Ptr, IntPtrTy, "extend.cast");
1448
0
1449
0
    // If that's 64 bits, we're done.
1450
0
    if (IntPtrTy->getBitWidth() == 64)
1451
0
      return RValue::get(Result);
1452
0
1453
0
    // Otherwise, ask the codegen data what to do.
1454
0
    if (getTargetHooks().extendPointerWithSExt())
1455
0
      return RValue::get(Builder.CreateSExt(Result, Int64Ty, "extend.sext"));
1456
0
    else
1457
0
      return RValue::get(Builder.CreateZExt(Result, Int64Ty, "extend.zext"));
1458
0
  }
1459
6
  case Builtin::BI__builtin_setjmp: {
1460
6
    // Buffer is a void**.
1461
6
    Address Buf = EmitPointerWithAlignment(E->getArg(0));
1462
6
1463
6
    // Store the frame pointer to the setjmp buffer.
1464
6
    Value *FrameAddr =
1465
6
      Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
1466
6
                         ConstantInt::get(Int32Ty, 0));
1467
6
    Builder.CreateStore(FrameAddr, Buf);
1468
6
1469
6
    // Store the stack pointer to the setjmp buffer.
1470
6
    Value *StackAddr =
1471
6
        Builder.CreateCall(CGM.getIntrinsic(Intrinsic::stacksave));
1472
6
    Address StackSaveSlot =
1473
6
      Builder.CreateConstInBoundsGEP(Buf, 2, getPointerSize());
1474
6
    Builder.CreateStore(StackAddr, StackSaveSlot);
1475
6
1476
6
    // Call LLVM's EH setjmp, which is lightweight.
1477
6
    Value *F = CGM.getIntrinsic(Intrinsic::eh_sjlj_setjmp);
1478
6
    Buf = Builder.CreateBitCast(Buf, Int8PtrTy);
1479
6
    return RValue::get(Builder.CreateCall(F, Buf.getPointer()));
1480
0
  }
1481
8
  case Builtin::BI__builtin_longjmp: {
1482
8
    Value *Buf = EmitScalarExpr(E->getArg(0));
1483
8
    Buf = Builder.CreateBitCast(Buf, Int8PtrTy);
1484
8
1485
8
    // Call LLVM's EH longjmp, which is lightweight.
1486
8
    Builder.CreateCall(CGM.getIntrinsic(Intrinsic::eh_sjlj_longjmp), Buf);
1487
8
1488
8
    // longjmp doesn't return; mark this as unreachable.
1489
8
    Builder.CreateUnreachable();
1490
8
1491
8
    // We do need to preserve an insertion point.
1492
8
    EmitBlock(createBasicBlock("longjmp.cont"));
1493
8
1494
8
    return RValue::get(nullptr);
1495
0
  }
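
These lightweight EH variants pair only with each other, not with the C
library setjmp/longjmp, and take a five-word buffer (slots 0 and 2 are filled
in above). A sketch (not part of the listing):

    void sjlj_demo(void) {
      void *buf[5];
      if (__builtin_setjmp(buf) == 0)
        __builtin_longjmp(buf, 1);  // second argument must be the constant 1
    }
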
1496
0
  case Builtin::BI__sync_fetch_and_add:
1497
0
  case Builtin::BI__sync_fetch_and_sub:
1498
0
  case Builtin::BI__sync_fetch_and_or:
1499
0
  case Builtin::BI__sync_fetch_and_and:
1500
0
  case Builtin::BI__sync_fetch_and_xor:
1501
0
  case Builtin::BI__sync_fetch_and_nand:
1502
0
  case Builtin::BI__sync_add_and_fetch:
1503
0
  case Builtin::BI__sync_sub_and_fetch:
1504
0
  case Builtin::BI__sync_and_and_fetch:
1505
0
  case Builtin::BI__sync_or_and_fetch:
1506
0
  case Builtin::BI__sync_xor_and_fetch:
1507
0
  case Builtin::BI__sync_nand_and_fetch:
1508
0
  case Builtin::BI__sync_val_compare_and_swap:
1509
0
  case Builtin::BI__sync_bool_compare_and_swap:
1510
0
  case Builtin::BI__sync_lock_test_and_set:
1511
0
  case Builtin::BI__sync_lock_release:
1512
0
  case Builtin::BI__sync_swap:
1513
0
    llvm_unreachable("Shouldn't make it through sema");
1514
94
  case Builtin::BI__sync_fetch_and_add_1:
1515
94
  case Builtin::BI__sync_fetch_and_add_2:
1516
94
  case Builtin::BI__sync_fetch_and_add_4:
1517
94
  case Builtin::BI__sync_fetch_and_add_8:
1518
94
  case Builtin::BI__sync_fetch_and_add_16:
1519
94
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Add, E);
1520
22
  case Builtin::BI__sync_fetch_and_sub_1:
1521
22
  case Builtin::BI__sync_fetch_and_sub_2:
1522
22
  case Builtin::BI__sync_fetch_and_sub_4:
1523
22
  case Builtin::BI__sync_fetch_and_sub_8:
1524
22
  case Builtin::BI__sync_fetch_and_sub_16:
1525
22
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Sub, E);
1526
17
  case Builtin::BI__sync_fetch_and_or_1:
1527
17
  case Builtin::BI__sync_fetch_and_or_2:
1528
17
  case Builtin::BI__sync_fetch_and_or_4:
1529
17
  case Builtin::BI__sync_fetch_and_or_8:
1530
17
  case Builtin::BI__sync_fetch_and_or_16:
1531
17
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Or, E);
1532
17
  case Builtin::BI__sync_fetch_and_and_1:
1533
17
  case Builtin::BI__sync_fetch_and_and_2:
1534
17
  case Builtin::BI__sync_fetch_and_and_4:
1535
17
  case Builtin::BI__sync_fetch_and_and_8:
1536
17
  case Builtin::BI__sync_fetch_and_and_16:
1537
17
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::And, E);
1538
17
  case Builtin::BI__sync_fetch_and_xor_1:
1539
17
  case Builtin::BI__sync_fetch_and_xor_2:
1540
17
  case Builtin::BI__sync_fetch_and_xor_4:
1541
17
  case Builtin::BI__sync_fetch_and_xor_8:
1542
17
  case Builtin::BI__sync_fetch_and_xor_16:
1543
17
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xor, E);
1544
17
  case Builtin::BI__sync_fetch_and_nand_1:
1545
17
  case Builtin::BI__sync_fetch_and_nand_2:
1546
17
  case Builtin::BI__sync_fetch_and_nand_4:
1547
17
  case Builtin::BI__sync_fetch_and_nand_8:
1548
17
  case Builtin::BI__sync_fetch_and_nand_16:
1549
17
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Nand, E);
1550
17
1551
17
  // Clang extensions: not overloaded yet.
1552
1
  case Builtin::BI__sync_fetch_and_min:
1553
1
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Min, E);
1554
1
  case Builtin::BI__sync_fetch_and_max:
1555
1
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Max, E);
1556
1
  case Builtin::BI__sync_fetch_and_umin:
1557
1
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMin, E);
1558
1
  case Builtin::BI__sync_fetch_and_umax:
1559
1
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMax, E);
1560
17
1561
12
  case Builtin::BI__sync_add_and_fetch_1:
1562
12
  case Builtin::BI__sync_add_and_fetch_2:
1563
12
  case Builtin::BI__sync_add_and_fetch_4:
1564
12
  case Builtin::BI__sync_add_and_fetch_8:
1565
12
  case Builtin::BI__sync_add_and_fetch_16:
1566
12
    return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Add, E,
1567
12
                                llvm::Instruction::Add);
1568
10
  case Builtin::BI__sync_sub_and_fetch_1:
1569
10
  case Builtin::BI__sync_sub_and_fetch_2:
1570
10
  case Builtin::BI__sync_sub_and_fetch_4:
1571
10
  case Builtin::BI__sync_sub_and_fetch_8:
1572
10
  case Builtin::BI__sync_sub_and_fetch_16:
1573
10
    return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Sub, E,
1574
10
                                llvm::Instruction::Sub);
1575
9
  case Builtin::BI__sync_and_and_fetch_1:
1576
9
  case Builtin::BI__sync_and_and_fetch_2:
1577
9
  case Builtin::BI__sync_and_and_fetch_4:
1578
9
  case Builtin::BI__sync_and_and_fetch_8:
1579
9
  case Builtin::BI__sync_and_and_fetch_16:
1580
9
    return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::And, E,
1581
9
                                llvm::Instruction::And);
1582
9
  case Builtin::BI__sync_or_and_fetch_1:
1583
9
  case Builtin::BI__sync_or_and_fetch_2:
1584
9
  case Builtin::BI__sync_or_and_fetch_4:
1585
9
  case Builtin::BI__sync_or_and_fetch_8:
1586
9
  case Builtin::BI__sync_or_and_fetch_16:
1587
9
    return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Or, E,
1588
9
                                llvm::Instruction::Or);
1589
10
  case Builtin::BI__sync_xor_and_fetch_1:
1590
10
  case Builtin::BI__sync_xor_and_fetch_2:
1591
10
  case Builtin::BI__sync_xor_and_fetch_4:
1592
10
  case Builtin::BI__sync_xor_and_fetch_8:
1593
10
  case Builtin::BI__sync_xor_and_fetch_16:
1594
10
    return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Xor, E,
1595
10
                                llvm::Instruction::Xor);
1596
9
  case Builtin::BI__sync_nand_and_fetch_1:
1597
9
  case Builtin::BI__sync_nand_and_fetch_2:
1598
9
  case Builtin::BI__sync_nand_and_fetch_4:
1599
9
  case Builtin::BI__sync_nand_and_fetch_8:
1600
9
  case Builtin::BI__sync_nand_and_fetch_16:
1601
9
    return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Nand, E,
1602
9
                                llvm::Instruction::And, true);
1603
9
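
The fetch-and-op forms return the value before the operation; the op-and-fetch
forms handled just above (via EmitBinaryAtomicPost) re-derive the value after
it. An illustrative pair (not part of the listing):

    int counter;
    int sync_demo(void) {
      int before = __sync_fetch_and_add(&counter, 5);  // atomicrmw add, old value
      int after  = __sync_add_and_fetch(&counter, 5);  // same rmw, new value
      return before + after;
    }
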
1604
43
  case Builtin::BI__sync_val_compare_and_swap_1:
1605
43
  case Builtin::BI__sync_val_compare_and_swap_2:
1606
43
  case Builtin::BI__sync_val_compare_and_swap_4:
1607
43
  case Builtin::BI__sync_val_compare_and_swap_8:
1608
43
  case Builtin::BI__sync_val_compare_and_swap_16:
1609
43
    return RValue::get(MakeAtomicCmpXchgValue(*this, E, false));
1610
43
1611
39
  case Builtin::BI__sync_bool_compare_and_swap_1:
1612
39
  case Builtin::BI__sync_bool_compare_and_swap_2:
1613
39
  case Builtin::BI__sync_bool_compare_and_swap_4:
1614
39
  case Builtin::BI__sync_bool_compare_and_swap_8:
1615
39
  case Builtin::BI__sync_bool_compare_and_swap_16:
1616
39
    return RValue::get(MakeAtomicCmpXchgValue(*this, E, true));
1617
39
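
Both forms lower to the same cmpxchg; the val form extracts the prior
contents, the bool form the success bit. Illustrative usage (not part of the
listing):

    int cas_demo(void) {
      int x = 1;
      int old = __sync_val_compare_and_swap(&x, 1, 2);   // 1: prior value
      int ok  = __sync_bool_compare_and_swap(&x, 2, 3);  // 1: swap succeeded
      return old + ok;
    }
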
1618
1
  case Builtin::BI__sync_swap_1:
1619
1
  case Builtin::BI__sync_swap_2:
1620
1
  case Builtin::BI__sync_swap_4:
1621
1
  case Builtin::BI__sync_swap_8:
1622
1
  case Builtin::BI__sync_swap_16:
1623
1
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
1624
1
1625
122
  case Builtin::BI__sync_lock_test_and_set_1:
1626
122
  case Builtin::BI__sync_lock_test_and_set_2:
1627
122
  case Builtin::BI__sync_lock_test_and_set_4:
1628
122
  case Builtin::BI__sync_lock_test_and_set_8:
1629
122
  case Builtin::BI__sync_lock_test_and_set_16:
1630
122
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
1631
122
1632
11
  case Builtin::BI__sync_lock_release_1:
1633
11
  case Builtin::BI__sync_lock_release_2:
1634
11
  case Builtin::BI__sync_lock_release_4:
1635
11
  case Builtin::BI__sync_lock_release_8:
1636
11
  case Builtin::BI__sync_lock_release_16: {
1637
11
    Value *Ptr = EmitScalarExpr(E->getArg(0));
1638
11
    QualType ElTy = E->getArg(0)->getType()->getPointeeType();
1639
11
    CharUnits StoreSize = getContext().getTypeSizeInChars(ElTy);
1640
11
    llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
1641
11
                                             StoreSize.getQuantity() * 8);
1642
11
    Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo());
1643
11
    llvm::StoreInst *Store =
1644
11
      Builder.CreateAlignedStore(llvm::Constant::getNullValue(ITy), Ptr,
1645
11
                                 StoreSize);
1646
11
    Store->setAtomic(llvm::AtomicOrdering::Release);
1647
11
    return RValue::get(nullptr);
1648
11
  }
1649
11
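
test_and_set acquires via an atomic xchg and lock_release publishes via the
release-ordered store of zero emitted above, which is enough for a minimal
spinlock. Sketch (not part of the listing):

    static int lock;
    void spin_demo(void) {
      while (__sync_lock_test_and_set(&lock, 1))
        ;                          // spin until the xchg returns 0
      /* ... critical section ... */
      __sync_lock_release(&lock);  // atomic store of 0, release ordering
    }
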
1650
1.01k
  case Builtin::BI__sync_synchronize: {
1651
1.01k
    // We assume this is supposed to correspond to a C++0x-style
1652
1.01k
    // sequentially-consistent fence (i.e. this is only usable for
1653
1.01k
    // synchronization, not device I/O or anything like that). This intrinsic
1654
1.01k
    // is really badly designed in the sense that in theory, there isn't
1655
1.01k
    // any way to safely use it... but in practice, it mostly works
1656
1.01k
    // to use it with non-atomic loads and stores to get acquire/release
1657
1.01k
    // semantics.
1658
1.01k
    Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent);
1659
1.01k
    return RValue::get(nullptr);
1660
11
  }
1661
11
1662
30
  case Builtin::BI__builtin_nontemporal_load:
1663
30
    return RValue::get(EmitNontemporalLoad(*this, E));
1664
64
  case Builtin::BI__builtin_nontemporal_store:
1665
64
    return RValue::get(EmitNontemporalStore(*this, E));
1666
20
  case Builtin::BI__c11_atomic_is_lock_free:
1667
20
  case Builtin::BI__atomic_is_lock_free: {
1668
20
    // Call "bool __atomic_is_lock_free(size_t size, void *ptr)". For the
1669
20
    // __c11 builtin, ptr is 0 (indicating a properly-aligned object), since
1670
20
    // _Atomic(T) is always properly-aligned.
1671
20
    const char *LibCallName = "__atomic_is_lock_free";
1672
20
    CallArgList Args;
1673
20
    Args.add(RValue::get(EmitScalarExpr(E->getArg(0))),
1674
20
             getContext().getSizeType());
1675
20
    if (BuiltinID == Builtin::BI__atomic_is_lock_free)
1676
16
      Args.add(RValue::get(EmitScalarExpr(E->getArg(1))),
1677
16
               getContext().VoidPtrTy);
1678
20
    else
1679
4
      Args.add(RValue::get(llvm::Constant::getNullValue(VoidPtrTy)),
1680
4
               getContext().VoidPtrTy);
1681
20
    const CGFunctionInfo &FuncInfo =
1682
20
        CGM.getTypes().arrangeBuiltinFunctionCall(E->getType(), Args);
1683
20
    llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FuncInfo);
1684
20
    llvm::Constant *Func = CGM.CreateRuntimeFunction(FTy, LibCallName);
1685
20
    return EmitCall(FuncInfo, CGCallee::forDirect(Func),
1686
20
                    ReturnValueSlot(), Args);
1687
20
  }
1688
20
1689
8
  case Builtin::BI__atomic_test_and_set: {
1690
8
    // Look at the argument type to determine whether this is a volatile
1691
8
    // operation. The parameter type is always volatile.
1692
8
    QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
1693
8
    bool Volatile =
1694
8
        PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
1695
8
1696
8
    Value *Ptr = EmitScalarExpr(E->getArg(0));
1697
8
    unsigned AddrSpace = Ptr->getType()->getPointerAddressSpace();
1698
8
    Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace));
1699
8
    Value *NewVal = Builder.getInt8(1);
1700
8
    Value *Order = EmitScalarExpr(E->getArg(1));
1701
8
    if (isa<llvm::ConstantInt>(Order)) {
1702
8
      int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
1703
8
      AtomicRMWInst *Result = nullptr;
1704
8
      switch (ord) {
1705
0
      case 0:  // memory_order_relaxed
1706
0
      default: // invalid order
1707
0
        Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
1708
0
                                         llvm::AtomicOrdering::Monotonic);
1709
0
        break;
1710
4
      case 1: // memory_order_consume
1711
4
      case 2: // memory_order_acquire
1712
4
        Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
1713
4
                                         llvm::AtomicOrdering::Acquire);
1714
4
        break;
1715
0
      case 3: // memory_order_release
1716
0
        Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
1717
0
                                         llvm::AtomicOrdering::Release);
1718
0
        break;
1719
0
      case 4: // memory_order_acq_rel
1720
0
1721
0
        Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
1722
0
                                         llvm::AtomicOrdering::AcquireRelease);
1723
0
        break;
1724
4
      case 5: // memory_order_seq_cst
1725
4
        Result = Builder.CreateAtomicRMW(
1726
4
            llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
1727
4
            llvm::AtomicOrdering::SequentiallyConsistent);
1728
4
        break;
1729
8
      }
1730
8
      Result->setVolatile(Volatile);
1731
8
      return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
1732
8
    }
1733
0
1734
0
    llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
1735
0
1736
0
    llvm::BasicBlock *BBs[5] = {
1737
0
      createBasicBlock("monotonic", CurFn),
1738
0
      createBasicBlock("acquire", CurFn),
1739
0
      createBasicBlock("release", CurFn),
1740
0
      createBasicBlock("acqrel", CurFn),
1741
0
      createBasicBlock("seqcst", CurFn)
1742
0
    };
1743
0
    llvm::AtomicOrdering Orders[5] = {
1744
0
        llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Acquire,
1745
0
        llvm::AtomicOrdering::Release, llvm::AtomicOrdering::AcquireRelease,
1746
0
        llvm::AtomicOrdering::SequentiallyConsistent};
1747
0
1748
0
    Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
1749
0
    llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);
1750
0
1751
0
    Builder.SetInsertPoint(ContBB);
1752
0
    PHINode *Result = Builder.CreatePHI(Int8Ty, 5, "was_set");
1753
0
1754
0
    for (unsigned i = 0; i < 5; ++i) {
1755
0
      Builder.SetInsertPoint(BBs[i]);
1756
0
      AtomicRMWInst *RMW = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
1757
0
                                                   Ptr, NewVal, Orders[i]);
1758
0
      RMW->setVolatile(Volatile);
1759
0
      Result->addIncoming(RMW, BBs[i]);
1760
0
      Builder.CreateBr(ContBB);
1761
0
    }
1762
0
1763
0
    SI->addCase(Builder.getInt32(0), BBs[0]);
1764
0
    SI->addCase(Builder.getInt32(1), BBs[1]);
1765
0
    SI->addCase(Builder.getInt32(2), BBs[1]);
1766
0
    SI->addCase(Builder.getInt32(3), BBs[2]);
1767
0
    SI->addCase(Builder.getInt32(4), BBs[3]);
1768
0
    SI->addCase(Builder.getInt32(5), BBs[4]);
1769
0
1770
0
    Builder.SetInsertPoint(ContBB);
1771
0
    return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
1772
0
  }
1773
0
1774
8
  case Builtin::BI__atomic_clear: {
1775
8
    QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
1776
8
    bool Volatile =
1777
8
        PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
1778
8
1779
8
    Address Ptr = EmitPointerWithAlignment(E->getArg(0));
1780
8
    unsigned AddrSpace = Ptr.getPointer()->getType()->getPointerAddressSpace();
1781
8
    Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace));
1782
8
    Value *NewVal = Builder.getInt8(0);
1783
8
    Value *Order = EmitScalarExpr(E->getArg(1));
1784
8
    if (isa<llvm::ConstantInt>(Order)) {
1785
8
      int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
1786
8
      StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
1787
8
      switch (ord) {
1788
0
      case 0:  // memory_order_relaxed
1789
0
      default: // invalid order
1790
0
        Store->setOrdering(llvm::AtomicOrdering::Monotonic);
1791
0
        break;
1792
4
      case 3:  // memory_order_release
1793
4
        Store->setOrdering(llvm::AtomicOrdering::Release);
1794
4
        break;
1795
4
      case 5:  // memory_order_seq_cst
1796
4
        Store->setOrdering(llvm::AtomicOrdering::SequentiallyConsistent);
1797
4
        break;
1798
8
      }
1799
8
      return RValue::get(nullptr);
1800
8
    }
1801
0
1802
0
    llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
1803
0
1804
0
    llvm::BasicBlock *BBs[3] = {
1805
0
      createBasicBlock("monotonic", CurFn),
1806
0
      createBasicBlock("release", CurFn),
1807
0
      createBasicBlock("seqcst", CurFn)
1808
0
    };
1809
0
    llvm::AtomicOrdering Orders[3] = {
1810
0
        llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Release,
1811
0
        llvm::AtomicOrdering::SequentiallyConsistent};
1812
0
1813
0
    Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
1814
0
    llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);
1815
0
1816
0
    for (unsigned i = 0; i < 3; ++i) {
1817
0
      Builder.SetInsertPoint(BBs[i]);
1818
0
      StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
1819
0
      Store->setOrdering(Orders[i]);
1820
0
      Builder.CreateBr(ContBB);
1821
0
    }
1822
0
1823
0
    SI->addCase(Builder.getInt32(0), BBs[0]);
1824
0
    SI->addCase(Builder.getInt32(3), BBs[1]);
1825
0
    SI->addCase(Builder.getInt32(5), BBs[2]);
1826
0
1827
0
    Builder.SetInsertPoint(ContBB);
1828
0
    return RValue::get(nullptr);
1829
0
  }
1830
0
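
The integer order values in the switches above are the C11 __ATOMIC_*
constants (relaxed = 0 through seq_cst = 5); a constant order is what lets a
caller hit the fast path instead of the switch-over-orders fallback.
Illustrative flag usage (not part of the listing):

    static bool flag;
    void flag_demo(void) {
      if (!__atomic_test_and_set(&flag, __ATOMIC_ACQUIRE)) {
        /* ... flag was clear, now owned ... */
        __atomic_clear(&flag, __ATOMIC_RELEASE);
      }
    }
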
1831
64
  case Builtin::BI__atomic_thread_fence:
1832
64
  case Builtin::BI__atomic_signal_fence:
1833
64
  case Builtin::BI__c11_atomic_thread_fence:
1834
64
  case Builtin::BI__c11_atomic_signal_fence: {
1835
64
    llvm::SyncScope::ID SSID;
1836
64
    if (BuiltinID == Builtin::BI__atomic_signal_fence ||
1837
64
        BuiltinID == Builtin::BI__c11_atomic_signal_fence)
1838
32
      SSID = llvm::SyncScope::SingleThread;
1839
64
    else
1840
32
      SSID = llvm::SyncScope::System;
1841
64
    Value *Order = EmitScalarExpr(E->getArg(0));
1842
64
    if (isa<llvm::ConstantInt>(Order)) {
1843
0
      int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
1844
0
      switch (ord) {
1845
0
      case 0:  // memory_order_relaxed
1846
0
      default: // invalid order
1847
0
        break;
1848
0
      case 1:  // memory_order_consume
1849
0
      case 2:  // memory_order_acquire
1850
0
        Builder.CreateFence(llvm::AtomicOrdering::Acquire, SSID);
1851
0
        break;
1852
0
      case 3:  // memory_order_release
1853
0
        Builder.CreateFence(llvm::AtomicOrdering::Release, SSID);
1854
0
        break;
1855
0
      case 4:  // memory_order_acq_rel
1856
0
        Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, SSID);
1857
0
        break;
1858
0
      case 5:  // memory_order_seq_cst
1859
0
        Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, SSID);
1860
0
        break;
1861
0
      }
1862
0
      return RValue::get(nullptr);
1863
0
    }
1864
64
1865
64
    llvm::BasicBlock *AcquireBB, *ReleaseBB, *AcqRelBB, *SeqCstBB;
1866
64
    AcquireBB = createBasicBlock("acquire", CurFn);
1867
64
    ReleaseBB = createBasicBlock("release", CurFn);
1868
64
    AcqRelBB = createBasicBlock("acqrel", CurFn);
1869
64
    SeqCstBB = createBasicBlock("seqcst", CurFn);
1870
64
    llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
1871
64
1872
64
    Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
1873
64
    llvm::SwitchInst *SI = Builder.CreateSwitch(Order, ContBB);
1874
64
1875
64
    Builder.SetInsertPoint(AcquireBB);
1876
64
    Builder.CreateFence(llvm::AtomicOrdering::Acquire, SSID);
1877
64
    Builder.CreateBr(ContBB);
1878
64
    SI->addCase(Builder.getInt32(1), AcquireBB);
1879
64
    SI->addCase(Builder.getInt32(2), AcquireBB);
1880
64
1881
64
    Builder.SetInsertPoint(ReleaseBB);
1882
64
    Builder.CreateFence(llvm::AtomicOrdering::Release, SSID);
1883
64
    Builder.CreateBr(ContBB);
1884
64
    SI->addCase(Builder.getInt32(3), ReleaseBB);
1885
64
1886
64
    Builder.SetInsertPoint(AcqRelBB);
1887
64
    Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, SSID);
1888
64
    Builder.CreateBr(ContBB);
1889
64
    SI->addCase(Builder.getInt32(4), AcqRelBB);
1890
64
1891
64
    Builder.SetInsertPoint(SeqCstBB);
1892
64
    Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, SSID);
1893
64
    Builder.CreateBr(ContBB);
1894
64
    SI->addCase(Builder.getInt32(5), SeqCstBB);
1895
64
1896
64
    Builder.SetInsertPoint(ContBB);
1897
64
    return RValue::get(nullptr);
1898
64
  }
1899
64
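
A signal fence only orders against code on the same thread, hence the
SingleThread sync scope selected above. For example (not part of the listing):

    void fence_demo(void) {
      __atomic_thread_fence(__ATOMIC_SEQ_CST);  // fence seq_cst
      __atomic_signal_fence(__ATOMIC_ACQUIRE);  // fence syncscope("singlethread") acquire
    }
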
1900
64
    // Library functions with special handling.
1901
478
  case Builtin::BIsqrt:
1902
478
  case Builtin::BIsqrtf:
1903
478
  case Builtin::BIsqrtl: {
1904
478
    // Transform a call to sqrt* into a @llvm.sqrt.* intrinsic call, but only
1905
478
    // in finite- or unsafe-math mode (the intrinsic has different semantics
1906
478
    // for handling negative numbers compared to the library function, so
1907
478
    // -fmath-errno=0 is not enough).
1908
478
    if (!FD->hasAttr<ConstAttr>())
1909
4
      break;
1910
474
    if (!(CGM.getCodeGenOpts().UnsafeFPMath ||
1911
471
          CGM.getCodeGenOpts().NoNaNsFPMath))
1912
471
      break;
1913
3
    Value *Arg0 = EmitScalarExpr(E->getArg(0));
1914
3
    llvm::Type *ArgType = Arg0->getType();
1915
3
    Value *F = CGM.getIntrinsic(Intrinsic::sqrt, ArgType);
1916
3
    return RValue::get(Builder.CreateCall(F, Arg0));
1917
3
  }
1918
3
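
In other words the intrinsic form is opt-in via flags: with plain -O2 the
libm call is kept, while a mode such as -ffast-math (which implies both the
ConstAttr and the finite/unsafe-math conditions checked above) can permit
@llvm.sqrt. A hedged sketch of the observable difference (the exact flag
behavior here is illustrative, not guaranteed):

    #include <math.h>
    double root(double x) { return sqrt(x); }
    // clang -O2:             call double @sqrt(double %x)
    // clang -O2 -ffast-math: may become call double @llvm.sqrt.f64(double %x)
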
1919
289
  case Builtin::BI__builtin_pow:
1920
289
  case Builtin::BI__builtin_powf:
1921
289
  case Builtin::BI__builtin_powl:
1922
289
  case Builtin::BIpow:
1923
289
  case Builtin::BIpowf:
1924
289
  case Builtin::BIpowl: {
1925
289
    // Transform a call to pow* into a @llvm.pow.* intrinsic call.
1926
289
    if (!FD->hasAttr<ConstAttr>())
1927
3
      break;
1928
286
    Value *Base = EmitScalarExpr(E->getArg(0));
1929
286
    Value *Exponent = EmitScalarExpr(E->getArg(1));
1930
286
    llvm::Type *ArgType = Base->getType();
1931
286
    Value *F = CGM.getIntrinsic(Intrinsic::pow, ArgType);
1932
286
    return RValue::get(Builder.CreateCall(F, {Base, Exponent}));
1933
286
  }
1934
286
1935
9
  case Builtin::BIfma:
1936
9
  case Builtin::BIfmaf:
1937
9
  case Builtin::BIfmal:
1938
9
  case Builtin::BI__builtin_fma:
1939
9
  case Builtin::BI__builtin_fmaf:
1940
9
  case Builtin::BI__builtin_fmal: {
1941
9
    // Rewrite fma to intrinsic.
1942
9
    Value *FirstArg = EmitScalarExpr(E->getArg(0));
1943
9
    llvm::Type *ArgType = FirstArg->getType();
1944
9
    Value *F = CGM.getIntrinsic(Intrinsic::fma, ArgType);
1945
9
    return RValue::get(
1946
9
        Builder.CreateCall(F, {FirstArg, EmitScalarExpr(E->getArg(1)),
1947
9
                               EmitScalarExpr(E->getArg(2))}));
1948
9
  }
1949
9
1950
22
  case Builtin::BI__builtin_signbit:
1951
22
  case Builtin::BI__builtin_signbitf:
1952
22
  case Builtin::BI__builtin_signbitl: {
1953
22
    return RValue::get(
1954
22
        Builder.CreateZExt(EmitSignBit(*this, EmitScalarExpr(E->getArg(0))),
1955
22
                           ConvertType(E->getType())));
1956
22
  }
1957
10
  case Builtin::BI__annotation: {
1958
10
    // Re-encode each wide string to UTF8 and make an MDString.
1959
10
    SmallVector<Metadata *, 1> Strings;
1960
16
    for (const Expr *Arg : E->arguments()) {
1961
16
      const auto *Str = cast<StringLiteral>(Arg->IgnoreParenCasts());
1962
16
      assert(Str->getCharByteWidth() == 2);
1963
16
      StringRef WideBytes = Str->getBytes();
1964
16
      std::string StrUtf8;
1965
16
      if (!convertUTF16ToUTF8String(
1966
16
              makeArrayRef(WideBytes.data(), WideBytes.size()), StrUtf8)) {
1967
0
        CGM.ErrorUnsupported(E, "non-UTF16 __annotation argument");
1968
0
        continue;
1969
0
      }
1970
16
      Strings.push_back(llvm::MDString::get(getLLVMContext(), StrUtf8));
1971
16
    }
1972
10
1973
10
    // Build an MDTuple of MDStrings and emit the intrinsic call.
1974
10
    llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::codeview_annotation, {});
1975
10
    MDTuple *StrTuple = MDTuple::get(getLLVMContext(), Strings);
1976
10
    Builder.CreateCall(F, MetadataAsValue::get(getLLVMContext(), StrTuple));
1977
10
    return RValue::getIgnored();
1978
22
  }
1979
7
  case Builtin::BI__builtin_annotation: {
1980
7
    llvm::Value *AnnVal = EmitScalarExpr(E->getArg(0));
1981
7
    llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::annotation,
1982
7
                                      AnnVal->getType());
1983
7
1984
7
    // Get the annotation string, go through casts. Sema requires this to be a
1985
7
    // non-wide string literal, potentially casted, so the cast<> is safe.
1986
7
    const Expr *AnnotationStrExpr = E->getArg(1)->IgnoreParenCasts();
1987
7
    StringRef Str = cast<StringLiteral>(AnnotationStrExpr)->getString();
1988
7
    return RValue::get(EmitAnnotationCall(F, AnnVal, Str, E->getExprLoc()));
1989
22
  }
1990
30
  case Builtin::BI__builtin_addcb:
1991
30
  case Builtin::BI__builtin_addcs:
1992
30
  case Builtin::BI__builtin_addc:
1993
30
  case Builtin::BI__builtin_addcl:
1994
30
  case Builtin::BI__builtin_addcll:
1995
30
  case Builtin::BI__builtin_subcb:
1996
30
  case Builtin::BI__builtin_subcs:
1997
30
  case Builtin::BI__builtin_subc:
1998
30
  case Builtin::BI__builtin_subcl:
1999
30
  case Builtin::BI__builtin_subcll: {
2000
30
2001
30
    // We translate all of these builtins from expressions of the form:
2002
30
    //   int x = ..., y = ..., carryin = ..., carryout, result;
2003
30
    //   result = __builtin_addc(x, y, carryin, &carryout);
2004
30
    //
2005
30
    // to LLVM IR of the form:
2006
30
    //
2007
30
    //   %tmp1 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)
2008
30
    //   %tmpsum1 = extractvalue {i32, i1} %tmp1, 0
2009
30
    //   %carry1 = extractvalue {i32, i1} %tmp1, 1
2010
30
    //   %tmp2 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %tmpsum1,
2011
30
    //                                                       i32 %carryin)
2012
30
    //   %result = extractvalue {i32, i1} %tmp2, 0
2013
30
    //   %carry2 = extractvalue {i32, i1} %tmp2, 1
2014
30
    //   %tmp3 = or i1 %carry1, %carry2
2015
30
    //   %tmp4 = zext i1 %tmp3 to i32
2016
30
    //   store i32 %tmp4, i32* %carryout
2017
30
2018
30
    // Scalarize our inputs.
2019
30
    llvm::Value *X = EmitScalarExpr(E->getArg(0));
2020
30
    llvm::Value *Y = EmitScalarExpr(E->getArg(1));
2021
30
    llvm::Value *Carryin = EmitScalarExpr(E->getArg(2));
2022
30
    Address CarryOutPtr = EmitPointerWithAlignment(E->getArg(3));
2023
30
2024
30
    // Decide if we are lowering to a uadd.with.overflow or usub.with.overflow.
2025
30
    llvm::Intrinsic::ID IntrinsicId;
2026
30
    switch (BuiltinID) {
2027
0
    default: llvm_unreachable("Unknown multiprecision builtin id.");
2028
15
    case Builtin::BI__builtin_addcb:
2029
15
    case Builtin::BI__builtin_addcs:
2030
15
    case Builtin::BI__builtin_addc:
2031
15
    case Builtin::BI__builtin_addcl:
2032
15
    case Builtin::BI__builtin_addcll:
2033
15
      IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
2034
15
      break;
2035
15
    case Builtin::BI__builtin_subcb:
2036
15
    case Builtin::BI__builtin_subcs:
2037
15
    case Builtin::BI__builtin_subc:
2038
15
    case Builtin::BI__builtin_subcl:
2039
15
    case Builtin::BI__builtin_subcll:
2040
15
      IntrinsicId = llvm::Intrinsic::usub_with_overflow;
2041
15
      break;
2042
30
    }
2043
30
2044
30
    // Construct our resulting LLVM IR expression.
2045
30
    llvm::Value *Carry1;
2046
30
    llvm::Value *Sum1 = EmitOverflowIntrinsic(*this, IntrinsicId,
2047
30
                                              X, Y, Carry1);
2048
30
    llvm::Value *Carry2;
2049
30
    llvm::Value *Sum2 = EmitOverflowIntrinsic(*this, IntrinsicId,
2050
30
                                              Sum1, Carryin, Carry2);
2051
30
    llvm::Value *CarryOut = Builder.CreateZExt(Builder.CreateOr(Carry1, Carry2),
2052
30
                                               X->getType());
2053
30
    Builder.CreateStore(CarryOut, CarryOutPtr);
2054
30
    return RValue::get(Sum2);
2055
30
  }
2056
30
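
Chaining the carry-out of one limb into the carry-in of the next is the
intended use. A minimal two-limb adder (not part of the listing):

    void add64(unsigned a_lo, unsigned a_hi, unsigned b_lo, unsigned b_hi,
               unsigned *r_lo, unsigned *r_hi) {
      unsigned c1, c2;
      *r_lo = __builtin_addc(a_lo, b_lo, 0, &c1);
      *r_hi = __builtin_addc(a_hi, b_hi, c1, &c2);  // c2 would feed a third limb
    }
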
2057
33
  case Builtin::BI__builtin_add_overflow:
2058
33
  case Builtin::BI__builtin_sub_overflow:
2059
33
  case Builtin::BI__builtin_mul_overflow: {
2060
33
    const clang::Expr *LeftArg = E->getArg(0);
2061
33
    const clang::Expr *RightArg = E->getArg(1);
2062
33
    const clang::Expr *ResultArg = E->getArg(2);
2063
33
2064
33
    clang::QualType ResultQTy =
2065
33
        ResultArg->getType()->castAs<PointerType>()->getPointeeType();
2066
33
2067
33
    WidthAndSignedness LeftInfo =
2068
33
        getIntegerWidthAndSignedness(CGM.getContext(), LeftArg->getType());
2069
33
    WidthAndSignedness RightInfo =
2070
33
        getIntegerWidthAndSignedness(CGM.getContext(), RightArg->getType());
2071
33
    WidthAndSignedness ResultInfo =
2072
33
        getIntegerWidthAndSignedness(CGM.getContext(), ResultQTy);
2073
33
    WidthAndSignedness EncompassingInfo =
2074
33
        EncompassingIntegerType({LeftInfo, RightInfo, ResultInfo});
2075
33
2076
33
    llvm::Type *EncompassingLLVMTy =
2077
33
        llvm::IntegerType::get(CGM.getLLVMContext(), EncompassingInfo.Width);
2078
33
2079
33
    llvm::Type *ResultLLVMTy = CGM.getTypes().ConvertType(ResultQTy);
2080
33
2081
33
    llvm::Intrinsic::ID IntrinsicId;
2082
33
    switch (BuiltinID) {
2083
0
    default:
2084
0
      llvm_unreachable("Unknown overflow builtin id.");
2085
21
    case Builtin::BI__builtin_add_overflow:
2086
21
      IntrinsicId = EncompassingInfo.Signed
2087
9
                        ? llvm::Intrinsic::sadd_with_overflow
2088
12
                        : llvm::Intrinsic::uadd_with_overflow;
2089
21
      break;
2090
6
    case Builtin::BI__builtin_sub_overflow:
2091
6
      IntrinsicId = EncompassingInfo.Signed
2092
3
                        ? llvm::Intrinsic::ssub_with_overflow
2093
3
                        : llvm::Intrinsic::usub_with_overflow;
2094
6
      break;
2095
6
    case Builtin::BI__builtin_mul_overflow:
2096
6
      IntrinsicId = EncompassingInfo.Signed
2097
3
                        ? llvm::Intrinsic::smul_with_overflow
2098
3
                        : llvm::Intrinsic::umul_with_overflow;
2099
6
      break;
2100
33
    }
2101
33
2102
33
    llvm::Value *Left = EmitScalarExpr(LeftArg);
2103
33
    llvm::Value *Right = EmitScalarExpr(RightArg);
2104
33
    Address ResultPtr = EmitPointerWithAlignment(ResultArg);
2105
33
2106
33
    // Extend each operand to the encompassing type.
2107
33
    Left = Builder.CreateIntCast(Left, EncompassingLLVMTy, LeftInfo.Signed);
2108
33
    Right = Builder.CreateIntCast(Right, EncompassingLLVMTy, RightInfo.Signed);
2109
33
2110
33
    // Perform the operation on the extended values.
2111
33
    llvm::Value *Overflow, *Result;
2112
33
    Result = EmitOverflowIntrinsic(*this, IntrinsicId, Left, Right, Overflow);
2113
33
2114
33
    if (EncompassingInfo.Width > ResultInfo.Width) {
2115
6
      // The encompassing type is wider than the result type, so we need to
2116
6
      // truncate it.
2117
6
      llvm::Value *ResultTrunc = Builder.CreateTrunc(Result, ResultLLVMTy);
2118
6
2119
6
      // To see if the truncation caused an overflow, we will extend
2120
6
      // the result and then compare it to the original result.
2121
6
      llvm::Value *ResultTruncExt = Builder.CreateIntCast(
2122
6
          ResultTrunc, EncompassingLLVMTy, ResultInfo.Signed);
2123
6
      llvm::Value *TruncationOverflow =
2124
6
          Builder.CreateICmpNE(Result, ResultTruncExt);
2125
6
2126
6
      Overflow = Builder.CreateOr(Overflow, TruncationOverflow);
2127
6
      Result = ResultTrunc;
2128
6
    }
2129
33
2130
33
    // Finally, store the result using the pointer.
2131
33
    bool isVolatile =
2132
33
      ResultArg->getType()->getPointeeType().isVolatileQualified();
2133
33
    Builder.CreateStore(EmitToMemory(Result, ResultQTy), ResultPtr, isVolatile);
2134
33
2135
33
    return RValue::get(Overflow);
2136
33
  }
2137
33
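
Unlike the type-suffixed forms below, these generic builtins accept operands
and a result pointer of different integer types; that is exactly what the
encompassing-type widening and the truncation-overflow check above implement.
Illustrative (not part of the listing):

    #include <stdio.h>
    int main(void) {
      int a = 2000000000, b = 2000000000;
      short r;
      if (__builtin_add_overflow(a, b, &r))  // int operands, short result
        puts("overflowed short");
      return 0;
    }
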
2138
54
  case Builtin::BI__builtin_uadd_overflow:
2139
54
  case Builtin::BI__builtin_uaddl_overflow:
2140
54
  case Builtin::BI__builtin_uaddll_overflow:
2141
54
  case Builtin::BI__builtin_usub_overflow:
2142
54
  case Builtin::BI__builtin_usubl_overflow:
2143
54
  case Builtin::BI__builtin_usubll_overflow:
2144
54
  case Builtin::BI__builtin_umul_overflow:
2145
54
  case Builtin::BI__builtin_umull_overflow:
2146
54
  case Builtin::BI__builtin_umulll_overflow:
2147
54
  case Builtin::BI__builtin_sadd_overflow:
2148
54
  case Builtin::BI__builtin_saddl_overflow:
2149
54
  case Builtin::BI__builtin_saddll_overflow:
2150
54
  case Builtin::BI__builtin_ssub_overflow:
2151
54
  case Builtin::BI__builtin_ssubl_overflow:
2152
54
  case Builtin::BI__builtin_ssubll_overflow:
2153
54
  case Builtin::BI__builtin_smul_overflow:
2154
54
  case Builtin::BI__builtin_smull_overflow:
2155
54
  case Builtin::BI__builtin_smulll_overflow: {
2156
54
2157
54
    // We translate all of these builtins directly to the relevant llvm IR node.
2158
54
2159
54
    // Scalarize our inputs.
2160
54
    llvm::Value *X = EmitScalarExpr(E->getArg(0));
2161
54
    llvm::Value *Y = EmitScalarExpr(E->getArg(1));
2162
54
    Address SumOutPtr = EmitPointerWithAlignment(E->getArg(2));
2163
54
2164
54
    // Decide which of the overflow intrinsics we are lowering to:
2165
54
    llvm::Intrinsic::ID IntrinsicId;
2166
54
    switch (BuiltinID) {
2167
0
    default: llvm_unreachable("Unknown overflow builtin id.");
2168
9
    case Builtin::BI__builtin_uadd_overflow:
2169
9
    case Builtin::BI__builtin_uaddl_overflow:
2170
9
    case Builtin::BI__builtin_uaddll_overflow:
2171
9
      IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
2172
9
      break;
2173
9
    case Builtin::BI__builtin_usub_overflow:
2174
9
    case Builtin::BI__builtin_usubl_overflow:
2175
9
    case Builtin::BI__builtin_usubll_overflow:
2176
9
      IntrinsicId = llvm::Intrinsic::usub_with_overflow;
2177
9
      break;
2178
9
    case Builtin::BI__builtin_umul_overflow:
2179
9
    case Builtin::BI__builtin_umull_overflow:
2180
9
    case Builtin::BI__builtin_umulll_overflow:
2181
9
      IntrinsicId = llvm::Intrinsic::umul_with_overflow;
2182
9
      break;
2183
9
    case Builtin::BI__builtin_sadd_overflow:
2184
9
    case Builtin::BI__builtin_saddl_overflow:
2185
9
    case Builtin::BI__builtin_saddll_overflow:
2186
9
      IntrinsicId = llvm::Intrinsic::sadd_with_overflow;
2187
9
      break;
2188
9
    case Builtin::BI__builtin_ssub_overflow:
2189
9
    case Builtin::BI__builtin_ssubl_overflow:
2190
9
    case Builtin::BI__builtin_ssubll_overflow:
2191
9
      IntrinsicId = llvm::Intrinsic::ssub_with_overflow;
2192
9
      break;
2193
9
    case Builtin::BI__builtin_smul_overflow:
2194
9
    case Builtin::BI__builtin_smull_overflow:
2195
9
    case Builtin::BI__builtin_smulll_overflow:
2196
9
      IntrinsicId = llvm::Intrinsic::smul_with_overflow;
2197
9
      break;
2198
54
    }
2199
54
2200
54
2201
54
    llvm::Value *Carry;
2202
54
    llvm::Value *Sum = EmitOverflowIntrinsic(*this, IntrinsicId, X, Y, Carry);
2203
54
    Builder.CreateStore(Sum, SumOutPtr);
2204
54
2205
54
    return RValue::get(Carry);
2206
54
  }
2207
1
  case Builtin::BI__builtin_addressof:
2208
1
    return RValue::get(EmitLValue(E->getArg(0)).getPointer());
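
This is the hook std::addressof is built on: it yields the address while
bypassing any overloaded operator&. Illustrative C++ (not part of the
listing):

    struct S { S *operator&() = delete; };
    S s;
    S *p = __builtin_addressof(s);  // plain &s would be ill-formed here
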
2209
2
  case Builtin::BI__builtin_operator_new:
2210
2
    return EmitBuiltinNewDeleteCall(FD->getType()->castAs<FunctionProtoType>(),
2211
2
                                    E->getArg(0), false);
2212
2
  case Builtin::BI__builtin_operator_delete:
2213
2
    return EmitBuiltinNewDeleteCall(FD->getType()->castAs<FunctionProtoType>(),
2214
2
                                    E->getArg(0), true);
2215
6
  case Builtin::BI__noop:
2216
6
    // __noop always evaluates to an integer literal zero.
2217
6
    return RValue::get(ConstantInt::get(IntTy, 0));
2218
8
  case Builtin::BI__builtin_call_with_static_chain: {
2219
8
    const CallExpr *Call = cast<CallExpr>(E->getArg(0));
2220
8
    const Expr *Chain = E->getArg(1);
2221
8
    return EmitCall(Call->getCallee()->getType(),
2222
8
                    EmitCallee(Call->getCallee()), Call, ReturnValue,
2223
8
                    EmitScalarExpr(Chain));
2224
54
  }
2225
14
  case Builtin::BI_InterlockedExchange8:
2226
14
  case Builtin::BI_InterlockedExchange16:
2227
14
  case Builtin::BI_InterlockedExchange:
2228
14
  case Builtin::BI_InterlockedExchangePointer:
2229
14
    return RValue::get(
2230
14
        EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E));
2231
3
  case Builtin::BI_InterlockedCompareExchangePointer: {
2232
3
    llvm::Type *RTy;
2233
3
    llvm::IntegerType *IntType =
2234
3
      IntegerType::get(getLLVMContext(),
2235
3
                       getContext().getTypeSize(E->getType()));
2236
3
    llvm::Type *IntPtrType = IntType->getPointerTo();
2237
3
2238
3
    llvm::Value *Destination =
2239
3
      Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), IntPtrType);
2240
3
2241
3
    llvm::Value *Exchange = EmitScalarExpr(E->getArg(1));
2242
3
    RTy = Exchange->getType();
2243
3
    Exchange = Builder.CreatePtrToInt(Exchange, IntType);
2244
3
2245
3
    llvm::Value *Comparand =
2246
3
      Builder.CreatePtrToInt(EmitScalarExpr(E->getArg(2)), IntType);
2247
3
2248
3
    auto Result =
2249
3
        Builder.CreateAtomicCmpXchg(Destination, Comparand, Exchange,
2250
3
                                    AtomicOrdering::SequentiallyConsistent,
2251
3
                                    AtomicOrdering::SequentiallyConsistent);
2252
3
    Result->setVolatile(true);
2253
3
2254
3
    return RValue::get(Builder.CreateIntToPtr(Builder.CreateExtractValue(Result,
2255
3
                                                                         0),
2256
3
                                              RTy));
2257
14
  }
2258
14
  case Builtin::BI_InterlockedCompareExchange8:
2259
14
  case Builtin::BI_InterlockedCompareExchange16:
2260
14
  case Builtin::BI_InterlockedCompareExchange:
2261
14
  case Builtin::BI_InterlockedCompareExchange64: {
2262
14
    AtomicCmpXchgInst *CXI = Builder.CreateAtomicCmpXchg(
2263
14
        EmitScalarExpr(E->getArg(0)),
2264
14
        EmitScalarExpr(E->getArg(2)),
2265
14
        EmitScalarExpr(E->getArg(1)),
2266
14
        AtomicOrdering::SequentiallyConsistent,
2267
14
        AtomicOrdering::SequentiallyConsistent);
2268
14
      CXI->setVolatile(true);
2269
14
      return RValue::get(Builder.CreateExtractValue(CXI, 0));
2270
14
  }
2271
8
  case Builtin::BI_InterlockedIncrement16:
2272
8
  case Builtin::BI_InterlockedIncrement:
2273
8
    return RValue::get(
2274
8
        EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E));
2275
8
  case Builtin::BI_InterlockedDecrement16:
2276
8
  case Builtin::BI_InterlockedDecrement:
2277
8
    return RValue::get(
2278
8
        EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E));
2279
11
  case Builtin::BI_InterlockedAnd8:
2280
11
  case Builtin::BI_InterlockedAnd16:
2281
11
  case Builtin::BI_InterlockedAnd:
2282
11
    return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E));
2283
11
  case Builtin::BI_InterlockedExchangeAdd8:
2284
11
  case Builtin::BI_InterlockedExchangeAdd16:
2285
11
  case Builtin::BI_InterlockedExchangeAdd:
2286
11
    return RValue::get(
2287
11
        EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E));
2288
11
  case Builtin::BI_InterlockedExchangeSub8:
2289
11
  case Builtin::BI_InterlockedExchangeSub16:
2290
11
  case Builtin::BI_InterlockedExchangeSub:
2291
11
    return RValue::get(
2292
11
        EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E));
2293
11
  case Builtin::BI_InterlockedOr8:
2294
11
  case Builtin::BI_InterlockedOr16:
2295
11
  case Builtin::BI_InterlockedOr:
2296
11
    return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E));
2297
11
  case Builtin::BI_InterlockedXor8:
2298
11
  case Builtin::BI_InterlockedXor16:
2299
11
  case Builtin::BI_InterlockedXor:
2300
11
    return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E));
2301
5
  case Builtin::BI_interlockedbittestandset:
2302
5
    return RValue::get(
2303
5
        EmitMSVCBuiltinExpr(MSVCIntrin::_interlockedbittestandset, E));
2304
11
2305
14
  case Builtin::BI__exception_code:
2306
14
  case Builtin::BI_exception_code:
2307
14
    return RValue::get(EmitSEHExceptionCode());
2308
0
  case Builtin::BI__exception_info:
2309
0
  case Builtin::BI_exception_info:
2310
0
    return RValue::get(EmitSEHExceptionInfo());
2311
2
  case Builtin::BI__abnormal_termination:
2312
2
  case Builtin::BI_abnormal_termination:
2313
2
    return RValue::get(EmitSEHAbnormalTermination());
2314
4
  case Builtin::BI_setjmpex: {
2315
4
    if (getTarget().getTriple().isOSMSVCRT()) {
2316
4
      llvm::Type *ArgTypes[] = {Int8PtrTy, Int8PtrTy};
2317
4
      llvm::AttributeList ReturnsTwiceAttr = llvm::AttributeList::get(
2318
4
          getLLVMContext(), llvm::AttributeList::FunctionIndex,
2319
4
          llvm::Attribute::ReturnsTwice);
2320
4
      llvm::Constant *SetJmpEx = CGM.CreateRuntimeFunction(
2321
4
          llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/false),
2322
4
          "_setjmpex", ReturnsTwiceAttr, /*Local=*/true);
2323
4
      llvm::Value *Buf = Builder.CreateBitOrPointerCast(
2324
4
          EmitScalarExpr(E->getArg(0)), Int8PtrTy);
2325
4
      llvm::Value *FrameAddr =
2326
4
          Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
2327
4
                             ConstantInt::get(Int32Ty, 0));
2328
4
      llvm::Value *Args[] = {Buf, FrameAddr};
2329
4
      llvm::CallSite CS = EmitRuntimeCallOrInvoke(SetJmpEx, Args);
2330
4
      CS.setAttributes(ReturnsTwiceAttr);
2331
4
      return RValue::get(CS.getInstruction());
2332
4
    }
2333
0
    break;
2334
0
  }
2335
9
  case Builtin::BI_setjmp: {
2336
9
    if (getTarget().getTriple().isOSMSVCRT()) {
2337
4
      llvm::AttributeList ReturnsTwiceAttr = llvm::AttributeList::get(
2338
4
          getLLVMContext(), llvm::AttributeList::FunctionIndex,
2339
4
          llvm::Attribute::ReturnsTwice);
2340
4
      llvm::Value *Buf = Builder.CreateBitOrPointerCast(
2341
4
          EmitScalarExpr(E->getArg(0)), Int8PtrTy);
2342
4
      llvm::CallSite CS;
2343
4
      if (getTarget().getTriple().getArch() == llvm::Triple::x86) {
2344
2
        llvm::Type *ArgTypes[] = {Int8PtrTy, IntTy};
2345
2
        llvm::Constant *SetJmp3 = CGM.CreateRuntimeFunction(
2346
2
            llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/true),
2347
2
            "_setjmp3", ReturnsTwiceAttr, /*Local=*/true);
2348
2
        llvm::Value *Count = ConstantInt::get(IntTy, 0);
2349
2
        llvm::Value *Args[] = {Buf, Count};
2350
2
        CS = EmitRuntimeCallOrInvoke(SetJmp3, Args);
2351
4
      } else {
2352
2
        llvm::Type *ArgTypes[] = {Int8PtrTy, Int8PtrTy};
2353
2
        llvm::Constant *SetJmp = CGM.CreateRuntimeFunction(
2354
2
            llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/false),
2355
2
            "_setjmp", ReturnsTwiceAttr, /*Local=*/true);
2356
2
        llvm::Value *FrameAddr =
2357
2
            Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
2358
2
                               ConstantInt::get(Int32Ty, 0));
2359
2
        llvm::Value *Args[] = {Buf, FrameAddr};
2360
2
        CS = EmitRuntimeCallOrInvoke(SetJmp, Args);
2361
2
      }
2362
4
      CS.setAttributes(ReturnsTwiceAttr);
2363
4
      return RValue::get(CS.getInstruction());
2364
4
    }
2365
5
    break;
2366
5
  }
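// A sketch of the dispatch in the two cases above (hypothetical helper, not
// part of this file): _setjmpex always targets the _setjmpex runtime entry,
// while _setjmp picks the variadic _setjmp3 on x86 and pairs plain _setjmp
// with the current frame address elsewhere; non-MSVCRT targets fall through.
const char *pickSetJmpRuntimeFn(bool IsSetJmpEx, bool IsOSMSVCRT, bool IsX86) {
  if (!IsOSMSVCRT)
    return nullptr;                    // break: default builtin handling
  if (IsSetJmpEx)
    return "_setjmpex";
  return IsX86 ? "_setjmp3" : "_setjmp";
}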
2367
5
2368
4
  case Builtin::BI__GetExceptionInfo: {
2369
4
    if (llvm::GlobalVariable *GV =
2370
4
            CGM.getCXXABI().getThrowInfo(FD->getParamDecl(0)->getType()))
2371
4
      return RValue::get(llvm::ConstantExpr::getBitCast(GV, CGM.Int8PtrTy));
2372
0
    break;
2373
0
  }
2374
0
2375
3
  case Builtin::BI__fastfail:
2376
3
    return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::__fastfail, E));
2377
0
2378
34
  case Builtin::BI__builtin_coro_size: {
2379
34
    auto & Context = getContext();
2380
34
    auto SizeTy = Context.getSizeType();
2381
34
    auto T = Builder.getIntNTy(Context.getTypeSize(SizeTy));
2382
34
    Value *F = CGM.getIntrinsic(Intrinsic::coro_size, T);
2383
34
    return RValue::get(Builder.CreateCall(F));
2384
0
  }
2385
0
2386
3
  case Builtin::BI__builtin_coro_id:
2387
3
    return EmitCoroutineIntrinsic(E, Intrinsic::coro_id);
2388
1
  case Builtin::BI__builtin_coro_promise:
2389
1
    return EmitCoroutineIntrinsic(E, Intrinsic::coro_promise);
2390
2
  case Builtin::BI__builtin_coro_resume:
2391
2
    return EmitCoroutineIntrinsic(E, Intrinsic::coro_resume);
2392
133
  case Builtin::BI__builtin_coro_frame:
2393
133
    return EmitCoroutineIntrinsic(E, Intrinsic::coro_frame);
2394
44
  case Builtin::BI__builtin_coro_free:
2395
44
    return EmitCoroutineIntrinsic(E, Intrinsic::coro_free);
2396
1
  case Builtin::BI__builtin_coro_destroy:
2397
1
    return EmitCoroutineIntrinsic(E, Intrinsic::coro_destroy);
2398
1
  case Builtin::BI__builtin_coro_done:
2399
1
    return EmitCoroutineIntrinsic(E, Intrinsic::coro_done);
2400
2
  case Builtin::BI__builtin_coro_alloc:
2401
2
    return EmitCoroutineIntrinsic(E, Intrinsic::coro_alloc);
2402
2
  case Builtin::BI__builtin_coro_begin:
2403
2
    return EmitCoroutineIntrinsic(E, Intrinsic::coro_begin);
2404
1
  case Builtin::BI__builtin_coro_end:
2405
1
    return EmitCoroutineIntrinsic(E, Intrinsic::coro_end);
2406
1
  case Builtin::BI__builtin_coro_suspend:
2407
1
    return EmitCoroutineIntrinsic(E, Intrinsic::coro_suspend);
2408
1
  case Builtin::BI__builtin_coro_param:
2409
1
    return EmitCoroutineIntrinsic(E, Intrinsic::coro_param);
2410
0
2411
0
  // OpenCL v2.0 s6.13.16.2, Built-in pipe read and write functions
2412
8
  case Builtin::BIread_pipe:
2413
8
  case Builtin::BIwrite_pipe: {
2414
8
    Value *Arg0 = EmitScalarExpr(E->getArg(0)),
2415
8
          *Arg1 = EmitScalarExpr(E->getArg(1));
2416
8
    CGOpenCLRuntime OpenCLRT(CGM);
2417
8
    Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
2418
8
    Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
2419
8
2420
8
    // Type of the generic packet parameter.
2421
8
    unsigned GenericAS =
2422
8
        getContext().getTargetAddressSpace(LangAS::opencl_generic);
2423
8
    llvm::Type *I8PTy = llvm::PointerType::get(
2424
8
        llvm::Type::getInt8Ty(getLLVMContext()), GenericAS);
2425
8
2426
8
    // Testing which overloaded version we should generate the call for.
2427
8
    if (2U == E->getNumArgs()) {
2428
4
      const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_2"
2429
2
                                                             : "__write_pipe_2";
2430
6
      // Creating a generic function type to be able to call with any builtin or
2431
6
      // user defined type.
2432
6
      llvm::Type *ArgTys[] = {Arg0->getType(), I8PTy, Int32Ty, Int32Ty};
2433
6
      llvm::FunctionType *FTy = llvm::FunctionType::get(
2434
6
          Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2435
6
      Value *BCast = Builder.CreatePointerCast(Arg1, I8PTy);
2436
6
      return RValue::get(
2437
6
          Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2438
6
                             {Arg0, BCast, PacketSize, PacketAlign}));
2439
0
    } else {
2440
2
      assert(4 == E->getNumArgs() &&
2441
2
             "Illegal number of parameters to pipe function");
2442
1
      const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_4"
2443
1
                                                             : "__write_pipe_4";
2444
2
2445
2
      llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, I8PTy,
2446
2
                              Int32Ty, Int32Ty};
2447
2
      Value *Arg2 = EmitScalarExpr(E->getArg(2)),
2448
2
            *Arg3 = EmitScalarExpr(E->getArg(3));
2449
2
      llvm::FunctionType *FTy = llvm::FunctionType::get(
2450
2
          Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2451
2
      Value *BCast = Builder.CreatePointerCast(Arg3, I8PTy);
2452
2
      // We know the third argument is an integer type, but we may need to cast
2453
2
      // it to i32.
2454
2
      if (Arg2->getType() != Int32Ty)
2455
0
        Arg2 = Builder.CreateZExtOrTrunc(Arg2, Int32Ty);
2456
2
      return RValue::get(Builder.CreateCall(
2457
2
          CGM.CreateRuntimeFunction(FTy, Name),
2458
2
          {Arg0, Arg1, Arg2, BCast, PacketSize, PacketAlign}));
2459
2
    }
2460
0
  }
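// Sketch of the overload dispatch above (hypothetical helper): the plain
// 2-argument read/write maps to __{read,write}_pipe_2 and the 4-argument
// reserved form to __{read,write}_pipe_4; both calls also pass the packet
// size and alignment queried from the pipe element type.
const char *pickPipeRuntimeFn(bool IsRead, unsigned NumArgs) {
  if (NumArgs == 2)
    return IsRead ? "__read_pipe_2" : "__write_pipe_2";
  return IsRead ? "__read_pipe_4" : "__write_pipe_4"; // reserve-id form
}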
2461
0
  // OpenCL v2.0 s6.13.16, s9.17.3.5 - Built-in pipe reserve read and write
2462
0
  // functions
2463
6
  case Builtin::BIreserve_read_pipe:
2464
6
  case Builtin::BIreserve_write_pipe:
2465
6
  case Builtin::BIwork_group_reserve_read_pipe:
2466
6
  case Builtin::BIwork_group_reserve_write_pipe:
2467
6
  case Builtin::BIsub_group_reserve_read_pipe:
2468
6
  case Builtin::BIsub_group_reserve_write_pipe: {
2469
6
    // Composing the mangled name for the function.
2470
6
    const char *Name;
2471
6
    if (BuiltinID == Builtin::BIreserve_read_pipe)
2472
1
      Name = "__reserve_read_pipe";
2473
5
    else if (BuiltinID == Builtin::BIreserve_write_pipe)
2474
1
      Name = "__reserve_write_pipe";
2475
4
    else if (BuiltinID == Builtin::BIwork_group_reserve_read_pipe)
2476
1
      Name = "__work_group_reserve_read_pipe";
2477
3
    else if (BuiltinID == Builtin::BIwork_group_reserve_write_pipe)
2478
1
      Name = "__work_group_reserve_write_pipe";
2479
2
    else if (BuiltinID == Builtin::BIsub_group_reserve_read_pipe)
2480
1
      Name = "__sub_group_reserve_read_pipe";
2481
2
    else
2482
1
      Name = "__sub_group_reserve_write_pipe";
2483
6
2484
6
    Value *Arg0 = EmitScalarExpr(E->getArg(0)),
2485
6
          *Arg1 = EmitScalarExpr(E->getArg(1));
2486
6
    llvm::Type *ReservedIDTy = ConvertType(getContext().OCLReserveIDTy);
2487
6
    CGOpenCLRuntime OpenCLRT(CGM);
2488
6
    Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
2489
6
    Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
2490
6
2491
6
    // Building the generic function prototype.
2492
6
    llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty, Int32Ty, Int32Ty};
2493
6
    llvm::FunctionType *FTy = llvm::FunctionType::get(
2494
6
        ReservedIDTy, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2495
6
    // We know the second argument is an integer type, but we may need to cast
2496
6
    // it to i32.
2497
6
    if (Arg1->getType() != Int32Ty)
2498
0
      Arg1 = Builder.CreateZExtOrTrunc(Arg1, Int32Ty);
2499
6
    return RValue::get(
2500
6
        Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2501
6
                           {Arg0, Arg1, PacketSize, PacketAlign}));
2502
6
  }
2503
6
  // OpenCL v2.0 s6.13.16, s9.17.3.5 - Built-in pipe commit read and write
2504
6
  // functions
2505
6
  case Builtin::BIcommit_read_pipe:
2506
6
  case Builtin::BIcommit_write_pipe:
2507
6
  case Builtin::BIwork_group_commit_read_pipe:
2508
6
  case Builtin::BIwork_group_commit_write_pipe:
2509
6
  case Builtin::BIsub_group_commit_read_pipe:
2510
6
  case Builtin::BIsub_group_commit_write_pipe: {
2511
6
    const char *Name;
2512
6
    if (BuiltinID == Builtin::BIcommit_read_pipe)
2513
1
      Name = "__commit_read_pipe";
2514
5
    else if (BuiltinID == Builtin::BIcommit_write_pipe)
2515
1
      Name = "__commit_write_pipe";
2516
4
    else if (BuiltinID == Builtin::BIwork_group_commit_read_pipe)
2517
1
      Name = "__work_group_commit_read_pipe";
2518
3
    else if (BuiltinID == Builtin::BIwork_group_commit_write_pipe)
2519
1
      Name = "__work_group_commit_write_pipe";
2520
2
    else if (BuiltinID == Builtin::BIsub_group_commit_read_pipe)
2521
1
      Name = "__sub_group_commit_read_pipe";
2522
2
    else
2523
1
      Name = "__sub_group_commit_write_pipe";
2524
6
2525
6
    Value *Arg0 = EmitScalarExpr(E->getArg(0)),
2526
6
          *Arg1 = EmitScalarExpr(E->getArg(1));
2527
6
    CGOpenCLRuntime OpenCLRT(CGM);
2528
6
    Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
2529
6
    Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
2530
6
2531
6
    // Building the generic function prototype.
2532
6
    llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, Int32Ty};
2533
6
    llvm::FunctionType *FTy =
2534
6
        llvm::FunctionType::get(llvm::Type::getVoidTy(getLLVMContext()),
2535
6
                                llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2536
6
2537
6
    return RValue::get(
2538
6
        Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2539
6
                           {Arg0, Arg1, PacketSize, PacketAlign}));
2540
6
  }
2541
6
  // OpenCL v2.0 s6.13.16.4 Built-in pipe query functions
2542
4
  case Builtin::BIget_pipe_num_packets:
2543
4
  case Builtin::BIget_pipe_max_packets: {
2544
4
    const char *Name;
2545
4
    if (BuiltinID == Builtin::BIget_pipe_num_packets)
2546
2
      Name = "__get_pipe_num_packets";
2547
4
    else
2548
2
      Name = "__get_pipe_max_packets";
2549
4
2550
4
    // Building the generic function prototype.
2551
4
    Value *Arg0 = EmitScalarExpr(E->getArg(0));
2552
4
    CGOpenCLRuntime OpenCLRT(CGM);
2553
4
    Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
2554
4
    Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
2555
4
    llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty, Int32Ty};
2556
4
    llvm::FunctionType *FTy = llvm::FunctionType::get(
2557
4
        Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2558
4
2559
4
    return RValue::get(Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2560
4
                                          {Arg0, PacketSize, PacketAlign}));
2561
4
  }
2562
4
2563
4
  // OpenCL v2.0 s6.13.9 - Address space qualifier functions.
2564
14
  case Builtin::BIto_global:
2565
14
  case Builtin::BIto_local:
2566
14
  case Builtin::BIto_private: {
2567
14
    auto Arg0 = EmitScalarExpr(E->getArg(0));
2568
14
    auto NewArgT = llvm::PointerType::get(Int8Ty,
2569
14
      CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic));
2570
14
    auto NewRetT = llvm::PointerType::get(Int8Ty,
2571
14
      CGM.getContext().getTargetAddressSpace(
2572
14
        E->getType()->getPointeeType().getAddressSpace()));
2573
14
    auto FTy = llvm::FunctionType::get(NewRetT, {NewArgT}, false);
2574
14
    llvm::Value *NewArg;
2575
14
    if (Arg0->getType()->getPointerAddressSpace() !=
2576
14
        NewArgT->getPointerAddressSpace())
2577
10
      NewArg = Builder.CreateAddrSpaceCast(Arg0, NewArgT);
2578
14
    else
2579
4
      NewArg = Builder.CreateBitOrPointerCast(Arg0, NewArgT);
2580
14
    auto NewName = std::string("__") + E->getDirectCallee()->getName().str();
2581
14
    auto NewCall =
2582
14
        Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, NewName), {NewArg});
2583
14
    return RValue::get(Builder.CreateBitOrPointerCast(NewCall,
2584
14
      ConvertType(E->getType())));
2585
14
  }
2586
14
2587
14
  // OpenCL v2.0, s6.13.17 - Enqueue kernel function.
2588
14
  // It contains four different overload formats specified in Table 6.13.17.1.
2589
18
  case Builtin::BIenqueue_kernel: {
2590
18
    StringRef Name; // Generated function call name
2591
18
    unsigned NumArgs = E->getNumArgs();
2592
18
2593
18
    llvm::Type *QueueTy = ConvertType(getContext().OCLQueueTy);
2594
18
    llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy(
2595
18
        getContext().getTargetAddressSpace(LangAS::opencl_generic));
2596
18
2597
18
    llvm::Value *Queue = EmitScalarExpr(E->getArg(0));
2598
18
    llvm::Value *Flags = EmitScalarExpr(E->getArg(1));
2599
18
    LValue NDRangeL = EmitAggExprToLValue(E->getArg(2));
2600
18
    llvm::Value *Range = NDRangeL.getAddress().getPointer();
2601
18
    llvm::Type *RangeTy = NDRangeL.getAddress().getType();
2602
18
2603
18
    if (NumArgs == 4) {
2604
2
      // The most basic form of the call with parameters:
2605
2
      // queue_t, kernel_enqueue_flags_t, ndrange_t, block(void)
2606
2
      Name = "__enqueue_kernel_basic";
2607
2
      llvm::Type *ArgTys[] = {QueueTy, Int32Ty, RangeTy, GenericVoidPtrTy};
2608
2
      llvm::FunctionType *FTy = llvm::FunctionType::get(
2609
2
          Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys, 4), false);
2610
2
2611
2
      llvm::Value *Block = Builder.CreatePointerCast(
2612
2
          EmitScalarExpr(E->getArg(3)), GenericVoidPtrTy);
2613
2
2614
2
      AttrBuilder B;
2615
2
      B.addAttribute(Attribute::ByVal);
2616
2
      llvm::AttributeList ByValAttrSet =
2617
2
          llvm::AttributeList::get(CGM.getModule().getContext(), 3U, B);
2618
2
2619
2
      auto RTCall =
2620
2
          Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name, ByValAttrSet),
2621
2
                             {Queue, Flags, Range, Block});
2622
2
      RTCall->setAttributes(ByValAttrSet);
2623
2
      return RValue::get(RTCall);
2624
2
    }
2625
18
    assert(NumArgs >= 5 && "Invalid enqueue_kernel signature");
2626
16
2627
16
    // Create a temporary array to hold the sizes of local pointer arguments
2628
16
    // for the block. \p First is the position of the first size argument.
2629
14
    auto CreateArrayForSizeVar = [=](unsigned First) {
2630
14
      auto *AT = llvm::ArrayType::get(SizeTy, NumArgs - First);
2631
14
      auto *Arr = Builder.CreateAlloca(AT);
2632
14
      llvm::Value *Ptr;
2633
14
      // Each of the following arguments specifies the size of the corresponding
2634
14
      // argument passed to the enqueued block.
2635
14
      auto *Zero = llvm::ConstantInt::get(IntTy, 0);
2636
32
      for (unsigned I = First; I < NumArgs; ++I) {
2637
18
        auto *Index = llvm::ConstantInt::get(IntTy, I - First);
2638
18
        auto *GEP = Builder.CreateGEP(Arr, {Zero, Index});
2639
18
        if (I == First)
2640
14
          Ptr = GEP;
2641
18
        auto *V =
2642
18
            Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(I)), SizeTy);
2643
18
        Builder.CreateAlignedStore(
2644
18
            V, GEP, CGM.getDataLayout().getPrefTypeAlignment(SizeTy));
2645
18
      }
2646
14
      return Ptr;
2647
14
    };
2648
16
2649
16
    // Could have events and/or vaargs.
2650
16
    if (E->getArg(3)->getType()->isBlockPointerType()) {
2651
10
      // No events passed, but has variadic arguments.
2652
10
      Name = "__enqueue_kernel_vaargs";
2653
10
      auto *Block = Builder.CreatePointerCast(EmitScalarExpr(E->getArg(3)),
2654
10
                                              GenericVoidPtrTy);
2655
10
      auto *PtrToSizeArray = CreateArrayForSizeVar(4);
2656
10
2657
10
      // Create a vector of the arguments, as well as a constant value to
2658
10
      // express to the runtime the number of variadic arguments.
2659
10
      std::vector<llvm::Value *> Args = {Queue,
2660
10
                                         Flags,
2661
10
                                         Range,
2662
10
                                         Block,
2663
10
                                         ConstantInt::get(IntTy, NumArgs - 4),
2664
10
                                         PtrToSizeArray};
2665
10
      std::vector<llvm::Type *> ArgTys = {QueueTy, IntTy,
2666
10
                                          RangeTy, GenericVoidPtrTy,
2667
10
                                          IntTy,   PtrToSizeArray->getType()};
2668
10
2669
10
      llvm::FunctionType *FTy = llvm::FunctionType::get(
2670
10
          Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2671
10
      return RValue::get(
2672
10
          Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2673
10
                             llvm::ArrayRef<llvm::Value *>(Args)));
2674
10
    }
2675
6
    // Any calls now have event arguments passed.
2676
6
    if (NumArgs >= 7) {
2677
6
      llvm::Type *EventTy = ConvertType(getContext().OCLClkEventTy);
2678
6
      llvm::Type *EventPtrTy = EventTy->getPointerTo(
2679
6
          CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic));
2680
6
2681
6
      llvm::Value *NumEvents =
2682
6
          Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(3)), Int32Ty);
2683
6
      llvm::Value *EventList =
2684
6
          E->getArg(4)->getType()->isArrayType()
2685
4
              ? EmitArrayToPointerDecay(E->getArg(4)).getPointer()
2686
2
              : EmitScalarExpr(E->getArg(4));
2687
6
      llvm::Value *ClkEvent = EmitScalarExpr(E->getArg(5));
2688
6
      // Convert to generic address space.
2689
6
      EventList = Builder.CreatePointerCast(EventList, EventPtrTy);
2690
6
      ClkEvent = Builder.CreatePointerCast(ClkEvent, EventPtrTy);
2691
6
      llvm::Value *Block = Builder.CreatePointerCast(
2692
6
          EmitScalarExpr(E->getArg(6)), GenericVoidPtrTy);
2693
6
2694
6
      std::vector<llvm::Type *> ArgTys = {
2695
6
          QueueTy,    Int32Ty,    RangeTy,         Int32Ty,
2696
6
          EventPtrTy, EventPtrTy, GenericVoidPtrTy};
2697
6
2698
6
      std::vector<llvm::Value *> Args = {Queue,     Flags,    Range, NumEvents,
2699
6
                                         EventList, ClkEvent, Block};
2700
6
2701
6
      if (NumArgs == 7) {
2702
2
        // Has events but no variadics.
2703
2
        Name = "__enqueue_kernel_basic_events";
2704
2
        llvm::FunctionType *FTy = llvm::FunctionType::get(
2705
2
            Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2706
2
        return RValue::get(
2707
2
            Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2708
2
                               llvm::ArrayRef<llvm::Value *>(Args)));
2709
2
      }
2710
4
      // Has event info and variadics
2711
4
      // Pass the number of variadics to the runtime function too.
2712
4
      Args.push_back(ConstantInt::get(Int32Ty, NumArgs - 7));
2713
4
      ArgTys.push_back(Int32Ty);
2714
4
      Name = "__enqueue_kernel_events_vaargs";
2715
4
2716
4
      auto *PtrToSizeArray = CreateArrayForSizeVar(7);
2717
4
      Args.push_back(PtrToSizeArray);
2718
4
      ArgTys.push_back(PtrToSizeArray->getType());
2719
4
2720
4
      llvm::FunctionType *FTy = llvm::FunctionType::get(
2721
4
          Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2722
4
      return RValue::get(
2723
4
          Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2724
4
                             llvm::ArrayRef<llvm::Value *>(Args)));
2725
4
    }
2726
0
    LLVM_FALLTHROUGH;
2727
0
  }
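// The four overload buckets above, summarized as a sketch (hypothetical
// helper; Arg3IsBlock mirrors the isBlockPointerType() test on argument 3):
const char *pickEnqueueKernelFn(unsigned NumArgs, bool Arg3IsBlock) {
  if (NumArgs == 4)
    return "__enqueue_kernel_basic";   // queue, flags, ndrange, block
  if (Arg3IsBlock)
    return "__enqueue_kernel_vaargs";  // no events, trailing local sizes
  return NumArgs == 7 ? "__enqueue_kernel_basic_events"
                      : "__enqueue_kernel_events_vaargs";
}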
2728
0
  // OpenCL v2.0 s6.13.17.6 - Kernel query functions need bitcast of block
2729
0
  // parameter.
2730
4
  case Builtin::BIget_kernel_work_group_size: {
2731
4
    llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy(
2732
4
        getContext().getTargetAddressSpace(LangAS::opencl_generic));
2733
4
    Value *Arg = EmitScalarExpr(E->getArg(0));
2734
4
    Arg = Builder.CreatePointerCast(Arg, GenericVoidPtrTy);
2735
4
    return RValue::get(Builder.CreateCall(
2736
4
        CGM.CreateRuntimeFunction(
2737
4
            llvm::FunctionType::get(IntTy, GenericVoidPtrTy, false),
2738
4
            "__get_kernel_work_group_size_impl"),
2739
4
        Arg));
2740
0
  }
2741
4
  case Builtin::BIget_kernel_preferred_work_group_size_multiple: {
2742
4
    llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy(
2743
4
        getContext().getTargetAddressSpace(LangAS::opencl_generic));
2744
4
    Value *Arg = EmitScalarExpr(E->getArg(0));
2745
4
    Arg = Builder.CreatePointerCast(Arg, GenericVoidPtrTy);
2746
4
    return RValue::get(Builder.CreateCall(
2747
4
        CGM.CreateRuntimeFunction(
2748
4
            llvm::FunctionType::get(IntTy, GenericVoidPtrTy, false),
2749
4
            "__get_kernel_preferred_work_group_multiple_impl"),
2750
4
        Arg));
2751
0
  }
2752
4
  case Builtin::BIget_kernel_max_sub_group_size_for_ndrange:
2753
4
  case Builtin::BIget_kernel_sub_group_count_for_ndrange: {
2754
4
    llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy(
2755
4
        getContext().getTargetAddressSpace(LangAS::opencl_generic));
2756
4
    LValue NDRangeL = EmitAggExprToLValue(E->getArg(0));
2757
4
    llvm::Value *NDRange = NDRangeL.getAddress().getPointer();
2758
4
    Value *Block = EmitScalarExpr(E->getArg(1));
2759
4
    Block = Builder.CreatePointerCast(Block, GenericVoidPtrTy);
2760
4
    const char *Name =
2761
4
        BuiltinID == Builtin::BIget_kernel_max_sub_group_size_for_ndrange
2762
2
            ? "__get_kernel_max_sub_group_size_for_ndrange_impl"
2763
2
            : "__get_kernel_sub_group_count_for_ndrange_impl";
2764
4
    return RValue::get(Builder.CreateCall(
2765
4
        CGM.CreateRuntimeFunction(
2766
4
            llvm::FunctionType::get(
2767
4
                IntTy, {NDRange->getType(), GenericVoidPtrTy}, false),
2768
4
            Name),
2769
4
        {NDRange, Block}));
2770
4
  }
2771
4
2772
6
  case Builtin::BI__builtin_store_half:
2773
6
  case Builtin::BI__builtin_store_halff: {
2774
6
    Value *Val = EmitScalarExpr(E->getArg(0));
2775
6
    Address Address = EmitPointerWithAlignment(E->getArg(1));
2776
6
    Value *HalfVal = Builder.CreateFPTrunc(Val, Builder.getHalfTy());
2777
6
    return RValue::get(Builder.CreateStore(HalfVal, Address));
2778
6
  }
2779
3
  case Builtin::BI__builtin_load_half: {
2780
3
    Address Address = EmitPointerWithAlignment(E->getArg(0));
2781
3
    Value *HalfVal = Builder.CreateLoad(Address);
2782
3
    return RValue::get(Builder.CreateFPExt(HalfVal, Builder.getDoubleTy()));
2783
6
  }
2784
3
  case Builtin::BI__builtin_load_halff: {
2785
3
    Address Address = EmitPointerWithAlignment(E->getArg(0));
2786
3
    Value *HalfVal = Builder.CreateLoad(Address);
2787
3
    return RValue::get(Builder.CreateFPExt(HalfVal, Builder.getFloatTy()));
2788
6
  }
2789
53.3k
  case Builtin::BIprintf:
2790
53.3k
    if (getTarget().getTriple().isNVPTX())
2791
10
      return EmitNVPTXDevicePrintfCallExpr(E, ReturnValue);
2792
53.3k
    break;
2793
6
  case Builtin::BI__builtin_canonicalize:
2794
6
  case Builtin::BI__builtin_canonicalizef:
2795
6
  case Builtin::BI__builtin_canonicalizel:
2796
6
    return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::canonicalize));
2797
6
2798
2
  case Builtin::BI__builtin_thread_pointer: {
2799
2
    if (!getContext().getTargetInfo().isTLSSupported())
2800
0
      CGM.ErrorUnsupported(E, "__builtin_thread_pointer");
2801
2
    // Fall through - it's already mapped to the intrinsic by GCCBuiltin.
2802
2
    break;
2803
6
  }
2804
13
  case Builtin::BI__builtin_os_log_format: {
2805
13
    assert(E->getNumArgs() >= 2 &&
2806
13
           "__builtin_os_log_format takes at least 2 arguments");
2807
13
    analyze_os_log::OSLogBufferLayout Layout;
2808
13
    analyze_os_log::computeOSLogBufferLayout(CGM.getContext(), E, Layout);
2809
13
    Address BufAddr = EmitPointerWithAlignment(E->getArg(0));
2810
13
    // Ignore argument 1, the format string. It is not currently used.
2811
13
    CharUnits Offset;
2812
13
    Builder.CreateStore(
2813
13
        Builder.getInt8(Layout.getSummaryByte()),
2814
13
        Builder.CreateConstByteGEP(BufAddr, Offset++, "summary"));
2815
13
    Builder.CreateStore(
2816
13
        Builder.getInt8(Layout.getNumArgsByte()),
2817
13
        Builder.CreateConstByteGEP(BufAddr, Offset++, "numArgs"));
2818
13
2819
13
    llvm::SmallVector<llvm::Value *, 4> RetainableOperands;
2820
25
    for (const auto &Item : Layout.Items) {
2821
25
      Builder.CreateStore(
2822
25
          Builder.getInt8(Item.getDescriptorByte()),
2823
25
          Builder.CreateConstByteGEP(BufAddr, Offset++, "argDescriptor"));
2824
25
      Builder.CreateStore(
2825
25
          Builder.getInt8(Item.getSizeByte()),
2826
25
          Builder.CreateConstByteGEP(BufAddr, Offset++, "argSize"));
2827
25
      Address Addr = Builder.CreateConstByteGEP(BufAddr, Offset);
2828
25
      if (const Expr *TheExpr = Item.getExpr()) {
2829
21
        Addr = Builder.CreateElementBitCast(
2830
21
            Addr, ConvertTypeForMem(TheExpr->getType()));
2831
21
        // Check if this is a retainable type.
2832
21
        if (TheExpr->getType()->isObjCRetainableType()) {
2833
1
          assert(getEvaluationKind(TheExpr->getType()) == TEK_Scalar &&
2834
1
                 "Only scalar can be a ObjC retainable type");
2835
1
          llvm::Value *SV = EmitScalarExpr(TheExpr, /*Ignore*/ false);
2836
1
          RValue RV = RValue::get(SV);
2837
1
          LValue LV = MakeAddrLValue(Addr, TheExpr->getType());
2838
1
          EmitStoreThroughLValue(RV, LV);
2839
1
          // Check if the object is constant, if not, save it in
2840
1
          // RetainableOperands.
2841
1
          if (!isa<Constant>(SV))
2842
1
            RetainableOperands.push_back(SV);
2843
21
        } else {
2844
20
          EmitAnyExprToMem(TheExpr, Addr, Qualifiers(), /*isInit*/ true);
2845
20
        }
2846
25
      } else {
2847
4
        Addr = Builder.CreateElementBitCast(Addr, Int32Ty);
2848
4
        Builder.CreateStore(
2849
4
            Builder.getInt32(Item.getConstValue().getQuantity()), Addr);
2850
4
      }
2851
25
      Offset += Item.size();
2852
25
    }
2853
13
2854
13
    // Push a clang.arc.use cleanup for each object in RetainableOperands. The
2855
13
    // cleanup will cause the use to appear after the final log call, keeping
2856
13
    // the object valid while it's held in the log buffer.  Note that if there's
2857
13
    // a release cleanup on the object, it will already be active; since
2858
13
    // cleanups are emitted in reverse order, the use will occur before the
2859
13
    // object is released.
2860
13
    if (!RetainableOperands.empty() && getLangOpts().ObjCAutoRefCount &&
2861
1
        CGM.getCodeGenOpts().OptimizationLevel != 0)
2862
1
      for (llvm::Value *object : RetainableOperands)
2863
1
        pushFullExprCleanup<CallObjCArcUse>(getARCCleanupKind(), object);
2864
13
2865
13
    return RValue::get(BufAddr.getPointer());
2866
6
  }
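// To make the layout above concrete, a standalone sketch (not CodeGen) that
// builds the same byte layout for two hypothetical 4-byte scalar items:
// [summary][numArgs]([argDescriptor][argSize][payload])*.
#include <cstdint>
#include <cstring>
#include <initializer_list>
#include <vector>

std::vector<uint8_t> buildOSLogBuffer() {
  std::vector<uint8_t> Buf;
  Buf.push_back(0);                    // summary byte
  Buf.push_back(2);                    // numArgs byte
  for (uint32_t V : {7u, 42u}) {       // two hypothetical scalar arguments
    Buf.push_back(0);                  // argDescriptor byte (plain scalar)
    Buf.push_back(4);                  // argSize byte
    uint8_t Raw[sizeof(V)];
    std::memcpy(Raw, &V, sizeof(V));   // argument payload
    Buf.insert(Buf.end(), Raw, Raw + sizeof(V));
  }
  return Buf;
}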
2867
6
2868
12
  case Builtin::BI__builtin_os_log_format_buffer_size: {
2869
12
    analyze_os_log::OSLogBufferLayout Layout;
2870
12
    analyze_os_log::computeOSLogBufferLayout(CGM.getContext(), E, Layout);
2871
12
    return RValue::get(ConstantInt::get(ConvertType(E->getType()),
2872
12
                                        Layout.size().getQuantity()));
2873
6
  }
2874
6
2875
4
  case Builtin::BI__xray_customevent: {
2876
4
    if (!ShouldXRayInstrumentFunction())
2877
0
      return RValue::getIgnored();
2878
4
    if (const auto *XRayAttr = CurFuncDecl->getAttr<XRayInstrumentAttr>()) {
2879
4
      if (XRayAttr->neverXRayInstrument())
2880
1
        return RValue::getIgnored();
2881
3
    }
2882
3
    Function *F = CGM.getIntrinsic(Intrinsic::xray_customevent);
2883
3
    auto FTy = F->getFunctionType();
2884
3
    auto Arg0 = E->getArg(0);
2885
3
    auto Arg0Val = EmitScalarExpr(Arg0);
2886
3
    auto Arg0Ty = Arg0->getType();
2887
3
    auto PTy0 = FTy->getParamType(0);
2888
3
    if (PTy0 != Arg0Val->getType()) {
2889
0
      if (Arg0Ty->isArrayType())
2890
0
        Arg0Val = EmitArrayToPointerDecay(Arg0).getPointer();
2891
0
      else
2892
0
        Arg0Val = Builder.CreatePointerCast(Arg0Val, PTy0);
2893
0
    }
2894
3
    auto Arg1 = EmitScalarExpr(E->getArg(1));
2895
3
    auto PTy1 = FTy->getParamType(1);
2896
3
    if (PTy1 != Arg1->getType())
2897
3
      Arg1 = Builder.CreateTruncOrBitCast(Arg1, PTy1);
2898
3
    return RValue::get(Builder.CreateCall(F, {Arg0Val, Arg1}));
2899
3
  }
2900
3
2901
8
  case Builtin::BI__builtin_ms_va_start:
2902
8
  case Builtin::BI__builtin_ms_va_end:
2903
8
    return RValue::get(
2904
8
        EmitVAStartEnd(EmitMSVAListRef(E->getArg(0)).getPointer(),
2905
8
                       BuiltinID == Builtin::BI__builtin_ms_va_start));
2906
8
2907
6
  case Builtin::BI__builtin_ms_va_copy: {
2908
6
    // Lower this manually. We can't reliably determine whether or not any
2909
6
    // given va_copy() is for a Win64 va_list from the calling convention
2910
6
    // alone, because it's legal to do this from a System V ABI function.
2911
6
    // With opaque pointer types, we won't have enough information in LLVM
2912
6
    // IR to determine this from the argument types, either. Best to do it
2913
6
    // now, while we have enough information.
2914
6
    Address DestAddr = EmitMSVAListRef(E->getArg(0));
2915
6
    Address SrcAddr = EmitMSVAListRef(E->getArg(1));
2916
6
2917
6
    llvm::Type *BPP = Int8PtrPtrTy;
2918
6
2919
6
    DestAddr = Address(Builder.CreateBitCast(DestAddr.getPointer(), BPP, "cp"),
2920
6
                       DestAddr.getAlignment());
2921
6
    SrcAddr = Address(Builder.CreateBitCast(SrcAddr.getPointer(), BPP, "ap"),
2922
6
                      SrcAddr.getAlignment());
2923
6
2924
6
    Value *ArgPtr = Builder.CreateLoad(SrcAddr, "ap.val");
2925
6
    return RValue::get(Builder.CreateStore(ArgPtr, DestAddr));
2926
132k
  }
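// The net effect of the lowering above, as a sketch in plain C++ (a Win64
// va_list is a simple char* cursor, so va_copy is one pointer-sized copy):
void msVaCopySemantics(char **DestVAList, char **SrcVAList) {
  *DestVAList = *SrcVAList;            // *dest = *src
}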
2927
132k
  }
2928
132k
2929
132k
  // If this is an alias for a lib function (e.g. __builtin_sin), emit
2930
132k
  // the call using the normal call path, but using the unmangled
2931
132k
  // version of the function name.
2932
132k
  if (getContext().BuiltinInfo.isLibFunction(BuiltinID))
2933
5.97k
    return emitLibraryCall(*this, FD, E,
2934
5.97k
                           CGM.getBuiltinLibFunction(FD, BuiltinID));
2935
126k
2936
126k
  // If this is a predefined lib function (e.g. malloc), emit the call
2937
126k
  // using exactly the normal call path.
2938
126k
  if (getContext().BuiltinInfo.isPredefinedLibFunction(BuiltinID))
2939
92.5k
    return emitLibraryCall(*this, FD, E,
2940
92.5k
                      cast<llvm::Constant>(EmitScalarExpr(E->getCallee())));
2941
34.0k
2942
34.0k
  // Check that a call to a target specific builtin has the correct target
2943
34.0k
  // features.
2944
34.0k
  // This is down here to avoid non-target specific builtins; however, if
2945
34.0k
  // generic builtins start to require generic target features, then we
2946
34.0k
  // can move this up to the beginning of the function.
2947
34.0k
  checkTargetFeatures(E, FD);
2948
34.0k
2949
34.0k
  // See if we have a target specific intrinsic.
2950
34.0k
  const char *Name = getContext().BuiltinInfo.getName(BuiltinID);
2951
34.0k
  Intrinsic::ID IntrinsicID = Intrinsic::not_intrinsic;
2952
34.0k
  StringRef Prefix =
2953
34.0k
      llvm::Triple::getArchTypePrefix(getTarget().getTriple().getArch());
2954
34.0k
  if (!Prefix.empty()) {
2955
34.0k
    IntrinsicID = Intrinsic::getIntrinsicForGCCBuiltin(Prefix.data(), Name);
2956
34.0k
    // NOTE: we don't need to perform a compatibility flag check here, since the
2957
34.0k
    // intrinsics are declared in Builtins*.def via LANGBUILTIN which filter the
2958
34.0k
    // MS builtins via ALL_MS_LANGUAGES and are filtered earlier.
2959
34.0k
    if (IntrinsicID == Intrinsic::not_intrinsic)
2960
23.7k
      IntrinsicID = Intrinsic::getIntrinsicForMSBuiltin(Prefix.data(), Name);
2961
34.0k
  }
2962
34.0k
2963
34.0k
  if (IntrinsicID != Intrinsic::not_intrinsic) {
2964
10.3k
    SmallVector<Value*, 16> Args;
2965
10.3k
2966
10.3k
    // Find out if any arguments are required to be integer constant
2967
10.3k
    // expressions.
2968
10.3k
    unsigned ICEArguments = 0;
2969
10.3k
    ASTContext::GetBuiltinTypeError Error;
2970
10.3k
    getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
2971
10.3k
    assert(Error == ASTContext::GE_None && "Should not codegen an error");
2972
10.3k
2973
10.3k
    Function *F = CGM.getIntrinsic(IntrinsicID);
2974
10.3k
    llvm::FunctionType *FTy = F->getFunctionType();
2975
10.3k
2976
36.9k
    for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) {
2977
26.5k
      Value *ArgValue;
2978
26.5k
      // If this is a normal argument, just emit it as a scalar.
2979
26.5k
      if ((ICEArguments & (1 << i)) == 0) {
2980
24.4k
        ArgValue = EmitScalarExpr(E->getArg(i));
2981
26.5k
      } else {
2982
2.12k
        // If this is required to be a constant, constant fold it so that we
2983
2.12k
        // know that the generated intrinsic gets a ConstantInt.
2984
2.12k
        llvm::APSInt Result;
2985
2.12k
        bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result,getContext());
2986
2.12k
        assert(IsConst && "Constant arg isn't actually constant?");
2987
2.12k
        (void)IsConst;
2988
2.12k
        ArgValue = llvm::ConstantInt::get(getLLVMContext(), Result);
2989
2.12k
      }
2990
26.5k
2991
26.5k
      // If the intrinsic arg type is different from the builtin arg type
2992
26.5k
      // we need to do a bit cast.
2993
26.5k
      llvm::Type *PTy = FTy->getParamType(i);
2994
26.5k
      if (PTy != ArgValue->getType()) {
2995
980
        assert(PTy->canLosslesslyBitCastTo(FTy->getParamType(i)) &&
2996
980
               "Must be able to losslessly bit cast to param");
2997
980
        ArgValue = Builder.CreateBitCast(ArgValue, PTy);
2998
980
      }
2999
26.5k
3000
26.5k
      Args.push_back(ArgValue);
3001
26.5k
    }
3002
10.3k
3003
10.3k
    Value *V = Builder.CreateCall(F, Args);
3004
10.3k
    QualType BuiltinRetType = E->getType();
3005
10.3k
3006
10.3k
    llvm::Type *RetTy = VoidTy;
3007
10.3k
    if (!BuiltinRetType->isVoidType())
3008
10.0k
      RetTy = ConvertType(BuiltinRetType);
3009
10.3k
3010
10.3k
    if (RetTy != V->getType()) {
3011
422
      assert(V->getType()->canLosslesslyBitCastTo(RetTy) &&
3012
422
             "Must be able to losslessly bit cast result type");
3013
422
      V = Builder.CreateBitCast(V, RetTy);
3014
422
    }
3015
10.3k
3016
10.3k
    return RValue::get(V);
3017
10.3k
  }
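// Sketch of the ICEArguments convention used in the loop above (hypothetical
// helper): bit i set means argument i must be an integer constant expression
// and is materialized as a ConstantInt rather than emitted as a scalar expr.
bool argMustBeConstant(unsigned ICEArguments, unsigned ArgIdx) {
  return (ICEArguments & (1u << ArgIdx)) != 0;
}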
3018
23.7k
3019
23.7k
  // See if we have a target specific builtin that needs to be lowered.
3020
23.7k
  if (Value *V = EmitTargetBuiltinExpr(BuiltinID, E))
3021
23.7k
    return RValue::get(V);
3022
0
3023
0
  ErrorUnsupported(E, "builtin function");
3024
0
3025
0
  // Unknown builtin, for now just dump it out and return undef.
3026
0
  return GetUndefRValue(E->getType());
3027
0
}
3028
3029
static Value *EmitTargetArchBuiltinExpr(CodeGenFunction *CGF,
3030
                                        unsigned BuiltinID, const CallExpr *E,
3031
23.7k
                                        llvm::Triple::ArchType Arch) {
3032
23.7k
  switch (Arch) {
3033
1.34k
  case llvm::Triple::arm:
3034
1.34k
  case llvm::Triple::armeb:
3035
1.34k
  case llvm::Triple::thumb:
3036
1.34k
  case llvm::Triple::thumbeb:
3037
1.34k
    return CGF->EmitARMBuiltinExpr(BuiltinID, E);
3038
17.3k
  case llvm::Triple::aarch64:
3039
17.3k
  case llvm::Triple::aarch64_be:
3040
17.3k
    return CGF->EmitAArch64BuiltinExpr(BuiltinID, E);
3041
2.70k
  case llvm::Triple::x86:
3042
2.70k
  case llvm::Triple::x86_64:
3043
2.70k
    return CGF->EmitX86BuiltinExpr(BuiltinID, E);
3044
1.01k
  case llvm::Triple::ppc:
3045
1.01k
  case llvm::Triple::ppc64:
3046
1.01k
  case llvm::Triple::ppc64le:
3047
1.01k
    return CGF->EmitPPCBuiltinExpr(BuiltinID, E);
3048
105
  case llvm::Triple::r600:
3049
105
  case llvm::Triple::amdgcn:
3050
105
    return CGF->EmitAMDGPUBuiltinExpr(BuiltinID, E);
3051
815
  case llvm::Triple::systemz:
3052
815
    return CGF->EmitSystemZBuiltinExpr(BuiltinID, E);
3053
370
  case llvm::Triple::nvptx:
3054
370
  case llvm::Triple::nvptx64:
3055
370
    return CGF->EmitNVPTXBuiltinExpr(BuiltinID, E);
3056
8
  case llvm::Triple::wasm32:
3057
8
  case llvm::Triple::wasm64:
3058
8
    return CGF->EmitWebAssemblyBuiltinExpr(BuiltinID, E);
3059
0
  default:
3060
0
    return nullptr;
3061
0
  }
3062
0
}
3063
3064
Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID,
3065
23.7k
                                              const CallExpr *E) {
3066
23.7k
  if (getContext().BuiltinInfo.isAuxBuiltinID(BuiltinID)) {
3067
0
    assert(getContext().getAuxTargetInfo() && "Missing aux target info");
3068
0
    return EmitTargetArchBuiltinExpr(
3069
0
        this, getContext().BuiltinInfo.getAuxBuiltinID(BuiltinID), E,
3070
0
        getContext().getAuxTargetInfo()->getTriple().getArch());
3071
0
  }
3072
23.7k
3073
23.7k
  return EmitTargetArchBuiltinExpr(this, BuiltinID, E,
3074
23.7k
                                   getTarget().getTriple().getArch());
3075
23.7k
}
3076
3077
static llvm::VectorType *GetNeonType(CodeGenFunction *CGF,
3078
                                     NeonTypeFlags TypeFlags,
3079
9.25k
                                     bool V1Ty=false) {
3080
9.25k
  int IsQuad = TypeFlags.isQuad();
3081
9.25k
  switch (TypeFlags.getEltType()) {
3082
2.46k
  case NeonTypeFlags::Int8:
3083
2.46k
  case NeonTypeFlags::Poly8:
3084
2.46k
    return llvm::VectorType::get(CGF->Int8Ty, V1Ty ? 1 : (8 << IsQuad));
3085
2.15k
  case NeonTypeFlags::Int16:
3086
2.15k
  case NeonTypeFlags::Poly16:
3087
2.15k
  case NeonTypeFlags::Float16:
3088
2.15k
    return llvm::VectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
3089
2.17k
  case NeonTypeFlags::Int32:
3090
2.17k
    return llvm::VectorType::get(CGF->Int32Ty, V1Ty ? 1 : (2 << IsQuad));
3091
1.45k
  case NeonTypeFlags::Int64:
3092
1.45k
  case NeonTypeFlags::Poly64:
3093
1.45k
    return llvm::VectorType::get(CGF->Int64Ty, V1Ty ? 1 : (1 << IsQuad));
3094
0
  case NeonTypeFlags::Poly128:
3095
0
    // FIXME: i128 and f128 don't get full support in Clang and LLVM;
3096
0
    // a lot of the i128 and f128 API is still missing,
3097
0
    // so we use v16i8 to represent poly128 and rely on pattern matching.
3098
0
    return llvm::VectorType::get(CGF->Int8Ty, 16);
3099
605
  case NeonTypeFlags::Float32:
3100
605
    return llvm::VectorType::get(CGF->FloatTy, V1Ty ? 1 : (2 << IsQuad));
3101
403
  case NeonTypeFlags::Float64:
3102
2.46k
    return llvm::VectorType::get(CGF->DoubleTy, V1Ty ? 1 : (1 << IsQuad));
3103
0
  }
3104
0
  llvm_unreachable("Unknown vector element type!");
3105
0
}
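// The lane counts above follow a single rule, sketched standalone: a 64-bit
// NEON vector holds 64/EltBits lanes and a quad (128-bit) vector twice that,
// unless a one-element vector was requested. (Poly128 is special-cased to
// v16i8 above.)
unsigned neonLaneCount(unsigned EltBits, bool IsQuad, bool V1Ty) {
  if (V1Ty)
    return 1;                               // e.g. <1 x i64>
  return (64 / EltBits) * (IsQuad ? 2 : 1); // e.g. 8 << IsQuad for i8
}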
3106
3107
static llvm::VectorType *GetFloatNeonType(CodeGenFunction *CGF,
3108
196
                                          NeonTypeFlags IntTypeFlags) {
3109
196
  int IsQuad = IntTypeFlags.isQuad();
3110
196
  switch (IntTypeFlags.getEltType()) {
3111
112
  case NeonTypeFlags::Int32:
3112
112
    return llvm::VectorType::get(CGF->FloatTy, (2 << IsQuad));
3113
84
  case NeonTypeFlags::Int64:
3114
84
    return llvm::VectorType::get(CGF->DoubleTy, (1 << IsQuad));
3115
0
  default:
3116
0
    llvm_unreachable("Type can't be converted to floating-point!");
3117
0
  }
3118
0
}
3119
3120
218
Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) {
3121
218
  unsigned nElts = V->getType()->getVectorNumElements();
3122
218
  Value* SV = llvm::ConstantVector::getSplat(nElts, C);
3123
218
  return Builder.CreateShuffleVector(V, V, SV, "lane");
3124
218
}
3125
3126
Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value*> &Ops,
3127
                                     const char *name,
3128
4.44k
                                     unsigned shift, bool rightshift) {
3129
4.44k
  unsigned j = 0;
3130
4.44k
  for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
3131
14.8k
       ai != ae; ++ai, ++j)
3132
10.3k
    if (shift > 0 && shift == j)
3133
233
      Ops[j] = EmitNeonShiftVector(Ops[j], ai->getType(), rightshift);
3134
10.3k
    else
3135
10.1k
      Ops[j] = Builder.CreateBitCast(Ops[j], ai->getType(), name);
3136
4.44k
3137
4.44k
  return Builder.CreateCall(F, Ops, name);
3138
4.44k
}
3139
3140
Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty,
3141
603
                                            bool neg) {
3142
603
  int SV = cast<ConstantInt>(V)->getSExtValue();
3143
603
  return ConstantInt::get(Ty, neg ? -SV : SV);
3144
603
}
3145
3146
// \brief Right-shift a vector by a constant.
3147
Value *CodeGenFunction::EmitNeonRShiftImm(Value *Vec, Value *Shift,
3148
                                          llvm::Type *Ty, bool usgn,
3149
140
                                          const char *name) {
3150
140
  llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
3151
140
3152
140
  int ShiftAmt = cast<ConstantInt>(Shift)->getSExtValue();
3153
140
  int EltSize = VTy->getScalarSizeInBits();
3154
140
3155
140
  Vec = Builder.CreateBitCast(Vec, Ty);
3156
140
3157
140
  // lshr/ashr are undefined when the shift amount is equal to the vector
3158
140
  // element size.
3159
140
  if (ShiftAmt == EltSize) {
3160
10
    if (usgn) {
3161
4
      // Right-shifting an unsigned value by its size yields 0.
3162
4
      return llvm::ConstantAggregateZero::get(VTy);
3163
0
    } else {
3164
6
      // Right-shifting a signed value by its size is equivalent
3165
6
      // to a shift of size-1.
3166
6
      --ShiftAmt;
3167
6
      Shift = ConstantInt::get(VTy->getElementType(), ShiftAmt);
3168
6
    }
3169
10
  }
3170
140
3171
136
  Shift = EmitNeonShiftVector(Shift, Ty, false);
3172
136
  if (usgn)
3173
68
    return Builder.CreateLShr(Vec, Shift, name);
3174
136
  else
3175
68
    return Builder.CreateAShr(Vec, Shift, name);
3176
0
}
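// Scalar analogue of the clamping above (a sketch, not the codegen itself):
// lshr/ashr by the full element width is undefined in LLVM IR, so unsigned
// shifts by the width fold to 0 and signed shifts are retried at width-1.
#include <cstdint>

int64_t rightShiftClamped(int64_t V, unsigned Amt, unsigned EltBits,
                          bool Unsigned) {
  if (Amt == EltBits) {
    if (Unsigned)
      return 0;                        // every bit is shifted out
    Amt = EltBits - 1;                 // keeps the sign-extension result
  }
  return Unsigned ? (int64_t)((uint64_t)V >> Amt) : (V >> Amt);
}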
3177
3178
enum {
3179
  AddRetType = (1 << 0),
3180
  Add1ArgType = (1 << 1),
3181
  Add2ArgTypes = (1 << 2),
3182
3183
  VectorizeRetType = (1 << 3),
3184
  VectorizeArgTypes = (1 << 4),
3185
3186
  InventFloatType = (1 << 5),
3187
  UnsignedAlts = (1 << 6),
3188
3189
  Use64BitVectors = (1 << 7),
3190
  Use128BitVectors = (1 << 8),
3191
3192
  Vectorize1ArgType = Add1ArgType | VectorizeArgTypes,
3193
  VectorRet = AddRetType | VectorizeRetType,
3194
  VectorRetGetArgs01 =
3195
      AddRetType | Add2ArgTypes | VectorizeRetType | VectorizeArgTypes,
3196
  FpCmpzModifiers =
3197
      AddRetType | VectorizeRetType | Add1ArgType | InventFloatType
3198
};
3199
3200
namespace {
3201
struct NeonIntrinsicInfo {
3202
  const char *NameHint;
3203
  unsigned BuiltinID;
3204
  unsigned LLVMIntrinsic;
3205
  unsigned AltLLVMIntrinsic;
3206
  unsigned TypeModifier;
3207
3208
181k
  bool operator<(unsigned RHSBuiltinID) const {
3209
181k
    return BuiltinID < RHSBuiltinID;
3210
181k
  }
3211
0
  bool operator<(const NeonIntrinsicInfo &TE) const {
3212
0
    return BuiltinID < TE.BuiltinID;
3213
0
  }
3214
};
3215
} // end anonymous namespace
3216
3217
#define NEONMAP0(NameBase) \
3218
  { #NameBase, NEON::BI__builtin_neon_ ## NameBase, 0, 0, 0 }
3219
3220
#define NEONMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
3221
  { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \
3222
      Intrinsic::LLVMIntrinsic, 0, TypeModifier }
3223
3224
#define NEONMAP2(NameBase, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier) \
3225
  { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \
3226
      Intrinsic::LLVMIntrinsic, Intrinsic::AltLLVMIntrinsic, \
3227
      TypeModifier }
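// For clarity, the hand-expanded form of one NEONMAP1 entry from the table
// below; the initializer order matches the NeonIntrinsicInfo fields
// (NameHint, BuiltinID, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier):
//   NEONMAP1(vcls_v, arm_neon_vcls, Add1ArgType) expands to
//   { "vcls_v", NEON::BI__builtin_neon_vcls_v,
//     Intrinsic::arm_neon_vcls, 0, Add1ArgType }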
3228
3229
static const NeonIntrinsicInfo ARMSIMDIntrinsicMap [] = {
3230
  NEONMAP2(vabd_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
3231
  NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
3232
  NEONMAP1(vabs_v, arm_neon_vabs, 0),
3233
  NEONMAP1(vabsq_v, arm_neon_vabs, 0),
3234
  NEONMAP0(vaddhn_v),
3235
  NEONMAP1(vaesdq_v, arm_neon_aesd, 0),
3236
  NEONMAP1(vaeseq_v, arm_neon_aese, 0),
3237
  NEONMAP1(vaesimcq_v, arm_neon_aesimc, 0),
3238
  NEONMAP1(vaesmcq_v, arm_neon_aesmc, 0),
3239
  NEONMAP1(vbsl_v, arm_neon_vbsl, AddRetType),
3240
  NEONMAP1(vbslq_v, arm_neon_vbsl, AddRetType),
3241
  NEONMAP1(vcage_v, arm_neon_vacge, 0),
3242
  NEONMAP1(vcageq_v, arm_neon_vacge, 0),
3243
  NEONMAP1(vcagt_v, arm_neon_vacgt, 0),
3244
  NEONMAP1(vcagtq_v, arm_neon_vacgt, 0),
3245
  NEONMAP1(vcale_v, arm_neon_vacge, 0),
3246
  NEONMAP1(vcaleq_v, arm_neon_vacge, 0),
3247
  NEONMAP1(vcalt_v, arm_neon_vacgt, 0),
3248
  NEONMAP1(vcaltq_v, arm_neon_vacgt, 0),
3249
  NEONMAP1(vcls_v, arm_neon_vcls, Add1ArgType),
3250
  NEONMAP1(vclsq_v, arm_neon_vcls, Add1ArgType),
3251
  NEONMAP1(vclz_v, ctlz, Add1ArgType),
3252
  NEONMAP1(vclzq_v, ctlz, Add1ArgType),
3253
  NEONMAP1(vcnt_v, ctpop, Add1ArgType),
3254
  NEONMAP1(vcntq_v, ctpop, Add1ArgType),
3255
  NEONMAP1(vcvt_f16_f32, arm_neon_vcvtfp2hf, 0),
3256
  NEONMAP1(vcvt_f32_f16, arm_neon_vcvthf2fp, 0),
3257
  NEONMAP0(vcvt_f32_v),
3258
  NEONMAP2(vcvt_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
3259
  NEONMAP1(vcvt_n_s32_v, arm_neon_vcvtfp2fxs, 0),
3260
  NEONMAP1(vcvt_n_s64_v, arm_neon_vcvtfp2fxs, 0),
3261
  NEONMAP1(vcvt_n_u32_v, arm_neon_vcvtfp2fxu, 0),
3262
  NEONMAP1(vcvt_n_u64_v, arm_neon_vcvtfp2fxu, 0),
3263
  NEONMAP0(vcvt_s32_v),
3264
  NEONMAP0(vcvt_s64_v),
3265
  NEONMAP0(vcvt_u32_v),
3266
  NEONMAP0(vcvt_u64_v),
3267
  NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0),
3268
  NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0),
3269
  NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0),
3270
  NEONMAP1(vcvta_u64_v, arm_neon_vcvtau, 0),
3271
  NEONMAP1(vcvtaq_s32_v, arm_neon_vcvtas, 0),
3272
  NEONMAP1(vcvtaq_s64_v, arm_neon_vcvtas, 0),
3273
  NEONMAP1(vcvtaq_u32_v, arm_neon_vcvtau, 0),
3274
  NEONMAP1(vcvtaq_u64_v, arm_neon_vcvtau, 0),
3275
  NEONMAP1(vcvtm_s32_v, arm_neon_vcvtms, 0),
3276
  NEONMAP1(vcvtm_s64_v, arm_neon_vcvtms, 0),
3277
  NEONMAP1(vcvtm_u32_v, arm_neon_vcvtmu, 0),
3278
  NEONMAP1(vcvtm_u64_v, arm_neon_vcvtmu, 0),
3279
  NEONMAP1(vcvtmq_s32_v, arm_neon_vcvtms, 0),
3280
  NEONMAP1(vcvtmq_s64_v, arm_neon_vcvtms, 0),
3281
  NEONMAP1(vcvtmq_u32_v, arm_neon_vcvtmu, 0),
3282
  NEONMAP1(vcvtmq_u64_v, arm_neon_vcvtmu, 0),
3283
  NEONMAP1(vcvtn_s32_v, arm_neon_vcvtns, 0),
3284
  NEONMAP1(vcvtn_s64_v, arm_neon_vcvtns, 0),
3285
  NEONMAP1(vcvtn_u32_v, arm_neon_vcvtnu, 0),
3286
  NEONMAP1(vcvtn_u64_v, arm_neon_vcvtnu, 0),
3287
  NEONMAP1(vcvtnq_s32_v, arm_neon_vcvtns, 0),
3288
  NEONMAP1(vcvtnq_s64_v, arm_neon_vcvtns, 0),
3289
  NEONMAP1(vcvtnq_u32_v, arm_neon_vcvtnu, 0),
3290
  NEONMAP1(vcvtnq_u64_v, arm_neon_vcvtnu, 0),
3291
  NEONMAP1(vcvtp_s32_v, arm_neon_vcvtps, 0),
3292
  NEONMAP1(vcvtp_s64_v, arm_neon_vcvtps, 0),
3293
  NEONMAP1(vcvtp_u32_v, arm_neon_vcvtpu, 0),
3294
  NEONMAP1(vcvtp_u64_v, arm_neon_vcvtpu, 0),
3295
  NEONMAP1(vcvtpq_s32_v, arm_neon_vcvtps, 0),
3296
  NEONMAP1(vcvtpq_s64_v, arm_neon_vcvtps, 0),
3297
  NEONMAP1(vcvtpq_u32_v, arm_neon_vcvtpu, 0),
3298
  NEONMAP1(vcvtpq_u64_v, arm_neon_vcvtpu, 0),
3299
  NEONMAP0(vcvtq_f32_v),
3300
  NEONMAP2(vcvtq_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
3301
  NEONMAP1(vcvtq_n_s32_v, arm_neon_vcvtfp2fxs, 0),
3302
  NEONMAP1(vcvtq_n_s64_v, arm_neon_vcvtfp2fxs, 0),
3303
  NEONMAP1(vcvtq_n_u32_v, arm_neon_vcvtfp2fxu, 0),
3304
  NEONMAP1(vcvtq_n_u64_v, arm_neon_vcvtfp2fxu, 0),
3305
  NEONMAP0(vcvtq_s32_v),
3306
  NEONMAP0(vcvtq_s64_v),
3307
  NEONMAP0(vcvtq_u32_v),
3308
  NEONMAP0(vcvtq_u64_v),
3309
  NEONMAP0(vext_v),
3310
  NEONMAP0(vextq_v),
3311
  NEONMAP0(vfma_v),
3312
  NEONMAP0(vfmaq_v),
3313
  NEONMAP2(vhadd_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
3314
  NEONMAP2(vhaddq_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
3315
  NEONMAP2(vhsub_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
3316
  NEONMAP2(vhsubq_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
3317
  NEONMAP0(vld1_dup_v),
3318
  NEONMAP1(vld1_v, arm_neon_vld1, 0),
3319
  NEONMAP0(vld1q_dup_v),
3320
  NEONMAP1(vld1q_v, arm_neon_vld1, 0),
3321
  NEONMAP1(vld2_lane_v, arm_neon_vld2lane, 0),
3322
  NEONMAP1(vld2_v, arm_neon_vld2, 0),
3323
  NEONMAP1(vld2q_lane_v, arm_neon_vld2lane, 0),
3324
  NEONMAP1(vld2q_v, arm_neon_vld2, 0),
3325
  NEONMAP1(vld3_lane_v, arm_neon_vld3lane, 0),
3326
  NEONMAP1(vld3_v, arm_neon_vld3, 0),
3327
  NEONMAP1(vld3q_lane_v, arm_neon_vld3lane, 0),
3328
  NEONMAP1(vld3q_v, arm_neon_vld3, 0),
3329
  NEONMAP1(vld4_lane_v, arm_neon_vld4lane, 0),
3330
  NEONMAP1(vld4_v, arm_neon_vld4, 0),
3331
  NEONMAP1(vld4q_lane_v, arm_neon_vld4lane, 0),
3332
  NEONMAP1(vld4q_v, arm_neon_vld4, 0),
3333
  NEONMAP2(vmax_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
3334
  NEONMAP1(vmaxnm_v, arm_neon_vmaxnm, Add1ArgType),
3335
  NEONMAP1(vmaxnmq_v, arm_neon_vmaxnm, Add1ArgType),
3336
  NEONMAP2(vmaxq_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
3337
  NEONMAP2(vmin_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
3338
  NEONMAP1(vminnm_v, arm_neon_vminnm, Add1ArgType),
3339
  NEONMAP1(vminnmq_v, arm_neon_vminnm, Add1ArgType),
3340
  NEONMAP2(vminq_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
3341
  NEONMAP0(vmovl_v),
3342
  NEONMAP0(vmovn_v),
3343
  NEONMAP1(vmul_v, arm_neon_vmulp, Add1ArgType),
3344
  NEONMAP0(vmull_v),
3345
  NEONMAP1(vmulq_v, arm_neon_vmulp, Add1ArgType),
3346
  NEONMAP2(vpadal_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
3347
  NEONMAP2(vpadalq_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
3348
  NEONMAP1(vpadd_v, arm_neon_vpadd, Add1ArgType),
3349
  NEONMAP2(vpaddl_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
3350
  NEONMAP2(vpaddlq_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
3351
  NEONMAP1(vpaddq_v, arm_neon_vpadd, Add1ArgType),
3352
  NEONMAP2(vpmax_v, arm_neon_vpmaxu, arm_neon_vpmaxs, Add1ArgType | UnsignedAlts),
3353
  NEONMAP2(vpmin_v, arm_neon_vpminu, arm_neon_vpmins, Add1ArgType | UnsignedAlts),
3354
  NEONMAP1(vqabs_v, arm_neon_vqabs, Add1ArgType),
3355
  NEONMAP1(vqabsq_v, arm_neon_vqabs, Add1ArgType),
3356
  NEONMAP2(vqadd_v, arm_neon_vqaddu, arm_neon_vqadds, Add1ArgType | UnsignedAlts),
3357
  NEONMAP2(vqaddq_v, arm_neon_vqaddu, arm_neon_vqadds, Add1ArgType | UnsignedAlts),
3358
  NEONMAP2(vqdmlal_v, arm_neon_vqdmull, arm_neon_vqadds, 0),
3359
  NEONMAP2(vqdmlsl_v, arm_neon_vqdmull, arm_neon_vqsubs, 0),
3360
  NEONMAP1(vqdmulh_v, arm_neon_vqdmulh, Add1ArgType),
3361
  NEONMAP1(vqdmulhq_v, arm_neon_vqdmulh, Add1ArgType),
3362
  NEONMAP1(vqdmull_v, arm_neon_vqdmull, Add1ArgType),
3363
  NEONMAP2(vqmovn_v, arm_neon_vqmovnu, arm_neon_vqmovns, Add1ArgType | UnsignedAlts),
3364
  NEONMAP1(vqmovun_v, arm_neon_vqmovnsu, Add1ArgType),
3365
  NEONMAP1(vqneg_v, arm_neon_vqneg, Add1ArgType),
3366
  NEONMAP1(vqnegq_v, arm_neon_vqneg, Add1ArgType),
3367
  NEONMAP1(vqrdmulh_v, arm_neon_vqrdmulh, Add1ArgType),
3368
  NEONMAP1(vqrdmulhq_v, arm_neon_vqrdmulh, Add1ArgType),
3369
  NEONMAP2(vqrshl_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
3370
  NEONMAP2(vqrshlq_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
3371
  NEONMAP2(vqshl_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
3372
  NEONMAP2(vqshl_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
3373
  NEONMAP2(vqshlq_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
3374
  NEONMAP2(vqshlq_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
3375
  NEONMAP1(vqshlu_n_v, arm_neon_vqshiftsu, 0),
3376
  NEONMAP1(vqshluq_n_v, arm_neon_vqshiftsu, 0),
3377
  NEONMAP2(vqsub_v, arm_neon_vqsubu, arm_neon_vqsubs, Add1ArgType | UnsignedAlts),
3378
  NEONMAP2(vqsubq_v, arm_neon_vqsubu, arm_neon_vqsubs, Add1ArgType | UnsignedAlts),
3379
  NEONMAP1(vraddhn_v, arm_neon_vraddhn, Add1ArgType),
3380
  NEONMAP2(vrecpe_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
3381
  NEONMAP2(vrecpeq_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
3382
  NEONMAP1(vrecps_v, arm_neon_vrecps, Add1ArgType),
3383
  NEONMAP1(vrecpsq_v, arm_neon_vrecps, Add1ArgType),
3384
  NEONMAP2(vrhadd_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
3385
  NEONMAP2(vrhaddq_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
3386
  NEONMAP1(vrnd_v, arm_neon_vrintz, Add1ArgType),
3387
  NEONMAP1(vrnda_v, arm_neon_vrinta, Add1ArgType),
3388
  NEONMAP1(vrndaq_v, arm_neon_vrinta, Add1ArgType),
3389
  NEONMAP1(vrndm_v, arm_neon_vrintm, Add1ArgType),
3390
  NEONMAP1(vrndmq_v, arm_neon_vrintm, Add1ArgType),
3391
  NEONMAP1(vrndn_v, arm_neon_vrintn, Add1ArgType),
3392
  NEONMAP1(vrndnq_v, arm_neon_vrintn, Add1ArgType),
3393
  NEONMAP1(vrndp_v, arm_neon_vrintp, Add1ArgType),
3394
  NEONMAP1(vrndpq_v, arm_neon_vrintp, Add1ArgType),
3395
  NEONMAP1(vrndq_v, arm_neon_vrintz, Add1ArgType),
3396
  NEONMAP1(vrndx_v, arm_neon_vrintx, Add1ArgType),
3397
  NEONMAP1(vrndxq_v, arm_neon_vrintx, Add1ArgType),
3398
  NEONMAP2(vrshl_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
3399
  NEONMAP2(vrshlq_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
3400
  NEONMAP2(vrshr_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
3401
  NEONMAP2(vrshrq_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
3402
  NEONMAP2(vrsqrte_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
3403
  NEONMAP2(vrsqrteq_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
3404
  NEONMAP1(vrsqrts_v, arm_neon_vrsqrts, Add1ArgType),
3405
  NEONMAP1(vrsqrtsq_v, arm_neon_vrsqrts, Add1ArgType),
3406
  NEONMAP1(vrsubhn_v, arm_neon_vrsubhn, Add1ArgType),
3407
  NEONMAP1(vsha1su0q_v, arm_neon_sha1su0, 0),
3408
  NEONMAP1(vsha1su1q_v, arm_neon_sha1su1, 0),
3409
  NEONMAP1(vsha256h2q_v, arm_neon_sha256h2, 0),
3410
  NEONMAP1(vsha256hq_v, arm_neon_sha256h, 0),
3411
  NEONMAP1(vsha256su0q_v, arm_neon_sha256su0, 0),
3412
  NEONMAP1(vsha256su1q_v, arm_neon_sha256su1, 0),
3413
  NEONMAP0(vshl_n_v),
3414
  NEONMAP2(vshl_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
3415
  NEONMAP0(vshll_n_v),
3416
  NEONMAP0(vshlq_n_v),
3417
  NEONMAP2(vshlq_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
3418
  NEONMAP0(vshr_n_v),
3419
  NEONMAP0(vshrn_n_v),
3420
  NEONMAP0(vshrq_n_v),
3421
  NEONMAP1(vst1_v, arm_neon_vst1, 0),
3422
  NEONMAP1(vst1q_v, arm_neon_vst1, 0),
3423
  NEONMAP1(vst2_lane_v, arm_neon_vst2lane, 0),
3424
  NEONMAP1(vst2_v, arm_neon_vst2, 0),
3425
  NEONMAP1(vst2q_lane_v, arm_neon_vst2lane, 0),
3426
  NEONMAP1(vst2q_v, arm_neon_vst2, 0),
3427
  NEONMAP1(vst3_lane_v, arm_neon_vst3lane, 0),
3428
  NEONMAP1(vst3_v, arm_neon_vst3, 0),
3429
  NEONMAP1(vst3q_lane_v, arm_neon_vst3lane, 0),
3430
  NEONMAP1(vst3q_v, arm_neon_vst3, 0),
3431
  NEONMAP1(vst4_lane_v, arm_neon_vst4lane, 0),
3432
  NEONMAP1(vst4_v, arm_neon_vst4, 0),
3433
  NEONMAP1(vst4q_lane_v, arm_neon_vst4lane, 0),
3434
  NEONMAP1(vst4q_v, arm_neon_vst4, 0),
3435
  NEONMAP0(vsubhn_v),
3436
  NEONMAP0(vtrn_v),
3437
  NEONMAP0(vtrnq_v),
3438
  NEONMAP0(vtst_v),
3439
  NEONMAP0(vtstq_v),
3440
  NEONMAP0(vuzp_v),
3441
  NEONMAP0(vuzpq_v),
3442
  NEONMAP0(vzip_v),
3443
  NEONMAP0(vzipq_v)
3444
};
3445
3446
static const NeonIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
3447
  NEONMAP1(vabs_v, aarch64_neon_abs, 0),
3448
  NEONMAP1(vabsq_v, aarch64_neon_abs, 0),
3449
  NEONMAP0(vaddhn_v),
3450
  NEONMAP1(vaesdq_v, aarch64_crypto_aesd, 0),
3451
  NEONMAP1(vaeseq_v, aarch64_crypto_aese, 0),
3452
  NEONMAP1(vaesimcq_v, aarch64_crypto_aesimc, 0),
3453
  NEONMAP1(vaesmcq_v, aarch64_crypto_aesmc, 0),
3454
  NEONMAP1(vcage_v, aarch64_neon_facge, 0),
3455
  NEONMAP1(vcageq_v, aarch64_neon_facge, 0),
3456
  NEONMAP1(vcagt_v, aarch64_neon_facgt, 0),
3457
  NEONMAP1(vcagtq_v, aarch64_neon_facgt, 0),
3458
  NEONMAP1(vcale_v, aarch64_neon_facge, 0),
3459
  NEONMAP1(vcaleq_v, aarch64_neon_facge, 0),
3460
  NEONMAP1(vcalt_v, aarch64_neon_facgt, 0),
3461
  NEONMAP1(vcaltq_v, aarch64_neon_facgt, 0),
3462
  NEONMAP1(vcls_v, aarch64_neon_cls, Add1ArgType),
3463
  NEONMAP1(vclsq_v, aarch64_neon_cls, Add1ArgType),
3464
  NEONMAP1(vclz_v, ctlz, Add1ArgType),
3465
  NEONMAP1(vclzq_v, ctlz, Add1ArgType),
3466
  NEONMAP1(vcnt_v, ctpop, Add1ArgType),
3467
  NEONMAP1(vcntq_v, ctpop, Add1ArgType),
3468
  NEONMAP1(vcvt_f16_f32, aarch64_neon_vcvtfp2hf, 0),
3469
  NEONMAP1(vcvt_f32_f16, aarch64_neon_vcvthf2fp, 0),
3470
  NEONMAP0(vcvt_f32_v),
3471
  NEONMAP2(vcvt_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
3472
  NEONMAP2(vcvt_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
3473
  NEONMAP1(vcvt_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
3474
  NEONMAP1(vcvt_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
3475
  NEONMAP1(vcvt_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
3476
  NEONMAP1(vcvt_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
3477
  NEONMAP0(vcvtq_f32_v),
3478
  NEONMAP2(vcvtq_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
3479
  NEONMAP2(vcvtq_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
3480
  NEONMAP1(vcvtq_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
3481
  NEONMAP1(vcvtq_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
3482
  NEONMAP1(vcvtq_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
3483
  NEONMAP1(vcvtq_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
3484
  NEONMAP1(vcvtx_f32_v, aarch64_neon_fcvtxn, AddRetType | Add1ArgType),
3485
  NEONMAP0(vext_v),
3486
  NEONMAP0(vextq_v),
3487
  NEONMAP0(vfma_v),
3488
  NEONMAP0(vfmaq_v),
3489
  NEONMAP2(vhadd_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
3490
  NEONMAP2(vhaddq_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
3491
  NEONMAP2(vhsub_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
3492
  NEONMAP2(vhsubq_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
3493
  NEONMAP0(vmovl_v),
3494
  NEONMAP0(vmovn_v),
3495
  NEONMAP1(vmul_v, aarch64_neon_pmul, Add1ArgType),
3496
  NEONMAP1(vmulq_v, aarch64_neon_pmul, Add1ArgType),
3497
  NEONMAP1(vpadd_v, aarch64_neon_addp, Add1ArgType),
3498
  NEONMAP2(vpaddl_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
3499
  NEONMAP2(vpaddlq_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
3500
  NEONMAP1(vpaddq_v, aarch64_neon_addp, Add1ArgType),
3501
  NEONMAP1(vqabs_v, aarch64_neon_sqabs, Add1ArgType),
3502
  NEONMAP1(vqabsq_v, aarch64_neon_sqabs, Add1ArgType),
3503
  NEONMAP2(vqadd_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
3504
  NEONMAP2(vqaddq_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
3505
  NEONMAP2(vqdmlal_v, aarch64_neon_sqdmull, aarch64_neon_sqadd, 0),
3506
  NEONMAP2(vqdmlsl_v, aarch64_neon_sqdmull, aarch64_neon_sqsub, 0),
3507
  NEONMAP1(vqdmulh_v, aarch64_neon_sqdmulh, Add1ArgType),
3508
  NEONMAP1(vqdmulhq_v, aarch64_neon_sqdmulh, Add1ArgType),
3509
  NEONMAP1(vqdmull_v, aarch64_neon_sqdmull, Add1ArgType),
3510
  NEONMAP2(vqmovn_v, aarch64_neon_uqxtn, aarch64_neon_sqxtn, Add1ArgType | UnsignedAlts),
3511
  NEONMAP1(vqmovun_v, aarch64_neon_sqxtun, Add1ArgType),
3512
  NEONMAP1(vqneg_v, aarch64_neon_sqneg, Add1ArgType),
3513
  NEONMAP1(vqnegq_v, aarch64_neon_sqneg, Add1ArgType),
3514
  NEONMAP1(vqrdmulh_v, aarch64_neon_sqrdmulh, Add1ArgType),
3515
  NEONMAP1(vqrdmulhq_v, aarch64_neon_sqrdmulh, Add1ArgType),
3516
  NEONMAP2(vqrshl_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
3517
  NEONMAP2(vqrshlq_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
3518
  NEONMAP2(vqshl_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),
3519
  NEONMAP2(vqshl_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
3520
  NEONMAP2(vqshlq_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),
3521
  NEONMAP2(vqshlq_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
3522
  NEONMAP1(vqshlu_n_v, aarch64_neon_sqshlu, 0),
3523
  NEONMAP1(vqshluq_n_v, aarch64_neon_sqshlu, 0),
3524
  NEONMAP2(vqsub_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
3525
  NEONMAP2(vqsubq_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
3526
  NEONMAP1(vraddhn_v, aarch64_neon_raddhn, Add1ArgType),
3527
  NEONMAP2(vrecpe_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
3528
  NEONMAP2(vrecpeq_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
3529
  NEONMAP1(vrecps_v, aarch64_neon_frecps, Add1ArgType),
3530
  NEONMAP1(vrecpsq_v, aarch64_neon_frecps, Add1ArgType),
3531
  NEONMAP2(vrhadd_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
3532
  NEONMAP2(vrhaddq_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
3533
  NEONMAP2(vrshl_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
3534
  NEONMAP2(vrshlq_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
3535
  NEONMAP2(vrshr_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
3536
  NEONMAP2(vrshrq_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
3537
  NEONMAP2(vrsqrte_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
3538
  NEONMAP2(vrsqrteq_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
3539
  NEONMAP1(vrsqrts_v, aarch64_neon_frsqrts, Add1ArgType),
3540
  NEONMAP1(vrsqrtsq_v, aarch64_neon_frsqrts, Add1ArgType),
3541
  NEONMAP1(vrsubhn_v, aarch64_neon_rsubhn, Add1ArgType),
3542
  NEONMAP1(vsha1su0q_v, aarch64_crypto_sha1su0, 0),
3543
  NEONMAP1(vsha1su1q_v, aarch64_crypto_sha1su1, 0),
3544
  NEONMAP1(vsha256h2q_v, aarch64_crypto_sha256h2, 0),
3545
  NEONMAP1(vsha256hq_v, aarch64_crypto_sha256h, 0),
3546
  NEONMAP1(vsha256su0q_v, aarch64_crypto_sha256su0, 0),
3547
  NEONMAP1(vsha256su1q_v, aarch64_crypto_sha256su1, 0),
3548
  NEONMAP0(vshl_n_v),
3549
  NEONMAP2(vshl_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
3550
  NEONMAP0(vshll_n_v),
3551
  NEONMAP0(vshlq_n_v),
3552
  NEONMAP2(vshlq_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
3553
  NEONMAP0(vshr_n_v),
3554
  NEONMAP0(vshrn_n_v),
3555
  NEONMAP0(vshrq_n_v),
3556
  NEONMAP0(vsubhn_v),
3557
  NEONMAP0(vtst_v),
3558
  NEONMAP0(vtstq_v),
3559
};
3560
3561
static const NeonIntrinsicInfo AArch64SISDIntrinsicMap[] = {
3562
  NEONMAP1(vabdd_f64, aarch64_sisd_fabd, Add1ArgType),
3563
  NEONMAP1(vabds_f32, aarch64_sisd_fabd, Add1ArgType),
3564
  NEONMAP1(vabsd_s64, aarch64_neon_abs, Add1ArgType),
3565
  NEONMAP1(vaddlv_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
3566
  NEONMAP1(vaddlv_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
3567
  NEONMAP1(vaddlvq_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
3568
  NEONMAP1(vaddlvq_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
3569
  NEONMAP1(vaddv_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
3570
  NEONMAP1(vaddv_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
3571
  NEONMAP1(vaddv_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
3572
  NEONMAP1(vaddvq_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
3573
  NEONMAP1(vaddvq_f64, aarch64_neon_faddv, AddRetType | Add1ArgType),
3574
  NEONMAP1(vaddvq_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
3575
  NEONMAP1(vaddvq_s64, aarch64_neon_saddv, AddRetType | Add1ArgType),
3576
  NEONMAP1(vaddvq_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
3577
  NEONMAP1(vaddvq_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
3578
  NEONMAP1(vcaged_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
3579
  NEONMAP1(vcages_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
3580
  NEONMAP1(vcagtd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
3581
  NEONMAP1(vcagts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
3582
  NEONMAP1(vcaled_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
3583
  NEONMAP1(vcales_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
3584
  NEONMAP1(vcaltd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
3585
  NEONMAP1(vcalts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
3586
  NEONMAP1(vcvtad_s64_f64, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
3587
  NEONMAP1(vcvtad_u64_f64, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
3588
  NEONMAP1(vcvtas_s32_f32, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
3589
  NEONMAP1(vcvtas_u32_f32, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
3590
  NEONMAP1(vcvtd_n_f64_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
3591
  NEONMAP1(vcvtd_n_f64_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
3592
  NEONMAP1(vcvtd_n_s64_f64, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
3593
  NEONMAP1(vcvtd_n_u64_f64, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
3594
  NEONMAP1(vcvtmd_s64_f64, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
3595
  NEONMAP1(vcvtmd_u64_f64, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
3596
  NEONMAP1(vcvtms_s32_f32, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
3597
  NEONMAP1(vcvtms_u32_f32, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
3598
  NEONMAP1(vcvtnd_s64_f64, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
3599
  NEONMAP1(vcvtnd_u64_f64, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
3600
  NEONMAP1(vcvtns_s32_f32, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
3601
  NEONMAP1(vcvtns_u32_f32, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
3602
  NEONMAP1(vcvtpd_s64_f64, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
3603
  NEONMAP1(vcvtpd_u64_f64, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
3604
  NEONMAP1(vcvtps_s32_f32, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
3605
  NEONMAP1(vcvtps_u32_f32, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
3606
  NEONMAP1(vcvts_n_f32_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
3607
  NEONMAP1(vcvts_n_f32_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
3608
  NEONMAP1(vcvts_n_s32_f32, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
3609
  NEONMAP1(vcvts_n_u32_f32, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
3610
  NEONMAP1(vcvtxd_f32_f64, aarch64_sisd_fcvtxn, 0),
3611
  NEONMAP1(vmaxnmv_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
3612
  NEONMAP1(vmaxnmvq_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
3613
  NEONMAP1(vmaxnmvq_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
3614
  NEONMAP1(vmaxv_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
3615
  NEONMAP1(vmaxv_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
3616
  NEONMAP1(vmaxv_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
3617
  NEONMAP1(vmaxvq_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
3618
  NEONMAP1(vmaxvq_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
3619
  NEONMAP1(vmaxvq_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
3620
  NEONMAP1(vmaxvq_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
3621
  NEONMAP1(vminnmv_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
3622
  NEONMAP1(vminnmvq_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
3623
  NEONMAP1(vminnmvq_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
3624
  NEONMAP1(vminv_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
3625
  NEONMAP1(vminv_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
3626
  NEONMAP1(vminv_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
3627
  NEONMAP1(vminvq_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
3628
  NEONMAP1(vminvq_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
3629
  NEONMAP1(vminvq_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
3630
  NEONMAP1(vminvq_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
3631
  NEONMAP1(vmull_p64, aarch64_neon_pmull64, 0),
3632
  NEONMAP1(vmulxd_f64, aarch64_neon_fmulx, Add1ArgType),
3633
  NEONMAP1(vmulxs_f32, aarch64_neon_fmulx, Add1ArgType),
3634
  NEONMAP1(vpaddd_s64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
3635
  NEONMAP1(vpaddd_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
3636
  NEONMAP1(vpmaxnmqd_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
3637
  NEONMAP1(vpmaxnms_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
3638
  NEONMAP1(vpmaxqd_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
3639
  NEONMAP1(vpmaxs_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
3640
  NEONMAP1(vpminnmqd_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
3641
  NEONMAP1(vpminnms_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
3642
  NEONMAP1(vpminqd_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
3643
  NEONMAP1(vpmins_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
3644
  NEONMAP1(vqabsb_s8, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
3645
  NEONMAP1(vqabsd_s64, aarch64_neon_sqabs, Add1ArgType),
3646
  NEONMAP1(vqabsh_s16, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
3647
  NEONMAP1(vqabss_s32, aarch64_neon_sqabs, Add1ArgType),
3648
  NEONMAP1(vqaddb_s8, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
3649
  NEONMAP1(vqaddb_u8, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
3650
  NEONMAP1(vqaddd_s64, aarch64_neon_sqadd, Add1ArgType),
3651
  NEONMAP1(vqaddd_u64, aarch64_neon_uqadd, Add1ArgType),
3652
  NEONMAP1(vqaddh_s16, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
3653
  NEONMAP1(vqaddh_u16, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
3654
  NEONMAP1(vqadds_s32, aarch64_neon_sqadd, Add1ArgType),
3655
  NEONMAP1(vqadds_u32, aarch64_neon_uqadd, Add1ArgType),
3656
  NEONMAP1(vqdmulhh_s16, aarch64_neon_sqdmulh, Vectorize1ArgType | Use64BitVectors),
3657
  NEONMAP1(vqdmulhs_s32, aarch64_neon_sqdmulh, Add1ArgType),
3658
  NEONMAP1(vqdmullh_s16, aarch64_neon_sqdmull, VectorRet | Use128BitVectors),
3659
  NEONMAP1(vqdmulls_s32, aarch64_neon_sqdmulls_scalar, 0),
3660
  NEONMAP1(vqmovnd_s64, aarch64_neon_scalar_sqxtn, AddRetType | Add1ArgType),
3661
  NEONMAP1(vqmovnd_u64, aarch64_neon_scalar_uqxtn, AddRetType | Add1ArgType),
3662
  NEONMAP1(vqmovnh_s16, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
3663
  NEONMAP1(vqmovnh_u16, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
3664
  NEONMAP1(vqmovns_s32, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
3665
  NEONMAP1(vqmovns_u32, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
3666
  NEONMAP1(vqmovund_s64, aarch64_neon_scalar_sqxtun, AddRetType | Add1ArgType),
3667
  NEONMAP1(vqmovunh_s16, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
3668
  NEONMAP1(vqmovuns_s32, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
3669
  NEONMAP1(vqnegb_s8, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
3670
  NEONMAP1(vqnegd_s64, aarch64_neon_sqneg, Add1ArgType),
3671
  NEONMAP1(vqnegh_s16, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
3672
  NEONMAP1(vqnegs_s32, aarch64_neon_sqneg, Add1ArgType),
3673
  NEONMAP1(vqrdmulhh_s16, aarch64_neon_sqrdmulh, Vectorize1ArgType | Use64BitVectors),
3674
  NEONMAP1(vqrdmulhs_s32, aarch64_neon_sqrdmulh, Add1ArgType),
3675
  NEONMAP1(vqrshlb_s8, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
3676
  NEONMAP1(vqrshlb_u8, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
3677
  NEONMAP1(vqrshld_s64, aarch64_neon_sqrshl, Add1ArgType),
3678
  NEONMAP1(vqrshld_u64, aarch64_neon_uqrshl, Add1ArgType),
3679
  NEONMAP1(vqrshlh_s16, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
3680
  NEONMAP1(vqrshlh_u16, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
3681
  NEONMAP1(vqrshls_s32, aarch64_neon_sqrshl, Add1ArgType),
3682
  NEONMAP1(vqrshls_u32, aarch64_neon_uqrshl, Add1ArgType),
3683
  NEONMAP1(vqrshrnd_n_s64, aarch64_neon_sqrshrn, AddRetType),
3684
  NEONMAP1(vqrshrnd_n_u64, aarch64_neon_uqrshrn, AddRetType),
3685
  NEONMAP1(vqrshrnh_n_s16, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
3686
  NEONMAP1(vqrshrnh_n_u16, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
3687
  NEONMAP1(vqrshrns_n_s32, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
3688
  NEONMAP1(vqrshrns_n_u32, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
3689
  NEONMAP1(vqrshrund_n_s64, aarch64_neon_sqrshrun, AddRetType),
3690
  NEONMAP1(vqrshrunh_n_s16, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
3691
  NEONMAP1(vqrshruns_n_s32, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
3692
  NEONMAP1(vqshlb_n_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
3693
  NEONMAP1(vqshlb_n_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
3694
  NEONMAP1(vqshlb_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
3695
  NEONMAP1(vqshlb_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
3696
  NEONMAP1(vqshld_s64, aarch64_neon_sqshl, Add1ArgType),
3697
  NEONMAP1(vqshld_u64, aarch64_neon_uqshl, Add1ArgType),
3698
  NEONMAP1(vqshlh_n_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
3699
  NEONMAP1(vqshlh_n_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
3700
  NEONMAP1(vqshlh_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
3701
  NEONMAP1(vqshlh_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
3702
  NEONMAP1(vqshls_n_s32, aarch64_neon_sqshl, Add1ArgType),
3703
  NEONMAP1(vqshls_n_u32, aarch64_neon_uqshl, Add1ArgType),
3704
  NEONMAP1(vqshls_s32, aarch64_neon_sqshl, Add1ArgType),
3705
  NEONMAP1(vqshls_u32, aarch64_neon_uqshl, Add1ArgType),
3706
  NEONMAP1(vqshlub_n_s8, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
3707
  NEONMAP1(vqshluh_n_s16, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
3708
  NEONMAP1(vqshlus_n_s32, aarch64_neon_sqshlu, Add1ArgType),
3709
  NEONMAP1(vqshrnd_n_s64, aarch64_neon_sqshrn, AddRetType),
3710
  NEONMAP1(vqshrnd_n_u64, aarch64_neon_uqshrn, AddRetType),
3711
  NEONMAP1(vqshrnh_n_s16, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
3712
  NEONMAP1(vqshrnh_n_u16, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
3713
  NEONMAP1(vqshrns_n_s32, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
3714
  NEONMAP1(vqshrns_n_u32, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
3715
  NEONMAP1(vqshrund_n_s64, aarch64_neon_sqshrun, AddRetType),
3716
  NEONMAP1(vqshrunh_n_s16, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
3717
  NEONMAP1(vqshruns_n_s32, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
3718
  NEONMAP1(vqsubb_s8, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
3719
  NEONMAP1(vqsubb_u8, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
3720
  NEONMAP1(vqsubd_s64, aarch64_neon_sqsub, Add1ArgType),
3721
  NEONMAP1(vqsubd_u64, aarch64_neon_uqsub, Add1ArgType),
3722
  NEONMAP1(vqsubh_s16, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
3723
  NEONMAP1(vqsubh_u16, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
3724
  NEONMAP1(vqsubs_s32, aarch64_neon_sqsub, Add1ArgType),
3725
  NEONMAP1(vqsubs_u32, aarch64_neon_uqsub, Add1ArgType),
3726
  NEONMAP1(vrecped_f64, aarch64_neon_frecpe, Add1ArgType),
3727
  NEONMAP1(vrecpes_f32, aarch64_neon_frecpe, Add1ArgType),
3728
  NEONMAP1(vrecpxd_f64, aarch64_neon_frecpx, Add1ArgType),
3729
  NEONMAP1(vrecpxs_f32, aarch64_neon_frecpx, Add1ArgType),
3730
  NEONMAP1(vrshld_s64, aarch64_neon_srshl, Add1ArgType),
3731
  NEONMAP1(vrshld_u64, aarch64_neon_urshl, Add1ArgType),
3732
  NEONMAP1(vrsqrted_f64, aarch64_neon_frsqrte, Add1ArgType),
3733
  NEONMAP1(vrsqrtes_f32, aarch64_neon_frsqrte, Add1ArgType),
3734
  NEONMAP1(vrsqrtsd_f64, aarch64_neon_frsqrts, Add1ArgType),
3735
  NEONMAP1(vrsqrtss_f32, aarch64_neon_frsqrts, Add1ArgType),
3736
  NEONMAP1(vsha1cq_u32, aarch64_crypto_sha1c, 0),
3737
  NEONMAP1(vsha1h_u32, aarch64_crypto_sha1h, 0),
3738
  NEONMAP1(vsha1mq_u32, aarch64_crypto_sha1m, 0),
3739
  NEONMAP1(vsha1pq_u32, aarch64_crypto_sha1p, 0),
3740
  NEONMAP1(vshld_s64, aarch64_neon_sshl, Add1ArgType),
3741
  NEONMAP1(vshld_u64, aarch64_neon_ushl, Add1ArgType),
3742
  NEONMAP1(vslid_n_s64, aarch64_neon_vsli, Vectorize1ArgType),
3743
  NEONMAP1(vslid_n_u64, aarch64_neon_vsli, Vectorize1ArgType),
3744
  NEONMAP1(vsqaddb_u8, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
3745
  NEONMAP1(vsqaddd_u64, aarch64_neon_usqadd, Add1ArgType),
3746
  NEONMAP1(vsqaddh_u16, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
3747
  NEONMAP1(vsqadds_u32, aarch64_neon_usqadd, Add1ArgType),
3748
  NEONMAP1(vsrid_n_s64, aarch64_neon_vsri, Vectorize1ArgType),
3749
  NEONMAP1(vsrid_n_u64, aarch64_neon_vsri, Vectorize1ArgType),
3750
  NEONMAP1(vuqaddb_s8, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
3751
  NEONMAP1(vuqaddd_s64, aarch64_neon_suqadd, Add1ArgType),
3752
  NEONMAP1(vuqaddh_s16, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
3753
  NEONMAP1(vuqadds_s32, aarch64_neon_suqadd, Add1ArgType),
3754
};
3755
3756
#undef NEONMAP0
3757
#undef NEONMAP1
3758
#undef NEONMAP2
3759
3760
static bool NEONSIMDIntrinsicsProvenSorted = false;
3761
3762
static bool AArch64SIMDIntrinsicsProvenSorted = false;
3763
static bool AArch64SISDIntrinsicsProvenSorted = false;
3764
3765
3766
static const NeonIntrinsicInfo *
3767
findNeonIntrinsicInMap(ArrayRef<NeonIntrinsicInfo> IntrinsicMap,
3768
23.7k
                       unsigned BuiltinID, bool &MapProvenSorted) {
3769
23.7k
3770
#ifndef NDEBUG
3771
  if (!MapProvenSorted) {
3772
    assert(std::is_sorted(std::begin(IntrinsicMap), std::end(IntrinsicMap)));
3773
    MapProvenSorted = true;
3774
  }
3775
#endif
3776
3777
23.7k
  const NeonIntrinsicInfo *Builtin =
3778
23.7k
      std::lower_bound(IntrinsicMap.begin(), IntrinsicMap.end(), BuiltinID);
3779
23.7k
3780
23.7k
  if (Builtin != IntrinsicMap.end() && Builtin->BuiltinID == BuiltinID)
3781
3.11k
    return Builtin;
3782
20.5k
3783
20.5k
  return nullptr;
3784
20.5k
}
3785
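findNeonIntrinsicInMap above relies on the NEONMAP tables being sorted by builtin ID, which the NDEBUG-only std::is_sorted assertion proves once per table before std::lower_bound performs a binary search. A minimal, self-contained sketch of the same lookup pattern, with hypothetical stand-in types rather than clang's NeonIntrinsicInfo:

// Sketch only: IntrinsicInfo, Map, and findInMap are illustrative stand-ins.
#include <algorithm>
#include <iterator>

struct IntrinsicInfo {
  unsigned BuiltinID;
  const char *NameHint;
  // Lets std::lower_bound compare a table entry against a raw builtin ID.
  bool operator<(unsigned RHSBuiltinID) const {
    return BuiltinID < RHSBuiltinID;
  }
};

static const IntrinsicInfo Map[] = {
    {10, "vabs"}, {20, "vaddhn"}, {30, "vcage"}}; // must stay sorted by ID

const IntrinsicInfo *findInMap(unsigned BuiltinID) {
  const IntrinsicInfo *I =
      std::lower_bound(std::begin(Map), std::end(Map), BuiltinID);
  // lower_bound returns the first entry not less than the ID; confirm a hit.
  return (I != std::end(Map) && I->BuiltinID == BuiltinID) ? I : nullptr;
}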
3786
Function *CodeGenFunction::LookupNeonLLVMIntrinsic(unsigned IntrinsicID,
3787
                                                   unsigned Modifier,
3788
                                                   llvm::Type *ArgType,
3789
1.71k
                                                   const CallExpr *E) {
3790
1.71k
  int VectorSize = 0;
3791
1.71k
  if (Modifier & Use64BitVectors)
3792
155
    VectorSize = 64;
3793
1.55k
  else if (Modifier & Use128BitVectors)
3794
4
    VectorSize = 128;
3795
1.71k
3796
1.71k
  // Return type.
3797
1.71k
  SmallVector<llvm::Type *, 3> Tys;
3798
1.71k
  if (Modifier & AddRetType) {
3799
335
    llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
3800
335
    if (Modifier & VectorizeRetType)
3801
58
      Ty = llvm::VectorType::get(
3802
58
          Ty, VectorSize ? VectorSize / Ty->getPrimitiveSizeInBits() : 1);
3803
335
3804
335
    Tys.push_back(Ty);
3805
335
  }
3806
1.71k
3807
1.71k
  // Arguments.
3808
1.71k
  if (Modifier & VectorizeArgTypes) {
3809
113
    int Elts = VectorSize ? VectorSize / ArgType->getPrimitiveSizeInBits() : 1;
3810
113
    ArgType = llvm::VectorType::get(ArgType, Elts);
3811
113
  }
3812
1.71k
3813
1.71k
  if (Modifier & (Add1ArgType | Add2ArgTypes))
3814
1.54k
    Tys.push_back(ArgType);
3815
1.71k
3816
1.71k
  if (Modifier & Add2ArgTypes)
3817
0
    Tys.push_back(ArgType);
3818
1.71k
3819
1.71k
  if (Modifier & InventFloatType)
3820
0
    Tys.push_back(FloatTy);
3821
1.71k
3822
1.71k
  return CGM.getIntrinsic(IntrinsicID, Tys);
3823
1.71k
}
3824
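LookupNeonLLVMIntrinsic above assembles the overload-type list for CGM.getIntrinsic from the Modifier bit-flags: the (optionally re-vectorized) return type goes first when AddRetType is set, then the argument type once or twice. A simplified model of that flag handling, with strings standing in for llvm::Type* (all names here are illustrative, not clang's):

// Sketch under the assumption that the flag semantics match the code above.
#include <string>
#include <vector>

enum TypeModifier : unsigned {
  AddRetType   = 1u << 0,
  Add1ArgType  = 1u << 1,
  Add2ArgTypes = 1u << 2,
};

std::vector<std::string> buildOverloadTys(unsigned Modifier,
                                          const std::string &RetTy,
                                          const std::string &ArgTy) {
  std::vector<std::string> Tys;
  if (Modifier & AddRetType)
    Tys.push_back(RetTy);              // overload on the call's return type
  if (Modifier & (Add1ArgType | Add2ArgTypes))
    Tys.push_back(ArgTy);              // argument type contributed once...
  if (Modifier & Add2ArgTypes)
    Tys.push_back(ArgTy);              // ...or twice when two are requested
  return Tys;
}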
3825
static Value *EmitCommonNeonSISDBuiltinExpr(CodeGenFunction &CGF,
3826
                                            const NeonIntrinsicInfo &SISDInfo,
3827
                                            SmallVectorImpl<Value *> &Ops,
3828
582
                                            const CallExpr *E) {
3829
582
  unsigned BuiltinID = SISDInfo.BuiltinID;
3830
582
  unsigned int Int = SISDInfo.LLVMIntrinsic;
3831
582
  unsigned Modifier = SISDInfo.TypeModifier;
3832
582
  const char *s = SISDInfo.NameHint;
3833
582
3834
582
  switch (BuiltinID) {
3835
12
  case NEON::BI__builtin_neon_vcled_s64:
3836
12
  case NEON::BI__builtin_neon_vcled_u64:
3837
12
  case NEON::BI__builtin_neon_vcles_f32:
3838
12
  case NEON::BI__builtin_neon_vcled_f64:
3839
12
  case NEON::BI__builtin_neon_vcltd_s64:
3840
12
  case NEON::BI__builtin_neon_vcltd_u64:
3841
12
  case NEON::BI__builtin_neon_vclts_f32:
3842
12
  case NEON::BI__builtin_neon_vcltd_f64:
3843
12
  case NEON::BI__builtin_neon_vcales_f32:
3844
12
  case NEON::BI__builtin_neon_vcaled_f64:
3845
12
  case NEON::BI__builtin_neon_vcalts_f32:
3846
12
  case NEON::BI__builtin_neon_vcaltd_f64:
3847
12
    // Only one direction of comparisons actually exist, cmle is actually a cmge
3848
12
    // with swapped operands. The table gives us the right intrinsic but we
3849
12
    // still need to do the swap.
3850
12
    std::swap(Ops[0], Ops[1]);
3851
12
    break;
3852
582
  }
3853
582
3854
582
  assert(Int && "Generic code assumes a valid intrinsic");
3855
582
3856
582
  // Determine the type(s) of this overloaded AArch64 intrinsic.
3857
582
  const Expr *Arg = E->getArg(0);
3858
582
  llvm::Type *ArgTy = CGF.ConvertType(Arg->getType());
3859
582
  Function *F = CGF.LookupNeonLLVMIntrinsic(Int, Modifier, ArgTy, E);
3860
582
3861
582
  int j = 0;
3862
582
  ConstantInt *C0 = ConstantInt::get(CGF.SizeTy, 0);
3863
582
  for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
3864
1.51k
       
ai != ae1.51k
;
++ai, ++j936
) {
3865
936
    llvm::Type *ArgTy = ai->getType();
3866
936
    if (Ops[j]->getType()->getPrimitiveSizeInBits() ==
3867
936
             ArgTy->getPrimitiveSizeInBits())
3868
684
      continue;
3869
252
3870
936
    assert(ArgTy->isVectorTy() && !Ops[j]->getType()->isVectorTy());
3871
252
    // The constant argument to an _n_ intrinsic always has Int32Ty, so truncate
3872
252
    // it before inserting.
3873
252
    Ops[j] =
3874
252
        CGF.Builder.CreateTruncOrBitCast(Ops[j], ArgTy->getVectorElementType());
3875
252
    Ops[j] =
3876
252
        CGF.Builder.CreateInsertElement(UndefValue::get(ArgTy), Ops[j], C0);
3877
252
  }
3878
582
3879
582
  Value *Result = CGF.EmitNeonCall(F, Ops, s);
3880
582
  llvm::Type *ResultType = CGF.ConvertType(E->getType());
3881
582
  if (ResultType->getPrimitiveSizeInBits() <
3882
582
      Result->getType()->getPrimitiveSizeInBits())
3883
159
    return CGF.Builder.CreateExtractElement(Result, C0);
3884
423
3885
423
  return CGF.Builder.CreateBitCast(Result, ResultType, s);
3886
423
}
3887
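The fixup loop in EmitCommonNeonSISDBuiltinExpr handles scalar builtins backed by vector intrinsics: a scalar operand narrower than the expected vector parameter is truncated to the element type and inserted into lane 0, and a result wider than the builtin's scalar type is recovered by extracting lane 0. A plain-C++ model of that lane-0 round trip (the array types are illustrative stand-ins, not LLVM values):

// Models trunc + insertelement into lane 0, and extractelement back out.
#include <array>
#include <cstdint>

std::array<int16_t, 4> promoteToLane0(int32_t Scalar) {
  std::array<int16_t, 4> V{};           // stand-in for an undef vector
  V[0] = static_cast<int16_t>(Scalar);  // truncate, then occupy lane 0
  return V;
}

int16_t narrowResult(const std::array<int16_t, 4> &Result) {
  return Result[0];                     // extract lane 0 as the scalar result
}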
3888
Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
3889
    unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic,
3890
    const char *NameHint, unsigned Modifier, const CallExpr *E,
3891
2.53k
    SmallVectorImpl<llvm::Value *> &Ops, Address PtrOp0, Address PtrOp1) {
3892
2.53k
  // Get the last argument, which specifies the vector type.
3893
2.53k
  llvm::APSInt NeonTypeConst;
3894
2.53k
  const Expr *Arg = E->getArg(E->getNumArgs() - 1);
3895
2.53k
  if (!Arg->isIntegerConstantExpr(NeonTypeConst, getContext()))
3896
0
    return nullptr;
3897
2.53k
3898
2.53k
  // Determine the type of this overloaded NEON intrinsic.
3899
2.53k
  NeonTypeFlags Type(NeonTypeConst.getZExtValue());
3900
2.53k
  bool Usgn = Type.isUnsigned();
3901
2.53k
  bool Quad = Type.isQuad();
3902
2.53k
3903
2.53k
  llvm::VectorType *VTy = GetNeonType(this, Type);
3904
2.53k
  llvm::Type *Ty = VTy;
3905
2.53k
  if (!Ty)
3906
0
    return nullptr;
3907
2.53k
3908
2.53k
  auto getAlignmentValue32 = [&](Address addr) -> Value* {
3909
286
    return Builder.getInt32(addr.getAlignment().getQuantity());
3910
286
  };
3911
2.53k
3912
2.53k
  unsigned Int = LLVMIntrinsic;
3913
2.53k
  if ((Modifier & UnsignedAlts) && !Usgn)
3914
426
    Int = AltLLVMIntrinsic;
3915
2.53k
3916
2.53k
  switch (BuiltinID) {
3917
1.08k
  default: break;
3918
44
  case NEON::BI__builtin_neon_vabs_v:
3919
44
  case NEON::BI__builtin_neon_vabsq_v:
3920
44
    if (VTy->getElementType()->isFloatingPointTy())
3921
14
      return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, Ty), Ops, "vabs");
3922
30
    return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops, "vabs");
3923
24
  case NEON::BI__builtin_neon_vaddhn_v: {
3924
24
    llvm::VectorType *SrcTy =
3925
24
        llvm::VectorType::getExtendedElementVectorType(VTy);
3926
24
3927
24
    // %sum = add <4 x i32> %lhs, %rhs
3928
24
    Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
3929
24
    Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
3930
24
    Ops[0] = Builder.CreateAdd(Ops[0], Ops[1], "vaddhn");
3931
24
3932
24
    // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
3933
24
    Constant *ShiftAmt =
3934
24
        ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
3935
24
    Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vaddhn");
3936
24
3937
24
    // %res = trunc <4 x i32> %high to <4 x i16>
3938
24
    return Builder.CreateTrunc(Ops[0], VTy, "vaddhn");
3939
30
  }
3940
28
  case NEON::BI__builtin_neon_vcale_v:
3941
28
  case NEON::BI__builtin_neon_vcaleq_v:
3942
28
  case NEON::BI__builtin_neon_vcalt_v:
3943
28
  case NEON::BI__builtin_neon_vcaltq_v:
3944
28
    std::swap(Ops[0], Ops[1]);
3945
28
    LLVM_FALLTHROUGH;
3946
56
  case NEON::BI__builtin_neon_vcage_v:
3947
56
  case NEON::BI__builtin_neon_vcageq_v:
3948
56
  case NEON::BI__builtin_neon_vcagt_v:
3949
56
  case NEON::BI__builtin_neon_vcagtq_v: {
3950
56
    llvm::Type *VecFlt = llvm::VectorType::get(
3951
56
        VTy->getScalarSizeInBits() == 32 ? FloatTy : DoubleTy,
3952
56
        VTy->getNumElements());
3953
56
    llvm::Type *Tys[] = { VTy, VecFlt };
3954
56
    Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
3955
56
    return EmitNeonCall(F, Ops, NameHint);
3956
56
  }
3957
48
  case NEON::BI__builtin_neon_vclz_v:
3958
48
  case NEON::BI__builtin_neon_vclzq_v:
3959
48
    // We generate target-independent intrinsic, which needs a second argument
3960
48
    // for whether or not clz of zero is undefined; on ARM it isn't.
3961
48
    Ops.push_back(Builder.getInt1(getTarget().isCLZForZeroUndef()));
3962
48
    break;
3963
16
  case NEON::BI__builtin_neon_vcvt_f32_v:
3964
16
  case NEON::BI__builtin_neon_vcvtq_f32_v:
3965
16
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3966
16
    Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, Quad));
3967
8
    return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
3968
8
                : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
3969
28
  case NEON::BI__builtin_neon_vcvt_n_f32_v:
3970
28
  case NEON::BI__builtin_neon_vcvt_n_f64_v:
3971
28
  case NEON::BI__builtin_neon_vcvtq_n_f32_v:
3972
28
  case NEON::BI__builtin_neon_vcvtq_n_f64_v: {
3973
28
    llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };
3974
28
    Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic;
3975
28
    Function *F = CGM.getIntrinsic(Int, Tys);
3976
28
    return EmitNeonCall(F, Ops, "vcvt_n");
3977
28
  }
3978
28
  case NEON::BI__builtin_neon_vcvt_n_s32_v:
3979
28
  case NEON::BI__builtin_neon_vcvt_n_u32_v:
3980
28
  case NEON::BI__builtin_neon_vcvt_n_s64_v:
3981
28
  case NEON::BI__builtin_neon_vcvt_n_u64_v:
3982
28
  case NEON::BI__builtin_neon_vcvtq_n_s32_v:
3983
28
  case NEON::BI__builtin_neon_vcvtq_n_u32_v:
3984
28
  case NEON::BI__builtin_neon_vcvtq_n_s64_v:
3985
28
  case NEON::BI__builtin_neon_vcvtq_n_u64_v: {
3986
28
    llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
3987
28
    Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
3988
28
    return EmitNeonCall(F, Ops, "vcvt_n");
3989
28
  }
3990
4
  case NEON::BI__builtin_neon_vcvt_s32_v:
3991
4
  case NEON::BI__builtin_neon_vcvt_u32_v:
3992
4
  case NEON::BI__builtin_neon_vcvt_s64_v:
3993
4
  case NEON::BI__builtin_neon_vcvt_u64_v:
3994
4
  case NEON::BI__builtin_neon_vcvtq_s32_v:
3995
4
  case NEON::BI__builtin_neon_vcvtq_u32_v:
3996
4
  case NEON::BI__builtin_neon_vcvtq_s64_v:
3997
4
  case NEON::BI__builtin_neon_vcvtq_u64_v: {
3998
4
    Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
3999
2
    return Usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt")
4000
2
                : Builder.CreateFPToSI(Ops[0], Ty, "vcvt");
4001
4
  }
4002
16
  case NEON::BI__builtin_neon_vcvta_s32_v:
4003
16
  case NEON::BI__builtin_neon_vcvta_s64_v:
4004
16
  case NEON::BI__builtin_neon_vcvta_u32_v:
4005
16
  case NEON::BI__builtin_neon_vcvta_u64_v:
4006
16
  case NEON::BI__builtin_neon_vcvtaq_s32_v:
4007
16
  case NEON::BI__builtin_neon_vcvtaq_s64_v:
4008
16
  case NEON::BI__builtin_neon_vcvtaq_u32_v:
4009
16
  case NEON::BI__builtin_neon_vcvtaq_u64_v:
4010
16
  case NEON::BI__builtin_neon_vcvtn_s32_v:
4011
16
  case NEON::BI__builtin_neon_vcvtn_s64_v:
4012
16
  case NEON::BI__builtin_neon_vcvtn_u32_v:
4013
16
  case NEON::BI__builtin_neon_vcvtn_u64_v:
4014
16
  case NEON::BI__builtin_neon_vcvtnq_s32_v:
4015
16
  case NEON::BI__builtin_neon_vcvtnq_s64_v:
4016
16
  case NEON::BI__builtin_neon_vcvtnq_u32_v:
4017
16
  case NEON::BI__builtin_neon_vcvtnq_u64_v:
4018
16
  case NEON::BI__builtin_neon_vcvtp_s32_v:
4019
16
  case NEON::BI__builtin_neon_vcvtp_s64_v:
4020
16
  case NEON::BI__builtin_neon_vcvtp_u32_v:
4021
16
  case NEON::BI__builtin_neon_vcvtp_u64_v:
4022
16
  case NEON::BI__builtin_neon_vcvtpq_s32_v:
4023
16
  case NEON::BI__builtin_neon_vcvtpq_s64_v:
4024
16
  case NEON::BI__builtin_neon_vcvtpq_u32_v:
4025
16
  case NEON::BI__builtin_neon_vcvtpq_u64_v:
4026
16
  case NEON::BI__builtin_neon_vcvtm_s32_v:
4027
16
  case NEON::BI__builtin_neon_vcvtm_s64_v:
4028
16
  case NEON::BI__builtin_neon_vcvtm_u32_v:
4029
16
  case NEON::BI__builtin_neon_vcvtm_u64_v:
4030
16
  case NEON::BI__builtin_neon_vcvtmq_s32_v:
4031
16
  case NEON::BI__builtin_neon_vcvtmq_s64_v:
4032
16
  case NEON::BI__builtin_neon_vcvtmq_u32_v:
4033
16
  case NEON::BI__builtin_neon_vcvtmq_u64_v: {
4034
16
    llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
4035
16
    return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
4036
16
  }
4037
96
  case NEON::BI__builtin_neon_vext_v:
4038
96
  case NEON::BI__builtin_neon_vextq_v: {
4039
96
    int CV = cast<ConstantInt>(Ops[2])->getSExtValue();
4040
96
    SmallVector<uint32_t, 16> Indices;
4041
636
    for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
4042
540
      Indices.push_back(i+CV);
4043
96
4044
96
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
4045
96
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4046
96
    return Builder.CreateShuffleVector(Ops[0], Ops[1], Indices, "vext");
4047
96
  }
4048
19
  case NEON::BI__builtin_neon_vfma_v:
4049
19
  case NEON::BI__builtin_neon_vfmaq_v: {
4050
19
    Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
4051
19
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
4052
19
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4053
19
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
4054
19
4055
19
    // NEON intrinsic puts accumulator first, unlike the LLVM fma.
4056
19
    return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]});
4057
19
  }
4058
27
  case NEON::BI__builtin_neon_vld1_v:
4059
27
  case NEON::BI__builtin_neon_vld1q_v: {
4060
27
    llvm::Type *Tys[] = {Ty, Int8PtrTy};
4061
27
    Ops.push_back(getAlignmentValue32(PtrOp0));
4062
27
    return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vld1");
4063
27
  }
4064
66
  case NEON::BI__builtin_neon_vld2_v:
4065
66
  case NEON::BI__builtin_neon_vld2q_v:
4066
66
  case NEON::BI__builtin_neon_vld3_v:
4067
66
  case NEON::BI__builtin_neon_vld3q_v:
4068
66
  case NEON::BI__builtin_neon_vld4_v:
4069
66
  case NEON::BI__builtin_neon_vld4q_v: {
4070
66
    llvm::Type *Tys[] = {Ty, Int8PtrTy};
4071
66
    Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
4072
66
    Value *Align = getAlignmentValue32(PtrOp1);
4073
66
    Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, NameHint);
4074
66
    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
4075
66
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
4076
66
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
4077
66
  }
4078
25
  case NEON::BI__builtin_neon_vld1_dup_v:
4079
25
  case NEON::BI__builtin_neon_vld1q_dup_v: {
4080
25
    Value *V = UndefValue::get(Ty);
4081
25
    Ty = llvm::PointerType::getUnqual(VTy->getElementType());
4082
25
    PtrOp0 = Builder.CreateBitCast(PtrOp0, Ty);
4083
25
    LoadInst *Ld = Builder.CreateLoad(PtrOp0);
4084
25
    llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
4085
25
    Ops[0] = Builder.CreateInsertElement(V, Ld, CI);
4086
25
    return EmitNeonSplat(Ops[0], CI);
4087
25
  }
4088
51
  case NEON::BI__builtin_neon_vld2_lane_v:
4089
51
  case NEON::BI__builtin_neon_vld2q_lane_v:
4090
51
  case NEON::BI__builtin_neon_vld3_lane_v:
4091
51
  case NEON::BI__builtin_neon_vld3q_lane_v:
4092
51
  case NEON::BI__builtin_neon_vld4_lane_v:
4093
51
  case NEON::BI__builtin_neon_vld4q_lane_v: {
4094
51
    llvm::Type *Tys[] = {Ty, Int8PtrTy};
4095
51
    Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
4096
204
    for (unsigned I = 2; I < Ops.size() - 1; ++I)
4097
153
      Ops[I] = Builder.CreateBitCast(Ops[I], Ty);
4098
51
    Ops.push_back(getAlignmentValue32(PtrOp1));
4099
51
    Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), NameHint);
4100
51
    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
4101
51
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
4102
51
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
4103
51
  }
4104
24
  case NEON::BI__builtin_neon_vmovl_v: {
4105
24
    llvm::Type *DTy = llvm::VectorType::getTruncatedElementVectorType(VTy);
4106
24
    Ops[0] = Builder.CreateBitCast(Ops[0], DTy);
4107
24
    if (Usgn)
4108
12
      return Builder.CreateZExt(Ops[0], Ty, "vmovl");
4109
12
    return Builder.CreateSExt(Ops[0], Ty, "vmovl");
4110
12
  }
4111
24
  case NEON::BI__builtin_neon_vmovn_v: {
4112
24
    llvm::Type *QTy = llvm::VectorType::getExtendedElementVectorType(VTy);
4113
24
    Ops[0] = Builder.CreateBitCast(Ops[0], QTy);
4114
24
    return Builder.CreateTrunc(Ops[0], Ty, "vmovn");
4115
12
  }
4116
11
  case NEON::BI__builtin_neon_vmull_v:
4117
11
    // FIXME: the integer vmull operations could be emitted in terms of pure
4118
11
    // LLVM IR (2 exts followed by a mul). Unfortunately LLVM has a habit of
4119
11
    // hoisting the exts outside loops. Until global ISel comes along that can
4120
11
    // see through such movement this leads to bad CodeGen. So we need an
4121
11
    // intrinsic for now.
4122
11
    Int = Usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls;
4123
11
    Int = Type.isPoly() ? (unsigned)Intrinsic::arm_neon_vmullp : Int;
4124
11
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
4125
12
  case NEON::BI__builtin_neon_vpadal_v:
4126
12
  case NEON::BI__builtin_neon_vpadalq_v: {
4127
12
    // The source operand type has twice as many elements of half the size.
4128
12
    unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
4129
12
    llvm::Type *EltTy =
4130
12
      llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
4131
12
    llvm::Type *NarrowTy =
4132
12
      llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
4133
12
    llvm::Type *Tys[2] = { Ty, NarrowTy };
4134
12
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
4135
12
  }
4136
48
  case NEON::BI__builtin_neon_vpaddl_v:
4137
48
  case NEON::BI__builtin_neon_vpaddlq_v: {
4138
48
    // The source operand type has twice as many elements of half the size.
4139
48
    unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
4140
48
    llvm::Type *EltTy = llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
4141
48
    llvm::Type *NarrowTy =
4142
48
      llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
4143
48
    llvm::Type *Tys[2] = { Ty, NarrowTy };
4144
48
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpaddl");
4145
48
  }
4146
36
  case NEON::BI__builtin_neon_vqdmlal_v:
4147
36
  case NEON::BI__builtin_neon_vqdmlsl_v: {
4148
36
    SmallVector<Value *, 2> MulOps(Ops.begin() + 1, Ops.end());
4149
36
    Ops[1] =
4150
36
        EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), MulOps, "vqdmlal");
4151
36
    Ops.resize(2);
4152
36
    return EmitNeonCall(CGM.getIntrinsic(AltLLVMIntrinsic, Ty), Ops, NameHint);
4153
36
  }
4154
64
  case NEON::BI__builtin_neon_vqshl_n_v:
4155
64
  case NEON::BI__builtin_neon_vqshlq_n_v:
4156
64
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl_n",
4157
64
                        1, false);
4158
32
  case NEON::BI__builtin_neon_vqshlu_n_v:
4159
32
  case NEON::BI__builtin_neon_vqshluq_n_v:
4160
32
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshlu_n",
4161
32
                        1, false);
4162
44
  case NEON::BI__builtin_neon_vrecpe_v:
4163
44
  case NEON::BI__builtin_neon_vrecpeq_v:
4164
44
  case NEON::BI__builtin_neon_vrsqrte_v:
4165
44
  case NEON::BI__builtin_neon_vrsqrteq_v:
4166
44
    Int = Ty->isFPOrFPVectorTy() ? LLVMIntrinsic : AltLLVMIntrinsic;
4167
44
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);
4168
44
4169
64
  case NEON::BI__builtin_neon_vrshr_n_v:
4170
64
  case NEON::BI__builtin_neon_vrshrq_n_v:
4171
64
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n",
4172
64
                        1, true);
4173
64
  case NEON::BI__builtin_neon_vshl_n_v:
4174
64
  case NEON::BI__builtin_neon_vshlq_n_v:
4175
64
    Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);
4176
64
    return Builder.CreateShl(Builder.CreateBitCast(Ops[0],Ty), Ops[1],
4177
64
                             "vshl_n");
4178
72
  case NEON::BI__builtin_neon_vshll_n_v: {
4179
72
    llvm::Type *SrcTy = llvm::VectorType::getTruncatedElementVectorType(VTy);
4180
72
    Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
4181
72
    if (Usgn)
4182
36
      Ops[0] = Builder.CreateZExt(Ops[0], VTy);
4183
72
    else
4184
36
      Ops[0] = Builder.CreateSExt(Ops[0], VTy);
4185
72
    Ops[1] = EmitNeonShiftVector(Ops[1], VTy, false);
4186
72
    return Builder.CreateShl(Ops[0], Ops[1], "vshll_n");
4187
64
  }
4188
42
  case NEON::BI__builtin_neon_vshrn_n_v: {
4189
42
    llvm::Type *SrcTy = llvm::VectorType::getExtendedElementVectorType(VTy);
4190
42
    Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
4191
42
    Ops[1] = EmitNeonShiftVector(Ops[1], SrcTy, false);
4192
42
    if (Usgn)
4193
21
      Ops[0] = Builder.CreateLShr(Ops[0], Ops[1]);
4194
42
    else
4195
21
      Ops[0] = Builder.CreateAShr(Ops[0], Ops[1]);
4196
42
    return Builder.CreateTrunc(Ops[0], Ty, "vshrn_n");
4197
64
  }
4198
70
  case NEON::BI__builtin_neon_vshr_n_v:
4199
70
  case NEON::BI__builtin_neon_vshrq_n_v:
4200
70
    return EmitNeonRShiftImm(Ops[0], Ops[1], Ty, Usgn, "vshr_n");
4201
142
  case NEON::BI__builtin_neon_vst1_v:
4202
142
  case NEON::BI__builtin_neon_vst1q_v:
4203
142
  case NEON::BI__builtin_neon_vst2_v:
4204
142
  case NEON::BI__builtin_neon_vst2q_v:
4205
142
  case NEON::BI__builtin_neon_vst3_v:
4206
142
  case NEON::BI__builtin_neon_vst3q_v:
4207
142
  case NEON::BI__builtin_neon_vst4_v:
4208
142
  case NEON::BI__builtin_neon_vst4q_v:
4209
142
  case NEON::BI__builtin_neon_vst2_lane_v:
4210
142
  case NEON::BI__builtin_neon_vst2q_lane_v:
4211
142
  case NEON::BI__builtin_neon_vst3_lane_v:
4212
142
  case NEON::BI__builtin_neon_vst3q_lane_v:
4213
142
  case NEON::BI__builtin_neon_vst4_lane_v:
4214
142
  case NEON::BI__builtin_neon_vst4q_lane_v: {
4215
142
    llvm::Type *Tys[] = {Int8PtrTy, Ty};
4216
142
    Ops.push_back(getAlignmentValue32(PtrOp0));
4217
142
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "");
4218
142
  }
4219
24
  case NEON::BI__builtin_neon_vsubhn_v: {
4220
24
    llvm::VectorType *SrcTy =
4221
24
        llvm::VectorType::getExtendedElementVectorType(VTy);
4222
24
4223
24
    // %sum = add <4 x i32> %lhs, %rhs
4224
24
    Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
4225
24
    Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
4226
24
    Ops[0] = Builder.CreateSub(Ops[0], Ops[1], "vsubhn");
4227
24
4228
24
    // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
4229
24
    Constant *ShiftAmt =
4230
24
        ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
4231
24
    Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vsubhn");
4232
24
4233
24
    // %res = trunc <4 x i32> %high to <4 x i16>
4234
24
    return Builder.CreateTrunc(Ops[0], VTy, "vsubhn");
4235
142
  }
4236
18
  case NEON::BI__builtin_neon_vtrn_v:
4237
18
  case NEON::BI__builtin_neon_vtrnq_v: {
4238
18
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
4239
18
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4240
18
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
4241
18
    Value *SV = nullptr;
4242
18
4243
54
    for (unsigned vi = 0; vi != 2; ++vi) {
4244
36
      SmallVector<uint32_t, 16> Indices;
4245
162
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
4246
126
        Indices.push_back(i+vi);
4247
126
        Indices.push_back(i+e+vi);
4248
126
      }
4249
36
      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
4250
36
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
4251
36
      SV = Builder.CreateDefaultAlignedStore(SV, Addr);
4252
36
    }
4253
18
    return SV;
4254
18
  }
4255
78
  case NEON::BI__builtin_neon_vtst_v:
4256
78
  case NEON::BI__builtin_neon_vtstq_v: {
4257
78
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
4258
78
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4259
78
    Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
4260
78
    Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
4261
78
                                ConstantAggregateZero::get(Ty));
4262
78
    return Builder.CreateSExt(Ops[0], Ty, "vtst");
4263
78
  }
4264
18
  case NEON::BI__builtin_neon_vuzp_v:
4265
18
  case NEON::BI__builtin_neon_vuzpq_v: {
4266
18
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
4267
18
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4268
18
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
4269
18
    Value *SV = nullptr;
4270
18
4271
54
    for (unsigned vi = 0; vi != 2; ++vi) {
4272
36
      SmallVector<uint32_t, 16> Indices;
4273
288
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
4274
252
        Indices.push_back(2*i+vi);
4275
36
4276
36
      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
4277
36
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
4278
36
      SV = Builder.CreateDefaultAlignedStore(SV, Addr);
4279
36
    }
4280
18
    return SV;
4281
18
  }
4282
19
  case NEON::BI__builtin_neon_vzip_v:
4283
19
  case NEON::BI__builtin_neon_vzipq_v: {
4284
19
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
4285
19
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4286
19
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
4287
19
    Value *SV = nullptr;
4288
19
4289
57
    for (unsigned vi = 0; vi != 2; ++vi) {
4290
38
      SmallVector<uint32_t, 16> Indices;
4291
180
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
4292
142
        Indices.push_back((i + vi*e) >> 1);
4293
142
        Indices.push_back(((i + vi*e) >> 1)+e);
4294
142
      }
4295
38
      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
4296
38
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
4297
38
      SV = Builder.CreateDefaultAlignedStore(SV, Addr);
4298
38
    }
4299
19
    return SV;
4300
1.12k
  }
4301
1.12k
  }
4302
1.12k
4303
2.53k
  assert(Int && "Expected valid intrinsic number");
4304
1.12k
4305
1.12k
  // Determine the type(s) of this overloaded AArch64 intrinsic.
4306
1.12k
  Function *F = LookupNeonLLVMIntrinsic(Int, Modifier, Ty, E);
4307
1.12k
4308
1.12k
  Value *Result = EmitNeonCall(F, Ops, NameHint);
4309
1.12k
  llvm::Type *ResultType = ConvertType(E->getType());
4310
1.12k
  // AArch64 intrinsic one-element vector type cast to
4311
1.12k
  // scalar type expected by the builtin
4312
1.12k
  return Builder.CreateBitCast(Result, ResultType, NameHint);
4313
1.12k
}
4314
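The vtrn/vuzp/vzip cases in EmitCommonNeonBuiltinExpr build their shuffle masks arithmetically. As a worked check of the vzip index formula (plain C++, no LLVM dependencies), a 4-lane vector yields the masks {0, 4, 1, 5} and {2, 6, 3, 7}, where indices 0..3 select lanes of Ops[1] and 4..7 select lanes of Ops[2], i.e. the low halves interleaved, then the high halves:

#include <cstdio>

int main() {
  const unsigned e = 4; // lanes per input vector
  for (unsigned vi = 0; vi != 2; ++vi) {
    std::printf("vzip result %u:", vi);
    for (unsigned i = 0; i != e; i += 2)
      // Same index expressions as the vzip case above.
      std::printf(" %u %u", (i + vi * e) >> 1, ((i + vi * e) >> 1) + e);
    std::printf("\n");
  }
  return 0;
}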
4315
Value *CodeGenFunction::EmitAArch64CompareBuiltinExpr(
4316
    Value *Op, llvm::Type *Ty, const CmpInst::Predicate Fp,
4317
259
    const CmpInst::Predicate Ip, const Twine &Name) {
4318
259
  llvm::Type *OTy = Op->getType();
4319
259
4320
259
  // FIXME: this is utterly horrific. We should not be looking at previous
4321
259
  // codegen context to find out what needs doing. Unfortunately TableGen
4322
259
  // currently gives us exactly the same calls for vceqz_f32 and vceqz_s32
4323
259
  // (etc).
4324
259
  if (BitCastInst *BI = dyn_cast<BitCastInst>(Op))
4325
172
    OTy = BI->getOperand(0)->getType();
4326
259
4327
259
  Op = Builder.CreateBitCast(Op, OTy);
4328
259
  if (OTy->getScalarType()->isFloatingPointTy()) {
4329
90
    Op = Builder.CreateFCmp(Fp, Op, Constant::getNullValue(OTy));
4330
259
  } else {
4331
169
    Op = Builder.CreateICmp(Ip, Op, Constant::getNullValue(OTy));
4332
169
  }
4333
259
  return Builder.CreateSExt(Op, Ty, Name);
4334
259
}
4335
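EmitAArch64CompareBuiltinExpr picks an FP or integer compare against zero based on the (possibly bitcast-peeled) operand type, then sign-extends the i1 result so a true lane becomes all-ones, as NEON comparisons expect. A scalar model of that behavior (illustrative plain C++, not clang's code):

#include <cstdint>

int32_t vceqzModel(float X)   { return X == 0.0f ? -1 : 0; } // FCmp + sext
int32_t vceqzModel(int32_t X) { return X == 0    ? -1 : 0; } // ICmp + sext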
4336
static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
4337
                                 Value *ExtOp, Value *IndexOp,
4338
                                 llvm::Type *ResTy, unsigned IntID,
4339
72
                                 const char *Name) {
4340
72
  SmallVector<Value *, 2> TblOps;
4341
72
  if (ExtOp)
4342
18
    TblOps.push_back(ExtOp);
4343
72
4344
72
  // Build a vector containing sequential number like (0, 1, 2, ..., 15)
4345
72
  SmallVector<uint32_t, 16> Indices;
4346
72
  llvm::VectorType *TblTy = cast<llvm::VectorType>(Ops[0]->getType());
4347
648
  for (unsigned i = 0, e = TblTy->getNumElements(); i != e; ++i) {
4348
576
    Indices.push_back(2*i);
4349
576
    Indices.push_back(2*i+1);
4350
576
  }
4351
72
4352
72
  int PairPos = 0, End = Ops.size() - 1;
4353
144
  while (PairPos < End) {
4354
72
    TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
4355
72
                                                     Ops[PairPos+1], Indices,
4356
72
                                                     Name));
4357
72
    PairPos += 2;
4358
72
  }
4359
72
4360
72
  // If there's an odd number of 64-bit lookup table, fill the high 64-bit
4361
72
  // of the 128-bit lookup table with zero.
4362
72
  if (PairPos == End) {
4363
36
    Value *ZeroTbl = ConstantAggregateZero::get(TblTy);
4364
36
    TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
4365
36
                                                     ZeroTbl, Indices, Name));
4366
36
  }
4367
72
4368
72
  Function *TblF;
4369
72
  TblOps.push_back(IndexOp);
4370
72
  TblF = CGF.CGM.getIntrinsic(IntID, ResTy);
4371
72
4372
72
  return CGF.EmitNeonCall(TblF, TblOps, Name);
4373
72
}
4374
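packTBLDVectorList widens the 64-bit table registers for TBL/TBX: adjacent pairs are concatenated into 128-bit lookup tables via a shuffle, and an odd trailing register is concatenated with zeros. A hedged sketch of just that pairing logic (simplified value types, not clang's):

#include <array>
#include <cstdint>
#include <vector>

using V8  = std::array<uint8_t, 8>;   // one 64-bit table register
using V16 = std::array<uint8_t, 16>;  // one 128-bit lookup table

static V16 concat(const V8 &Lo, const V8 &Hi) {
  V16 R{};
  for (int i = 0; i < 8; ++i) { R[i] = Lo[i]; R[8 + i] = Hi[i]; }
  return R;
}

std::vector<V16> packTables(const std::vector<V8> &Regs) {
  std::vector<V16> Tbl;
  size_t P = 0, E = Regs.size();
  for (; P + 1 < E; P += 2)        // pair adjacent 64-bit registers
    Tbl.push_back(concat(Regs[P], Regs[P + 1]));
  if (P < E)                       // odd tail: zero-fill the high 64 bits
    Tbl.push_back(concat(Regs[P], V8{}));
  return Tbl;
}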
4375
1.34k
Value *CodeGenFunction::GetValueForARMHint(unsigned BuiltinID) {
4376
1.34k
  unsigned Value;
4377
1.34k
  switch (BuiltinID) {
4378
1.32k
  default:
4379
1.32k
    return nullptr;
4380
2
  case ARM::BI__builtin_arm_nop:
4381
2
    Value = 0;
4382
2
    break;
4383
3
  case ARM::BI__builtin_arm_yield:
4384
3
  case ARM::BI__yield:
4385
3
    Value = 1;
4386
3
    break;
4387
3
  case ARM::BI__builtin_arm_wfe:
4388
3
  case ARM::BI__wfe:
4389
3
    Value = 2;
4390
3
    break;
4391
3
  case ARM::BI__builtin_arm_wfi:
4392
3
  case ARM::BI__wfi:
4393
3
    Value = 3;
4394
3
    break;
4395
3
  case ARM::BI__builtin_arm_sev:
4396
3
  case ARM::BI__sev:
4397
3
    Value = 4;
4398
3
    break;
4399
3
  case ARM::BI__builtin_arm_sevl:
4400
3
  case ARM::BI__sevl:
4401
3
    Value = 5;
4402
3
    break;
4403
17
  }
4404
17
4405
17
  return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_hint),
4406
17
                            llvm::ConstantInt::get(Int32Ty, Value));
4407
17
}
4408
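All of the hint builtins handled by GetValueForARMHint funnel into the single llvm.arm.hint intrinsic, differing only in the immediate: nop=0, yield=1, wfe=2, wfi=3, sev=4, sevl=5 (values taken from the switch above). An illustrative use, valid when compiling for an ARM target:

// __builtin_arm_wfe() reaches GetValueForARMHint and is emitted as a call
// to llvm.arm.hint with immediate 2, per the switch above.
void waitForEvent() {
  __builtin_arm_wfe();
}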
4409
// Generates the IR for the read/write special register builtin,
4410
// ValueType is the type of the value that is to be written or read,
4411
// RegisterType is the type of the register being written to or read from.
4412
static Value *EmitSpecialRegisterBuiltin(CodeGenFunction &CGF,
4413
                                         const CallExpr *E,
4414
                                         llvm::Type *RegisterType,
4415
                                         llvm::Type *ValueType,
4416
                                         bool IsRead,
4417
26
                                         StringRef SysReg = "") {
4418
26
  // write and register intrinsics only support 32 and 64 bit operations.
4419
26
  assert((RegisterType->isIntegerTy(32) || RegisterType->isIntegerTy(64))
4420
26
          && "Unsupported size for register.");
4421
26
4422
26
  CodeGen::CGBuilderTy &Builder = CGF.Builder;
4423
26
  CodeGen::CodeGenModule &CGM = CGF.CGM;
4424
26
  LLVMContext &Context = CGM.getLLVMContext();
4425
26
4426
26
  if (SysReg.empty()) {
4427
24
    const Expr *SysRegStrExpr = E->getArg(0)->IgnoreParenCasts();
4428
24
    SysReg = cast<clang::StringLiteral>(SysRegStrExpr)->getString();
4429
24
  }
4430
26
4431
26
  llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysReg) };
4432
26
  llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
4433
26
  llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
4434
26
4435
26
  llvm::Type *Types[] = { RegisterType };
4436
26
4437
18
  bool MixedTypes = RegisterType->isIntegerTy(64) && ValueType->isIntegerTy(32);
4438
26
  assert(!(RegisterType->isIntegerTy(32) && ValueType->isIntegerTy(64))
4439
26
            && "Can't fit 64-bit value in 32-bit register");
4440
26
4441
26
  if (IsRead) {
4442
14
    llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, Types);
4443
14
    llvm::Value *Call = Builder.CreateCall(F, Metadata);
4444
14
4445
14
    if (MixedTypes)
4446
14
      // Read into 64 bit register and then truncate result to 32 bit.
4447
2
      return Builder.CreateTrunc(Call, ValueType);
4448
12
4449
12
    if (ValueType->isPointerTy())
4450
12
      // Have i32/i64 result (Call) but want to return a VoidPtrTy (i8*).
4451
4
      return Builder.CreateIntToPtr(Call, ValueType);
4452
8
4453
8
    return Call;
4454
8
  }
4455
12
4456
12
  llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types);
4457
12
  llvm::Value *ArgValue = CGF.EmitScalarExpr(E->getArg(1));
4458
12
  if (MixedTypes) {
4459
2
    // Extend 32 bit write value to 64 bit to pass to write.
4460
2
    ArgValue = Builder.CreateZExt(ArgValue, RegisterType);
4461
2
    return Builder.CreateCall(F, { Metadata, ArgValue });
4462
2
  }
4463
10
4464
10
  if (ValueType->isPointerTy()) {
4465
4
    // Have VoidPtrTy ArgValue but want to return an i32/i64.
4466
4
    ArgValue = Builder.CreatePtrToInt(ArgValue, RegisterType);
4467
4
    return Builder.CreateCall(F, { Metadata, ArgValue });
4468
4
  }
4469
6
4470
6
  return Builder.CreateCall(F, { Metadata, ArgValue });
4471
6
}
4472
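A hedged usage illustration of EmitSpecialRegisterBuiltin: on an ARM target, the ACLE builtins below are lowered to llvm.read_register / llvm.write_register calls whose first operand is metadata naming the register. The register string is illustrative, and the IR in the comments is paraphrased:

unsigned readSpecialReg() {
  // Roughly: %0 = call i32 @llvm.read_register.i32(metadata !{!"cp15:0:c13:c0:3"})
  return __builtin_arm_rsr("cp15:0:c13:c0:3");
}

void writeSpecialReg(unsigned V) {
  // Roughly: call void @llvm.write_register.i32(metadata ..., i32 %V)
  __builtin_arm_wsr("cp15:0:c13:c0:3", V);
}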
4473
/// Return true if BuiltinID is an overloaded Neon intrinsic with an extra
4474
/// argument that specifies the vector type.
4475
1.23k
static bool HasExtraNeonArgument(unsigned BuiltinID) {
4476
1.23k
  switch (BuiltinID) {
4477
1.18k
  default: break;
4478
54
  case NEON::BI__builtin_neon_vget_lane_i8:
4479
54
  case NEON::BI__builtin_neon_vget_lane_i16:
4480
54
  case NEON::BI__builtin_neon_vget_lane_i32:
4481
54
  case NEON::BI__builtin_neon_vget_lane_i64:
4482
54
  case NEON::BI__builtin_neon_vget_lane_f32:
4483
54
  case NEON::BI__builtin_neon_vgetq_lane_i8:
4484
54
  case NEON::BI__builtin_neon_vgetq_lane_i16:
4485
54
  case NEON::BI__builtin_neon_vgetq_lane_i32:
4486
54
  case NEON::BI__builtin_neon_vgetq_lane_i64:
4487
54
  case NEON::BI__builtin_neon_vgetq_lane_f32:
4488
54
  case NEON::BI__builtin_neon_vset_lane_i8:
4489
54
  case NEON::BI__builtin_neon_vset_lane_i16:
4490
54
  case NEON::BI__builtin_neon_vset_lane_i32:
4491
54
  case NEON::BI__builtin_neon_vset_lane_i64:
4492
54
  case NEON::BI__builtin_neon_vset_lane_f32:
4493
54
  case NEON::BI__builtin_neon_vsetq_lane_i8:
4494
54
  case NEON::BI__builtin_neon_vsetq_lane_i16:
4495
54
  case NEON::BI__builtin_neon_vsetq_lane_i32:
4496
54
  case NEON::BI__builtin_neon_vsetq_lane_i64:
4497
54
  case NEON::BI__builtin_neon_vsetq_lane_f32:
4498
54
  case NEON::BI__builtin_neon_vsha1h_u32:
4499
54
  case NEON::BI__builtin_neon_vsha1cq_u32:
4500
54
  case NEON::BI__builtin_neon_vsha1pq_u32:
4501
54
  case NEON::BI__builtin_neon_vsha1mq_u32:
4502
54
  case ARM::BI_MoveToCoprocessor:
4503
54
  case ARM::BI_MoveToCoprocessor2:
4504
54
    return false;
4505
1.18k
  }
4506
1.18k
  return true;
4507
1.18k
}
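// Editor's note (annotation): sketch of the convention this predicate
// encodes. Overloaded NEON builtins carry a trailing NeonTypeFlags constant
// appended by Sema, so the caller drops it from the operand count, exactly
// as EmitARMBuiltinExpr does further down:
//   bool HasExtraArg = HasExtraNeonArgument(BuiltinID);
//   unsigned NumArgs = E->getNumArgs() - (HasExtraArg ? 1 : 0);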
4508
4509
Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
4510
1.34k
                                           const CallExpr *E) {
4511
1.34k
  if (auto Hint = GetValueForARMHint(BuiltinID))
4512
17
    return Hint;
4513
1.32k
4514
1.32k
  if (BuiltinID == ARM::BI__emit) {
4515
2
    bool IsThumb = getTarget().getTriple().getArch() == llvm::Triple::thumb;
4516
2
    llvm::FunctionType *FTy =
4517
2
        llvm::FunctionType::get(VoidTy, /*Variadic=*/false);
4518
2
4519
2
    APSInt Value;
4520
2
    if (!E->getArg(0)->EvaluateAsInt(Value, CGM.getContext()))
4521
0
      llvm_unreachable("Sema will ensure that the parameter is constant");
4522
2
4523
2
    uint64_t ZExtValue = Value.zextOrTrunc(IsThumb ? 16 : 32).getZExtValue();
4524
2
4525
2
    llvm::InlineAsm *Emit =
4526
2
        IsThumb ? InlineAsm::get(FTy, ".inst.n 0x" + utohexstr(ZExtValue), "",
4527
2
                                 /*SideEffects=*/true)
4528
0
                : InlineAsm::get(FTy, ".inst 0x" + utohexstr(ZExtValue), "",
4529
0
                                 /*SideEffects=*/true);
4530
2
4531
2
    return Builder.CreateCall(Emit);
4532
1.32k
  }
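// Editor's note (annotation): usage sketch for the __emit path above. The
// encoding below is the Thumb NOP, chosen purely for illustration; the
// argument must be a constant, and on Thumb it is truncated to 16 bits.
static void emit_nop() {
  __emit(0xbf00); // lowers to volatile inline asm ".inst.n 0xbf00" on Thumb
}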
4533
1.32k
4534
1.32k
  if (BuiltinID == ARM::BI__builtin_arm_dbg) {
4535
2
    Value *Option = EmitScalarExpr(E->getArg(0));
4536
2
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_dbg), Option);
4537
2
  }
4538
1.32k
4539
1.32k
  if (BuiltinID == ARM::BI__builtin_arm_prefetch) {
4540
7
    Value *Address = EmitScalarExpr(E->getArg(0));
4541
7
    Value *RW      = EmitScalarExpr(E->getArg(1));
4542
7
    Value *IsData  = EmitScalarExpr(E->getArg(2));
4543
7
4544
7
    // Locality is not supported on ARM target
4545
7
    Value *Locality = llvm::ConstantInt::get(Int32Ty, 3);
4546
7
4547
7
    Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
4548
7
    return Builder.CreateCall(F, {Address, RW, Locality, IsData});
4549
7
  }
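// Editor's note (annotation): usage sketch. The ARM builtin takes no
// locality argument, so codegen pins llvm.prefetch's locality operand to 3
// and forwards only the read/write and data/instruction flags.
static void prefetch_demo(const void *p) {
  __builtin_arm_prefetch(p, /*rw=*/0, /*data=*/1);
  // -> call void @llvm.prefetch(i8* %p, i32 0, i32 3, i32 1)
}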
4550
1.31k
4551
1.31k
  if (BuiltinID == ARM::BI__builtin_arm_rbit) {
4552
4
    llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
4553
4
    return Builder.CreateCall(
4554
4
        CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
4555
4
  }
4556
1.31k
4557
1.31k
  if (BuiltinID == ARM::BI__clear_cache) {
4558
2
    assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
4559
2
    const FunctionDecl *FD = E->getDirectCallee();
4560
2
    Value *Ops[2];
4561
6
    for (unsigned i = 0; i < 2; i++)
4562
4
      Ops[i] = EmitScalarExpr(E->getArg(i));
4563
2
    llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
4564
2
    llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
4565
2
    StringRef Name = FD->getName();
4566
2
    return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
4567
2
  }
4568
1.31k
4569
1.31k
  if (BuiltinID == ARM::BI__builtin_arm_mcrr ||
4570
1.31k
      BuiltinID == ARM::BI__builtin_arm_mcrr2) {
4571
2
    Function *F;
4572
2
4573
2
    switch (BuiltinID) {
4574
0
    default: llvm_unreachable("unexpected builtin");
4575
1
    case ARM::BI__builtin_arm_mcrr:
4576
1
      F = CGM.getIntrinsic(Intrinsic::arm_mcrr);
4577
1
      break;
4578
1
    case ARM::BI__builtin_arm_mcrr2:
4579
1
      F = CGM.getIntrinsic(Intrinsic::arm_mcrr2);
4580
1
      break;
4581
2
    }
4582
2
4583
2
    // MCRR{2} instruction has 5 operands but
4584
2
    // the intrinsic has 4 because Rt and Rt2
4585
2
    // are represented as a single unsigned 64
4586
2
    // bit integer in the intrinsic definition
4587
2
    // but internally it's represented as 2 32
4588
2
    // bit integers.
4589
2
4590
2
    Value *Coproc = EmitScalarExpr(E->getArg(0));
4591
2
    Value *Opc1 = EmitScalarExpr(E->getArg(1));
4592
2
    Value *RtAndRt2 = EmitScalarExpr(E->getArg(2));
4593
2
    Value *CRm = EmitScalarExpr(E->getArg(3));
4594
2
4595
2
    Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
4596
2
    Value *Rt = Builder.CreateTruncOrBitCast(RtAndRt2, Int32Ty);
4597
2
    Value *Rt2 = Builder.CreateLShr(RtAndRt2, C1);
4598
2
    Rt2 = Builder.CreateTruncOrBitCast(Rt2, Int32Ty);
4599
2
4600
2
    return Builder.CreateCall(F, {Coproc, Opc1, Rt, Rt2, CRm});
4601
2
  }
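#include <cstdint>
// Editor's note (annotation): scalar model of the Rt/Rt2 split above. The
// builtin packs the register pair into one u64; the LLVM intrinsic wants
// the two 32-bit halves separately.
static void split_rt(uint64_t pair, uint32_t &rt, uint32_t &rt2) {
  rt  = static_cast<uint32_t>(pair);        // CreateTruncOrBitCast
  rt2 = static_cast<uint32_t>(pair >> 32);  // CreateLShr(.., 32) + trunc
}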
4602
1.30k
4603
1.30k
  if (BuiltinID == ARM::BI__builtin_arm_mrrc ||
4604
1.30k
      BuiltinID == ARM::BI__builtin_arm_mrrc2) {
4605
2
    Function *F;
4606
2
4607
2
    switch (BuiltinID) {
4608
0
    default: llvm_unreachable("unexpected builtin");
4609
1
    case ARM::BI__builtin_arm_mrrc:
4610
1
      F = CGM.getIntrinsic(Intrinsic::arm_mrrc);
4611
1
      break;
4612
1
    case ARM::BI__builtin_arm_mrrc2:
4613
1
      F = CGM.getIntrinsic(Intrinsic::arm_mrrc2);
4614
1
      break;
4615
2
    }
4616
2
4617
2
    Value *Coproc = EmitScalarExpr(E->getArg(0));
4618
2
    Value *Opc1 = EmitScalarExpr(E->getArg(1));
4619
2
    Value *CRm  = EmitScalarExpr(E->getArg(2));
4620
2
    Value *RtAndRt2 = Builder.CreateCall(F, {Coproc, Opc1, CRm});
4621
2
4622
2
    // Returns an unsigned 64 bit integer, represented
4623
2
    // as two 32 bit integers.
4624
2
4625
2
    Value *Rt = Builder.CreateExtractValue(RtAndRt2, 1);
4626
2
    Value *Rt1 = Builder.CreateExtractValue(RtAndRt2, 0);
4627
2
    Rt = Builder.CreateZExt(Rt, Int64Ty);
4628
2
    Rt1 = Builder.CreateZExt(Rt1, Int64Ty);
4629
2
4630
2
    Value *ShiftCast = llvm::ConstantInt::get(Int64Ty, 32);
4631
2
    RtAndRt2 = Builder.CreateShl(Rt, ShiftCast, "shl", true);
4632
2
    RtAndRt2 = Builder.CreateOr(RtAndRt2, Rt1);
4633
2
4634
2
    return Builder.CreateBitCast(RtAndRt2, ConvertType(E->getType()));
4635
2
  }
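#include <cstdint>
// Editor's note (annotation): scalar model of the recombination above. mrrc
// yields {Rt, Rt1} as two i32s that get packed into one u64; the shift can
// carry the nuw flag because Rt was zero-extended first.
static uint64_t pack_rt(uint32_t rt1, uint32_t rt) {
  return (static_cast<uint64_t>(rt) << 32) | rt1;
}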
4636
1.30k
4637
1.30k
  if (BuiltinID == ARM::BI__builtin_arm_ldrexd ||
4638
1.30k
      ((BuiltinID == ARM::BI__builtin_arm_ldrex ||
4639
1.29k
        BuiltinID == ARM::BI__builtin_arm_ldaex) &&
4640
20
       getContext().getTypeSize(E->getType()) == 64) ||
4641
1.30k
      BuiltinID == ARM::BI__ldrexd) {
4642
7
    Function *F;
4643
7
4644
7
    switch (BuiltinID) {
4645
0
    default: llvm_unreachable("unexpected builtin");
4646
3
    case ARM::BI__builtin_arm_ldaex:
4647
3
      F = CGM.getIntrinsic(Intrinsic::arm_ldaexd);
4648
3
      break;
4649
4
    case ARM::BI__builtin_arm_ldrexd:
4650
4
    case ARM::BI__builtin_arm_ldrex:
4651
4
    case ARM::BI__ldrexd:
4652
4
      F = CGM.getIntrinsic(Intrinsic::arm_ldrexd);
4653
4
      break;
4654
7
    }
4655
7
4656
7
    Value *LdPtr = EmitScalarExpr(E->getArg(0));
4657
7
    Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy),
4658
7
                                    "ldrexd");
4659
7
4660
7
    Value *Val0 = Builder.CreateExtractValue(Val, 1);
4661
7
    Value *Val1 = Builder.CreateExtractValue(Val, 0);
4662
7
    Val0 = Builder.CreateZExt(Val0, Int64Ty);
4663
7
    Val1 = Builder.CreateZExt(Val1, Int64Ty);
4664
7
4665
7
    Value *ShiftCst = llvm::ConstantInt::get(Int64Ty, 32);
4666
7
    Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
4667
7
    Val = Builder.CreateOr(Val, Val1);
4668
7
    return Builder.CreateBitCast(Val, ConvertType(E->getType()));
4669
7
  }
4670
1.30k
4671
1.30k
  if (BuiltinID == ARM::BI__builtin_arm_ldrex ||
4672
1.30k
      BuiltinID == ARM::BI__builtin_arm_ldaex) {
4673
14
    Value *LoadAddr = EmitScalarExpr(E->getArg(0));
4674
14
4675
14
    QualType Ty = E->getType();
4676
14
    llvm::Type *RealResTy = ConvertType(Ty);
4677
14
    llvm::Type *PtrTy = llvm::IntegerType::get(
4678
14
        getLLVMContext(), getContext().getTypeSize(Ty))->getPointerTo();
4679
14
    LoadAddr = Builder.CreateBitCast(LoadAddr, PtrTy);
4680
14
4681
14
    Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_ldaex
4682
6
                                       ? Intrinsic::arm_ldaex
4683
8
                                       : Intrinsic::arm_ldrex,
4684
14
                                   PtrTy);
4685
14
    Value *Val = Builder.CreateCall(F, LoadAddr, "ldrex");
4686
14
4687
14
    if (RealResTy->isPointerTy())
4688
4
      return Builder.CreateIntToPtr(Val, RealResTy);
4689
10
    else {
4690
10
      llvm::Type *IntResTy = llvm::IntegerType::get(
4691
10
          getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));
4692
10
      Val = Builder.CreateTruncOrBitCast(Val, IntResTy);
4693
10
      return Builder.CreateBitCast(Val, RealResTy);
4694
10
    }
4695
1.28k
  }
4696
1.28k
4697
1.28k
  if (BuiltinID == ARM::BI__builtin_arm_strexd ||
4698
1.28k
      ((BuiltinID == ARM::BI__builtin_arm_stlex ||
4699
1.27k
        BuiltinID == ARM::BI__builtin_arm_strex) &&
4700
1.28k
       getContext().getTypeSize(E->getArg(0)->getType()) == 64)) {
4701
4
    Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_stlex
4702
2
                                       ? Intrinsic::arm_stlexd
4703
2
                                       : Intrinsic::arm_strexd);
4704
4
    llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty);
4705
4
4706
4
    Address Tmp = CreateMemTemp(E->getArg(0)->getType());
4707
4
    Value *Val = EmitScalarExpr(E->getArg(0));
4708
4
    Builder.CreateStore(Val, Tmp);
4709
4
4710
4
    Address LdPtr = Builder.CreateBitCast(Tmp,llvm::PointerType::getUnqual(STy));
4711
4
    Val = Builder.CreateLoad(LdPtr);
4712
4
4713
4
    Value *Arg0 = Builder.CreateExtractValue(Val, 0);
4714
4
    Value *Arg1 = Builder.CreateExtractValue(Val, 1);
4715
4
    Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), Int8PtrTy);
4716
4
    return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "strexd");
4717
4
  }
4718
1.28k
4719
1.28k
  if (BuiltinID == ARM::BI__builtin_arm_strex ||
4720
1.28k
      BuiltinID == ARM::BI__builtin_arm_stlex) {
4721
12
    Value *StoreVal = EmitScalarExpr(E->getArg(0));
4722
12
    Value *StoreAddr = EmitScalarExpr(E->getArg(1));
4723
12
4724
12
    QualType Ty = E->getArg(0)->getType();
4725
12
    llvm::Type *StoreTy = llvm::IntegerType::get(getLLVMContext(),
4726
12
                                                 getContext().getTypeSize(Ty));
4727
12
    StoreAddr = Builder.CreateBitCast(StoreAddr, StoreTy->getPointerTo());
4728
12
4729
12
    if (StoreVal->getType()->isPointerTy())
4730
2
      StoreVal = Builder.CreatePtrToInt(StoreVal, Int32Ty);
4731
10
    else {
4732
10
      llvm::Type *IntTy = llvm::IntegerType::get(
4733
10
          getLLVMContext(),
4734
10
          CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));
4735
10
      StoreVal = Builder.CreateBitCast(StoreVal, IntTy);
4736
10
      StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int32Ty);
4737
10
    }
4738
12
4739
12
    Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_stlex
4740
5
                                       ? Intrinsic::arm_stlex
4741
7
                                       : Intrinsic::arm_strex,
4742
12
                                   StoreAddr->getType());
4743
12
    return Builder.CreateCall(F, {StoreVal, StoreAddr}, "strex");
4744
12
  }
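// Editor's note (annotation): usage sketch for the exclusive-access
// builtins handled above -- the classic load-exclusive/store-exclusive
// retry loop. __builtin_arm_strex returns 0 on success, nonzero when the
// reservation was lost.
static int fetch_add(volatile int *p, int n) {
  int old;
  do {
    old = __builtin_arm_ldrex(p);
  } while (__builtin_arm_strex(old + n, p) != 0);
  return old;
}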
4745
1.27k
4746
1.27k
  switch (BuiltinID) {
4747
1
  case ARM::BI__iso_volatile_load8:
4748
1
  case ARM::BI__iso_volatile_load16:
4749
1
  case ARM::BI__iso_volatile_load32:
4750
1
  case ARM::BI__iso_volatile_load64: {
4751
1
    Value *Ptr = EmitScalarExpr(E->getArg(0));
4752
1
    QualType ElTy = E->getArg(0)->getType()->getPointeeType();
4753
1
    CharUnits LoadSize = getContext().getTypeSizeInChars(ElTy);
4754
1
    llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
4755
1
                                             LoadSize.getQuantity() * 8);
4756
1
    Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo());
4757
1
    llvm::LoadInst *Load =
4758
1
      Builder.CreateAlignedLoad(Ptr, LoadSize);
4759
1
    Load->setVolatile(true);
4760
1
    return Load;
4761
1
  }
4762
1
  case ARM::BI__iso_volatile_store8:
4763
1
  case ARM::BI__iso_volatile_store16:
4764
1
  case ARM::BI__iso_volatile_store32:
4765
1
  case ARM::BI__iso_volatile_store64: {
4766
1
    Value *Ptr = EmitScalarExpr(E->getArg(0));
4767
1
    Value *Value = EmitScalarExpr(E->getArg(1));
4768
1
    QualType ElTy = E->getArg(0)->getType()->getPointeeType();
4769
1
    CharUnits StoreSize = getContext().getTypeSizeInChars(ElTy);
4770
1
    llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
4771
1
                                             StoreSize.getQuantity() * 8);
4772
1
    Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo());
4773
1
    llvm::StoreInst *Store =
4774
1
      Builder.CreateAlignedStore(Value, Ptr,
4775
1
                                 StoreSize);
4776
1
    Store->setVolatile(true);
4777
1
    return Store;
4778
1.26k
  }
4779
1.26k
  }
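// Editor's note (annotation): usage sketch for the MSVC-compatibility
// accessors above (available under -fms-extensions). They are plain
// volatile loads/stores of the exact width -- no memory barrier is implied.
static void bump(volatile int *p) {
  int v = __iso_volatile_load32(p);
  __iso_volatile_store32(p, v + 1);
}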
4780
1.26k
4781
1.26k
  if (BuiltinID == ARM::BI__builtin_arm_clrex) {
4782
1
    Function *F = CGM.getIntrinsic(Intrinsic::arm_clrex);
4783
1
    return Builder.CreateCall(F);
4784
1
  }
4785
1.26k
4786
1.26k
  // CRC32
4787
1.26k
  Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
4788
1.26k
  switch (BuiltinID) {
4789
2
  case ARM::BI__builtin_arm_crc32b:
4790
2
    CRCIntrinsicID = Intrinsic::arm_crc32b; break;
4791
2
  case ARM::BI__builtin_arm_crc32cb:
4792
2
    CRCIntrinsicID = Intrinsic::arm_crc32cb; break;
4793
2
  case ARM::BI__builtin_arm_crc32h:
4794
2
    CRCIntrinsicID = Intrinsic::arm_crc32h; break;
4795
2
  case ARM::BI__builtin_arm_crc32ch:
4796
2
    CRCIntrinsicID = Intrinsic::arm_crc32ch; break;
4797
4
  case ARM::BI__builtin_arm_crc32w:
4798
4
  case ARM::BI__builtin_arm_crc32d:
4799
4
    CRCIntrinsicID = Intrinsic::arm_crc32w; break;
4800
4
  case ARM::BI__builtin_arm_crc32cw:
4801
4
  case ARM::BI__builtin_arm_crc32cd:
4802
4
    CRCIntrinsicID = Intrinsic::arm_crc32cw; break;
4803
1.26k
  }
4804
1.26k
4805
1.26k
  if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
4806
16
    Value *Arg0 = EmitScalarExpr(E->getArg(0));
4807
16
    Value *Arg1 = EmitScalarExpr(E->getArg(1));
4808
16
4809
16
    // crc32{c,}d intrinsics are implemented as two calls to crc32{c,}w
4810
16
    // intrinsics, hence we need different codegen for these cases.
4811
16
    if (BuiltinID == ARM::BI__builtin_arm_crc32d ||
4812
16
        BuiltinID == ARM::BI__builtin_arm_crc32cd) {
4813
4
      Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
4814
4
      Value *Arg1a = Builder.CreateTruncOrBitCast(Arg1, Int32Ty);
4815
4
      Value *Arg1b = Builder.CreateLShr(Arg1, C1);
4816
4
      Arg1b = Builder.CreateTruncOrBitCast(Arg1b, Int32Ty);
4817
4
4818
4
      Function *F = CGM.getIntrinsic(CRCIntrinsicID);
4819
4
      Value *Res = Builder.CreateCall(F, {Arg0, Arg1a});
4820
4
      return Builder.CreateCall(F, {Res, Arg1b});
4821
0
    } else {
4822
12
      Arg1 = Builder.CreateZExtOrBitCast(Arg1, Int32Ty);
4823
12
4824
12
      Function *F = CGM.getIntrinsic(CRCIntrinsicID);
4825
12
      return Builder.CreateCall(F, {Arg0, Arg1});
4826
12
    }
4827
1.25k
  }
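#include <cstdint>
// Editor's note (annotation): scalar restatement of the crc32{c,}d strategy
// above. ARM has no doubleword CRC instruction, so the 64-bit variants
// chain two word-sized steps (low word first, then high word).
static uint32_t crc32d_model(uint32_t crc, uint64_t data) {
  crc = __builtin_arm_crc32w(crc, static_cast<uint32_t>(data));
  return __builtin_arm_crc32w(crc, static_cast<uint32_t>(data >> 32));
}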
4828
1.25k
4829
1.25k
  if (BuiltinID == ARM::BI__builtin_arm_rsr ||
4830
1.24k
      BuiltinID == ARM::BI__builtin_arm_rsr64 ||
4831
1.24k
      BuiltinID == ARM::BI__builtin_arm_rsrp ||
4832
1.24k
      BuiltinID == ARM::BI__builtin_arm_wsr ||
4833
1.24k
      BuiltinID == ARM::BI__builtin_arm_wsr64 ||
4834
1.25k
      BuiltinID == ARM::BI__builtin_arm_wsrp) {
4835
12
4836
12
    bool IsRead = BuiltinID == ARM::BI__builtin_arm_rsr ||
4837
10
                  BuiltinID == ARM::BI__builtin_arm_rsr64 ||
4838
8
                  BuiltinID == ARM::BI__builtin_arm_rsrp;
4839
12
4840
12
    bool IsPointerBuiltin = BuiltinID == ARM::BI__builtin_arm_rsrp ||
4841
10
                            BuiltinID == ARM::BI__builtin_arm_wsrp;
4842
12
4843
12
    bool Is64Bit = BuiltinID == ARM::BI__builtin_arm_rsr64 ||
4844
10
                   BuiltinID == ARM::BI__builtin_arm_wsr64;
4845
12
4846
12
    llvm::Type *ValueType;
4847
12
    llvm::Type *RegisterType;
4848
12
    if (IsPointerBuiltin) {
4849
4
      ValueType = VoidPtrTy;
4850
4
      RegisterType = Int32Ty;
4851
12
    } else if (Is64Bit) {
4852
4
      ValueType = RegisterType = Int64Ty;
4853
8
    } else {
4854
4
      ValueType = RegisterType = Int32Ty;
4855
4
    }
4856
12
4857
12
    return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType, IsRead);
4858
12
  }
4859
1.23k
4860
1.23k
  // Find out if any arguments are required to be integer constant
4861
1.23k
  // expressions.
4862
1.23k
  unsigned ICEArguments = 0;
4863
1.23k
  ASTContext::GetBuiltinTypeError Error;
4864
1.23k
  getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
4865
1.23k
  assert(Error == ASTContext::GE_None && "Should not codegen an error");
4866
1.23k
4867
46
  auto getAlignmentValue32 = [&](Address addr) -> Value* {
4868
46
    return Builder.getInt32(addr.getAlignment().getQuantity());
4869
46
  };
4870
1.23k
4871
1.23k
  Address PtrOp0 = Address::invalid();
4872
1.23k
  Address PtrOp1 = Address::invalid();
4873
1.23k
  SmallVector<Value*, 4> Ops;
4874
1.23k
  bool HasExtraArg = HasExtraNeonArgument(BuiltinID);
4875
1.23k
  unsigned NumArgs = E->getNumArgs() - (HasExtraArg ? 1 : 0);
4876
4.33k
  for (unsigned i = 0, e = NumArgs; i != e; i++) {
4877
3.09k
    if (i == 0) {
4878
1.23k
      switch (BuiltinID) {
4879
245
      case NEON::BI__builtin_neon_vld1_v:
4880
245
      case NEON::BI__builtin_neon_vld1q_v:
4881
245
      case NEON::BI__builtin_neon_vld1q_lane_v:
4882
245
      case NEON::BI__builtin_neon_vld1_lane_v:
4883
245
      case NEON::BI__builtin_neon_vld1_dup_v:
4884
245
      case NEON::BI__builtin_neon_vld1q_dup_v:
4885
245
      case NEON::BI__builtin_neon_vst1_v:
4886
245
      case NEON::BI__builtin_neon_vst1q_v:
4887
245
      case NEON::BI__builtin_neon_vst1q_lane_v:
4888
245
      case NEON::BI__builtin_neon_vst1_lane_v:
4889
245
      case NEON::BI__builtin_neon_vst2_v:
4890
245
      case NEON::BI__builtin_neon_vst2q_v:
4891
245
      case NEON::BI__builtin_neon_vst2_lane_v:
4892
245
      case NEON::BI__builtin_neon_vst2q_lane_v:
4893
245
      case NEON::BI__builtin_neon_vst3_v:
4894
245
      case NEON::BI__builtin_neon_vst3q_v:
4895
245
      case NEON::BI__builtin_neon_vst3_lane_v:
4896
245
      case NEON::BI__builtin_neon_vst3q_lane_v:
4897
245
      case NEON::BI__builtin_neon_vst4_v:
4898
245
      case NEON::BI__builtin_neon_vst4q_v:
4899
245
      case NEON::BI__builtin_neon_vst4_lane_v:
4900
245
      case NEON::BI__builtin_neon_vst4q_lane_v:
4901
245
        // Get the alignment for the argument in addition to the value;
4902
245
        // we'll use it later.
4903
245
        PtrOp0 = EmitPointerWithAlignment(E->getArg(0));
4904
245
        Ops.push_back(PtrOp0.getPointer());
4905
245
        continue;
4906
2.85k
      }
4907
2.85k
    }
4908
2.85k
    if (i == 1) {
4909
1.04k
      switch (BuiltinID) {
4910
157
      case NEON::BI__builtin_neon_vld2_v:
4911
157
      case NEON::BI__builtin_neon_vld2q_v:
4912
157
      case NEON::BI__builtin_neon_vld3_v:
4913
157
      case NEON::BI__builtin_neon_vld3q_v:
4914
157
      case NEON::BI__builtin_neon_vld4_v:
4915
157
      case NEON::BI__builtin_neon_vld4q_v:
4916
157
      case NEON::BI__builtin_neon_vld2_lane_v:
4917
157
      case NEON::BI__builtin_neon_vld2q_lane_v:
4918
157
      case NEON::BI__builtin_neon_vld3_lane_v:
4919
157
      case NEON::BI__builtin_neon_vld3q_lane_v:
4920
157
      case NEON::BI__builtin_neon_vld4_lane_v:
4921
157
      case NEON::BI__builtin_neon_vld4q_lane_v:
4922
157
      case NEON::BI__builtin_neon_vld2_dup_v:
4923
157
      case NEON::BI__builtin_neon_vld3_dup_v:
4924
157
      case NEON::BI__builtin_neon_vld4_dup_v:
4925
157
        // Get the alignment for the argument in addition to the value;
4926
157
        // we'll use it later.
4927
157
        PtrOp1 = EmitPointerWithAlignment(E->getArg(1));
4928
157
        Ops.push_back(PtrOp1.getPointer());
4929
157
        continue;
4930
2.69k
      }
4931
2.69k
    }
4932
2.69k
4933
2.69k
    if ((ICEArguments & (1 << i)) == 0) {
4934
2.26k
      Ops.push_back(EmitScalarExpr(E->getArg(i)));
4935
2.69k
    } else {
4936
428
      // If this is required to be a constant, constant fold it so that we know
4937
428
      // that the generated intrinsic gets a ConstantInt.
4938
428
      llvm::APSInt Result;
4939
428
      bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
4940
428
      assert(IsConst && "Constant arg isn't actually constant?"); (void)IsConst;
4941
428
      Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
4942
428
    }
4943
3.09k
  }
4944
1.23k
4945
1.23k
  switch (BuiltinID) {
4946
1.17k
  default: break;
4947
1.23k
4948
24
  case NEON::BI__builtin_neon_vget_lane_i8:
4949
24
  case NEON::BI__builtin_neon_vget_lane_i16:
4950
24
  case NEON::BI__builtin_neon_vget_lane_i32:
4951
24
  case NEON::BI__builtin_neon_vget_lane_i64:
4952
24
  case NEON::BI__builtin_neon_vget_lane_f32:
4953
24
  case NEON::BI__builtin_neon_vgetq_lane_i8:
4954
24
  case NEON::BI__builtin_neon_vgetq_lane_i16:
4955
24
  case NEON::BI__builtin_neon_vgetq_lane_i32:
4956
24
  case NEON::BI__builtin_neon_vgetq_lane_i64:
4957
24
  case NEON::BI__builtin_neon_vgetq_lane_f32:
4958
24
    return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
4959
24
4960
24
  case NEON::BI__builtin_neon_vset_lane_i8:
4961
24
  case NEON::BI__builtin_neon_vset_lane_i16:
4962
24
  case NEON::BI__builtin_neon_vset_lane_i32:
4963
24
  case NEON::BI__builtin_neon_vset_lane_i64:
4964
24
  case NEON::BI__builtin_neon_vset_lane_f32:
4965
24
  case NEON::BI__builtin_neon_vsetq_lane_i8:
4966
24
  case NEON::BI__builtin_neon_vsetq_lane_i16:
4967
24
  case NEON::BI__builtin_neon_vsetq_lane_i32:
4968
24
  case NEON::BI__builtin_neon_vsetq_lane_i64:
4969
24
  case NEON::BI__builtin_neon_vsetq_lane_f32:
4970
24
    return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
4971
24
4972
1
  case NEON::BI__builtin_neon_vsha1h_u32:
4973
1
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1h), Ops,
4974
1
                        "vsha1h");
4975
1
  case NEON::BI__builtin_neon_vsha1cq_u32:
4976
1
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1c), Ops,
4977
1
                        "vsha1h");
4978
1
  case NEON::BI__builtin_neon_vsha1pq_u32:
4979
1
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1p), Ops,
4980
1
                        "vsha1h");
4981
1
  case NEON::BI__builtin_neon_vsha1mq_u32:
4982
1
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1m), Ops,
4983
1
                        "vsha1h");
4984
24
4985
24
  // The ARM _MoveToCoprocessor builtins put the input register value as
4986
24
  // the first argument, but the LLVM intrinsic expects it as the third one.
4987
2
  case ARM::BI_MoveToCoprocessor:
4988
2
  case ARM::BI_MoveToCoprocessor2: {
4989
2
    Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI_MoveToCoprocessor ?
4990
2
                                   Intrinsic::arm_mcr : Intrinsic::arm_mcr2);
4991
2
    return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0],
4992
2
                                  Ops[3], Ops[4], Ops[5]});
4993
2
  }
4994
2
  case ARM::BI_BitScanForward:
4995
2
  case ARM::BI_BitScanForward64:
4996
2
    return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanForward, E);
4997
2
  case ARM::BI_BitScanReverse:
4998
2
  case ARM::BI_BitScanReverse64:
4999
2
    return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanReverse, E);
5000
2
5001
1
  case ARM::BI_InterlockedAnd64:
5002
1
    return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E);
5003
1
  case ARM::BI_InterlockedExchange64:
5004
1
    return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E);
5005
1
  case ARM::BI_InterlockedExchangeAdd64:
5006
1
    return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E);
5007
1
  case ARM::BI_InterlockedExchangeSub64:
5008
1
    return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E);
5009
1
  case ARM::BI_InterlockedOr64:
5010
1
    return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E);
5011
1
  case ARM::BI_InterlockedXor64:
5012
1
    return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E);
5013
1
  case ARM::BI_InterlockedDecrement64:
5014
1
    return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E);
5015
1
  case ARM::BI_InterlockedIncrement64:
5016
1
    return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E);
5017
1.17k
  }
5018
1.17k
5019
1.17k
  // Get the last argument, which specifies the vector type.
5020
1.23k
  assert(HasExtraArg);
5021
1.17k
  llvm::APSInt Result;
5022
1.17k
  const Expr *Arg = E->getArg(E->getNumArgs()-1);
5023
1.17k
  if (!Arg->isIntegerConstantExpr(Result, getContext()))
5024
0
    return nullptr;
5025
1.17k
5026
1.17k
  if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f ||
5027
1.17k
      BuiltinID == ARM::BI__builtin_arm_vcvtr_d) {
5028
0
    // Determine the overloaded type of this builtin.
5029
0
    llvm::Type *Ty;
5030
0
    if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f)
5031
0
      Ty = FloatTy;
5032
0
    else
5033
0
      Ty = DoubleTy;
5034
0
5035
0
    // Determine whether this is an unsigned conversion or not.
5036
0
    bool usgn = Result.getZExtValue() == 1;
5037
0
    unsigned Int = usgn ? Intrinsic::arm_vcvtru : Intrinsic::arm_vcvtr;
5038
0
5039
0
    // Call the appropriate intrinsic.
5040
0
    Function *F = CGM.getIntrinsic(Int, Ty);
5041
0
    return Builder.CreateCall(F, Ops, "vcvtr");
5042
0
  }
5043
1.17k
5044
1.17k
  // Determine the type of this overloaded NEON intrinsic.
5045
1.17k
  NeonTypeFlags Type(Result.getZExtValue());
5046
1.17k
  bool usgn = Type.isUnsigned();
5047
1.17k
  bool rightShift = false;
5048
1.17k
5049
1.17k
  llvm::VectorType *VTy = GetNeonType(this, Type);
5050
1.17k
  llvm::Type *Ty = VTy;
5051
1.17k
  if (!Ty)
5052
0
    return nullptr;
5053
1.17k
5054
1.17k
  // Many NEON builtins have identical semantics and uses in ARM and
5055
1.17k
  // AArch64. Emit these in a single function.
5056
1.17k
  auto IntrinsicMap = makeArrayRef(ARMSIMDIntrinsicMap);
5057
1.17k
  const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap(
5058
1.17k
      IntrinsicMap, BuiltinID, NEONSIMDIntrinsicsProvenSorted);
5059
1.17k
  if (Builtin)
5060
958
    return EmitCommonNeonBuiltinExpr(
5061
958
        Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
5062
958
        Builtin->NameHint, Builtin->TypeModifier, E, Ops, PtrOp0, PtrOp1);
5063
215
5064
215
  unsigned Int;
5065
215
  switch (BuiltinID) {
5066
0
  default: return nullptr;
5067
13
  case NEON::BI__builtin_neon_vld1q_lane_v:
5068
13
    // Handle 64-bit integer elements as a special case.  Use shuffles of
5069
13
    // one-element vectors to avoid poor code for i64 in the backend.
5070
13
    if (VTy->getElementType()->isIntegerTy(64)) {
5071
3
      // Extract the other lane.
5072
3
      Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5073
3
      uint32_t Lane = cast<ConstantInt>(Ops[2])->getZExtValue();
5074
3
      Value *SV = llvm::ConstantVector::get(ConstantInt::get(Int32Ty, 1-Lane));
5075
3
      Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
5076
3
      // Load the value as a one-element vector.
5077
3
      Ty = llvm::VectorType::get(VTy->getElementType(), 1);
5078
3
      llvm::Type *Tys[] = {Ty, Int8PtrTy};
5079
3
      Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Tys);
5080
3
      Value *Align = getAlignmentValue32(PtrOp0);
5081
3
      Value *Ld = Builder.CreateCall(F, {Ops[0], Align});
5082
3
      // Combine them.
5083
3
      uint32_t Indices[] = {1 - Lane, Lane};
5084
3
      SV = llvm::ConstantDataVector::get(getLLVMContext(), Indices);
5085
3
      return Builder.CreateShuffleVector(Ops[1], Ld, SV, "vld1q_lane");
5086
3
    }
5087
10
    // fall through
5088
22
  case NEON::BI__builtin_neon_vld1_lane_v: {
5089
22
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5090
22
    PtrOp0 = Builder.CreateElementBitCast(PtrOp0, VTy->getElementType());
5091
22
    Value *Ld = Builder.CreateLoad(PtrOp0);
5092
22
    return Builder.CreateInsertElement(Ops[1], Ld, Ops[2], "vld1_lane");
5093
10
  }
5094
40
  case NEON::BI__builtin_neon_vld2_dup_v:
5095
40
  case NEON::BI__builtin_neon_vld3_dup_v:
5096
40
  case NEON::BI__builtin_neon_vld4_dup_v: {
5097
40
    // Handle 64-bit elements as a special-case.  There is no "dup" needed.
5098
40
    if (VTy->getElementType()->getPrimitiveSizeInBits() == 64) {
5099
8
      switch (BuiltinID) {
5100
2
      case NEON::BI__builtin_neon_vld2_dup_v:
5101
2
        Int = Intrinsic::arm_neon_vld2;
5102
2
        break;
5103
3
      case NEON::BI__builtin_neon_vld3_dup_v:
5104
3
        Int = Intrinsic::arm_neon_vld3;
5105
3
        break;
5106
3
      case NEON::BI__builtin_neon_vld4_dup_v:
5107
3
        Int = Intrinsic::arm_neon_vld4;
5108
3
        break;
5109
0
      default: llvm_unreachable("unknown vld_dup intrinsic?");
5110
8
      }
5111
8
      llvm::Type *Tys[] = {Ty, Int8PtrTy};
5112
8
      Function *F = CGM.getIntrinsic(Int, Tys);
5113
8
      llvm::Value *Align = getAlignmentValue32(PtrOp1);
5114
8
      Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, "vld_dup");
5115
8
      Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
5116
8
      Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5117
8
      return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
5118
8
    }
5119
32
    switch (BuiltinID) {
5120
10
    case NEON::BI__builtin_neon_vld2_dup_v:
5121
10
      Int = Intrinsic::arm_neon_vld2lane;
5122
10
      break;
5123
11
    case NEON::BI__builtin_neon_vld3_dup_v:
5124
11
      Int = Intrinsic::arm_neon_vld3lane;
5125
11
      break;
5126
11
    case NEON::BI__builtin_neon_vld4_dup_v:
5127
11
      Int = Intrinsic::arm_neon_vld4lane;
5128
11
      break;
5129
0
    default: llvm_unreachable("unknown vld_dup intrinsic?");
5130
32
    }
5131
32
    llvm::Type *Tys[] = {Ty, Int8PtrTy};
5132
32
    Function *F = CGM.getIntrinsic(Int, Tys);
5133
32
    llvm::StructType *STy = cast<llvm::StructType>(F->getReturnType());
5134
32
5135
32
    SmallVector<Value*, 6> Args;
5136
32
    Args.push_back(Ops[1]);
5137
32
    Args.append(STy->getNumElements(), UndefValue::get(Ty));
5138
32
5139
32
    llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
5140
32
    Args.push_back(CI);
5141
32
    Args.push_back(getAlignmentValue32(PtrOp1));
5142
32
5143
32
    Ops[1] = Builder.CreateCall(F, Args, "vld_dup");
5144
32
    // splat lane 0 to all elts in each vector of the result.
5145
129
    for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
5146
97
      Value *Val = Builder.CreateExtractValue(Ops[1], i);
5147
97
      Value *Elt = Builder.CreateBitCast(Val, Ty);
5148
97
      Elt = EmitNeonSplat(Elt, CI);
5149
97
      Elt = Builder.CreateBitCast(Elt, Val->getType());
5150
97
      Ops[1] = Builder.CreateInsertValue(Ops[1], Elt, i);
5151
97
    }
5152
32
    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
5153
32
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5154
32
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
5155
32
  }
5156
7
  case NEON::BI__builtin_neon_vqrshrn_n_v:
5157
7
    Int =
5158
7
      usgn ? Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns;
5159
7
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n",
5160
7
                        1, true);
5161
3
  case NEON::BI__builtin_neon_vqrshrun_n_v:
5162
3
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrshiftnsu, Ty),
5163
3
                        Ops, "vqrshrun_n", 1, true);
5164
6
  case NEON::BI__builtin_neon_vqshrn_n_v:
5165
6
    Int = usgn ? Intrinsic::arm_neon_vqshiftnu : Intrinsic::arm_neon_vqshiftns;
5166
6
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n",
5167
6
                        1, true);
5168
3
  case NEON::BI__builtin_neon_vqshrun_n_v:
5169
3
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftnsu, Ty),
5170
3
                        Ops, "vqshrun_n", 1, true);
5171
0
  case NEON::BI__builtin_neon_vrecpe_v:
5172
0
  case NEON::BI__builtin_neon_vrecpeq_v:
5173
0
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecpe, Ty),
5174
0
                        Ops, "vrecpe");
5175
6
  case NEON::BI__builtin_neon_vrshrn_n_v:
5176
6
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrshiftn, Ty),
5177
6
                        Ops, "vrshrn_n", 1, true);
5178
16
  case NEON::BI__builtin_neon_vrsra_n_v:
5179
16
  case NEON::BI__builtin_neon_vrsraq_n_v:
5180
16
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5181
16
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5182
16
    Ops[2] = EmitNeonShiftVector(Ops[2], Ty, true);
5183
16
    Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
5184
16
    Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Ty), {Ops[1], Ops[2]});
5185
16
    return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n");
5186
20
  case NEON::BI__builtin_neon_vsri_n_v:
5187
20
  case NEON::BI__builtin_neon_vsriq_n_v:
5188
20
    rightShift = true;
5189
20
    LLVM_FALLTHROUGH;
5190
40
  case NEON::BI__builtin_neon_vsli_n_v:
5191
40
  case NEON::BI__builtin_neon_vsliq_n_v:
5192
40
    Ops[2] = EmitNeonShiftVector(Ops[2], Ty, rightShift);
5193
40
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftins, Ty),
5194
40
                        Ops, "vsli_n");
5195
19
  case NEON::BI__builtin_neon_vsra_n_v:
5196
19
  case NEON::BI__builtin_neon_vsraq_n_v:
5197
19
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5198
19
    Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
5199
19
    return Builder.CreateAdd(Ops[0], Ops[1]);
5200
13
  case NEON::BI__builtin_neon_vst1q_lane_v:
5201
13
    // Handle 64-bit integer elements as a special case.  Use a shuffle to get
5202
13
    // a one-element vector and avoid poor code for i64 in the backend.
5203
13
    if (VTy->getElementType()->isIntegerTy(64)) {
5204
3
      Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5205
3
      Value *SV = llvm::ConstantVector::get(cast<llvm::Constant>(Ops[2]));
5206
3
      Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
5207
3
      Ops[2] = getAlignmentValue32(PtrOp0);
5208
3
      llvm::Type *Tys[] = {Int8PtrTy, Ops[1]->getType()};
5209
3
      return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1,
5210
3
                                                 Tys), Ops);
5211
3
    }
5212
10
    // fall through
5213
23
  case NEON::BI__builtin_neon_vst1_lane_v: {
5214
23
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5215
23
    Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
5216
23
    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
5217
23
    auto St = Builder.CreateStore(Ops[1], Builder.CreateBitCast(PtrOp0, Ty));
5218
23
    return St;
5219
10
  }
5220
3
  case NEON::BI__builtin_neon_vtbl1_v:
5221
3
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl1),
5222
3
                        Ops, "vtbl1");
5223
3
  case NEON::BI__builtin_neon_vtbl2_v:
5224
3
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl2),
5225
3
                        Ops, "vtbl2");
5226
3
  case NEON::BI__builtin_neon_vtbl3_v:
5227
3
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl3),
5228
3
                        Ops, "vtbl3");
5229
3
  case NEON::BI__builtin_neon_vtbl4_v:
5230
3
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl4),
5231
3
                        Ops, "vtbl4");
5232
3
  case NEON::BI__builtin_neon_vtbx1_v:
5233
3
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx1),
5234
3
                        Ops, "vtbx1");
5235
3
  case NEON::BI__builtin_neon_vtbx2_v:
5236
3
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx2),
5237
3
                        Ops, "vtbx2");
5238
3
  case NEON::BI__builtin_neon_vtbx3_v:
5239
3
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx3),
5240
3
                        Ops, "vtbx3");
5241
3
  case NEON::BI__builtin_neon_vtbx4_v:
5242
3
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx4),
5243
3
                        Ops, "vtbx4");
5244
0
  }
5245
0
}
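#include <arm_neon.h>
// Editor's note (annotation): source-level counterpart of the vld*_dup
// lowering at the end of EmitARMBuiltinExpr. vld2_dup_u8 loads a single
// element pair, and the splat-lane-0 loop replicates it across every lane
// of both result vectors.
static uint8x8x2_t load_pair_splat(const uint8_t *p) {
  return vld2_dup_u8(p);
}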
5246
5247
static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID,
5248
                                      const CallExpr *E,
5249
3.70k
                                      SmallVectorImpl<Value *> &Ops) {
5250
3.70k
  unsigned int Int = 0;
5251
3.70k
  const char *s = nullptr;
5252
3.70k
5253
3.70k
  switch (BuiltinID) {
5254
3.48k
  default:
5255
3.48k
    return nullptr;
5256
108
  case NEON::BI__builtin_neon_vtbl1_v:
5257
108
  case NEON::BI__builtin_neon_vqtbl1_v:
5258
108
  case NEON::BI__builtin_neon_vqtbl1q_v:
5259
108
  case NEON::BI__builtin_neon_vtbl2_v:
5260
108
  case NEON::BI__builtin_neon_vqtbl2_v:
5261
108
  case NEON::BI__builtin_neon_vqtbl2q_v:
5262
108
  case NEON::BI__builtin_neon_vtbl3_v:
5263
108
  case NEON::BI__builtin_neon_vqtbl3_v:
5264
108
  case NEON::BI__builtin_neon_vqtbl3q_v:
5265
108
  case NEON::BI__builtin_neon_vtbl4_v:
5266
108
  case NEON::BI__builtin_neon_vqtbl4_v:
5267
108
  case NEON::BI__builtin_neon_vqtbl4q_v:
5268
108
    break;
5269
108
  case NEON::BI__builtin_neon_vtbx1_v:
5270
108
  case NEON::BI__builtin_neon_vqtbx1_v:
5271
108
  case NEON::BI__builtin_neon_vqtbx1q_v:
5272
108
  case NEON::BI__builtin_neon_vtbx2_v:
5273
108
  case NEON::BI__builtin_neon_vqtbx2_v:
5274
108
  case NEON::BI__builtin_neon_vqtbx2q_v:
5275
108
  case NEON::BI__builtin_neon_vtbx3_v:
5276
108
  case NEON::BI__builtin_neon_vqtbx3_v:
5277
108
  case NEON::BI__builtin_neon_vqtbx3q_v:
5278
108
  case NEON::BI__builtin_neon_vtbx4_v:
5279
108
  case NEON::BI__builtin_neon_vqtbx4_v:
5280
108
  case NEON::BI__builtin_neon_vqtbx4q_v:
5281
108
    break;
5282
216
  }
5283
216
5284
3.70k
  assert(E->getNumArgs() >= 3);
5285
216
5286
216
  // Get the last argument, which specifies the vector type.
5287
216
  llvm::APSInt Result;
5288
216
  const Expr *Arg = E->getArg(E->getNumArgs() - 1);
5289
216
  if (!Arg->isIntegerConstantExpr(Result, CGF.getContext()))
5290
0
    return nullptr;
5291
216
5292
216
  // Determine the type of this overloaded NEON intrinsic.
5293
216
  NeonTypeFlags Type(Result.getZExtValue());
5294
216
  llvm::VectorType *Ty = GetNeonType(&CGF, Type);
5295
216
  if (!Ty)
5296
0
    return nullptr;
5297
216
5298
216
  CodeGen::CGBuilderTy &Builder = CGF.Builder;
5299
216
5300
216
  // AArch64 scalar builtins are not overloaded; they do not have an extra
5301
216
  // argument that specifies the vector type, so each case must be handled.
5302
216
  switch (BuiltinID) {
5303
9
  case NEON::BI__builtin_neon_vtbl1_v: {
5304
9
    return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 1), nullptr,
5305
9
                              Ops[1], Ty, Intrinsic::aarch64_neon_tbl1,
5306
9
                              "vtbl1");
5307
216
  }
5308
9
  case NEON::BI__builtin_neon_vtbl2_v: {
5309
9
    return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 2), nullptr,
5310
9
                              Ops[2], Ty, Intrinsic::aarch64_neon_tbl1,
5311
9
                              "vtbl1");
5312
216
  }
5313
9
  case NEON::BI__builtin_neon_vtbl3_v: {
5314
9
    return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 3), nullptr,
5315
9
                              Ops[3], Ty, Intrinsic::aarch64_neon_tbl2,
5316
9
                              "vtbl2");
5317
216
  }
5318
9
  case NEON::BI__builtin_neon_vtbl4_v: {
5319
9
    return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 4), nullptr,
5320
9
                              Ops[4], Ty, Intrinsic::aarch64_neon_tbl2,
5321
9
                              "vtbl2");
5322
216
  }
5323
9
  case NEON::BI__builtin_neon_vtbx1_v: {
5324
9
    Value *TblRes =
5325
9
        packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 1), nullptr, Ops[2],
5326
9
                           Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1");
5327
9
5328
9
    llvm::Constant *EightV = ConstantInt::get(Ty, 8);
5329
9
    Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[2], EightV);
5330
9
    CmpRes = Builder.CreateSExt(CmpRes, Ty);
5331
9
5332
9
    Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
5333
9
    Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
5334
9
    return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
5335
216
  }
5336
9
  case NEON::BI__builtin_neon_vtbx2_v: {
5337
9
    return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 2), Ops[0],
5338
9
                              Ops[3], Ty, Intrinsic::aarch64_neon_tbx1,
5339
9
                              "vtbx1");
5340
216
  }
5341
9
  case NEON::BI__builtin_neon_vtbx3_v: {
5342
9
    Value *TblRes =
5343
9
        packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 3), nullptr, Ops[4],
5344
9
                           Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2");
5345
9
5346
9
    llvm::Constant *TwentyFourV = ConstantInt::get(Ty, 24);
5347
9
    Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[4],
5348
9
                                           TwentyFourV);
5349
9
    CmpRes = Builder.CreateSExt(CmpRes, Ty);
5350
9
5351
9
    Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
5352
9
    Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
5353
9
    return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
5354
216
  }
5355
9
  case NEON::BI__builtin_neon_vtbx4_v: {
5356
9
    return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 4), Ops[0],
5357
9
                              Ops[5], Ty, Intrinsic::aarch64_neon_tbx2,
5358
9
                              "vtbx2");
5359
216
  }
5360
18
  case NEON::BI__builtin_neon_vqtbl1_v:
5361
18
  case NEON::BI__builtin_neon_vqtbl1q_v:
5362
18
    Int = Intrinsic::aarch64_neon_tbl1; s = "vtbl1"; break;
5363
18
  case NEON::BI__builtin_neon_vqtbl2_v:
5364
18
  case NEON::BI__builtin_neon_vqtbl2q_v: {
5365
18
    Int = Intrinsic::aarch64_neon_tbl2; s = "vtbl2"; break;
5366
18
  case NEON::BI__builtin_neon_vqtbl3_v:
5367
18
  case NEON::BI__builtin_neon_vqtbl3q_v:
5368
18
    Int = Intrinsic::aarch64_neon_tbl3; s = "vtbl3"; break;
5369
18
  case NEON::BI__builtin_neon_vqtbl4_v:
5370
18
  case NEON::BI__builtin_neon_vqtbl4q_v:
5371
18
    Int = Intrinsic::aarch64_neon_tbl4; s = "vtbl4"; break;
5372
18
  case NEON::BI__builtin_neon_vqtbx1_v:
5373
18
  case NEON::BI__builtin_neon_vqtbx1q_v:
5374
18
    Int = Intrinsic::aarch64_neon_tbx1; s = "vtbx1"; break;
5375
18
  case NEON::BI__builtin_neon_vqtbx2_v:
5376
18
  case NEON::BI__builtin_neon_vqtbx2q_v:
5377
18
    Int = Intrinsic::aarch64_neon_tbx2; s = "vtbx2"; break;
5378
18
  case NEON::BI__builtin_neon_vqtbx3_v:
5379
18
  case NEON::BI__builtin_neon_vqtbx3q_v:
5380
18
    Int = Intrinsic::aarch64_neon_tbx3; s = "vtbx3"; break;
5381
18
  case NEON::BI__builtin_neon_vqtbx4_v:
5382
18
  case NEON::BI__builtin_neon_vqtbx4q_v:
5383
18
    Int = Intrinsic::aarch64_neon_tbx4; s = "vtbx4"; break;
5384
144
  }
5385
144
  }
5386
144
5387
144
  if (!Int)
5388
0
    return nullptr;
5389
144
5390
144
  Function *F = CGF.CGM.getIntrinsic(Int, Ty);
5391
144
  return CGF.EmitNeonCall(F, Ops, s);
5392
144
}
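#include <cstdint>
// Editor's note (annotation): one-lane scalar model of the vtbx emulation
// in EmitAArch64TblBuiltinExpr. The icmp/sext/and/or sequence is a
// branchless per-byte select: out-of-range indices keep the original element.
static uint8_t vtbx1_lane(uint8_t orig, uint8_t tbl_result, uint8_t idx) {
  uint8_t keep = (idx >= 8) ? 0xFF : 0x00;  // CreateICmp UGE + CreateSExt
  return static_cast<uint8_t>((keep & orig) | (~keep & tbl_result));
}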
5393
5394
36
Value *CodeGenFunction::vectorWrapScalar16(Value *Op) {
5395
36
  llvm::Type *VTy = llvm::VectorType::get(Int16Ty, 4);
5396
36
  Op = Builder.CreateBitCast(Op, Int16Ty);
5397
36
  Value *V = UndefValue::get(VTy);
5398
36
  llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
5399
36
  Op = Builder.CreateInsertElement(V, Op, CI);
5400
36
  return Op;
5401
36
}
5402
5403
Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
5404
17.3k
                                               const CallExpr *E) {
5405
17.3k
  unsigned HintID = static_cast<unsigned>(-1);
5406
17.3k
  switch (BuiltinID) {
5407
17.3k
  default: break;
5408
2
  case AArch64::BI__builtin_arm_nop:
5409
2
    HintID = 0;
5410
2
    break;
5411
2
  case AArch64::BI__builtin_arm_yield:
5412
2
    HintID = 1;
5413
2
    break;
5414
2
  case AArch64::BI__builtin_arm_wfe:
5415
2
    HintID = 2;
5416
2
    break;
5417
2
  case AArch64::BI__builtin_arm_wfi:
5418
2
    HintID = 3;
5419
2
    break;
5420
2
  case AArch64::BI__builtin_arm_sev:
5421
2
    HintID = 4;
5422
2
    break;
5423
2
  case AArch64::BI__builtin_arm_sevl:
5424
2
    HintID = 5;
5425
2
    break;
5426
17.3k
  }
5427
17.3k
5428
17.3k
  if (HintID != static_cast<unsigned>(-1)) {
5429
12
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hint);
5430
12
    return Builder.CreateCall(F, llvm::ConstantInt::get(Int32Ty, HintID));
5431
12
  }
5432
17.3k
5433
17.3k
  if (BuiltinID == AArch64::BI__builtin_arm_prefetch) {
5434
8
    Value *Address         = EmitScalarExpr(E->getArg(0));
5435
8
    Value *RW              = EmitScalarExpr(E->getArg(1));
5436
8
    Value *CacheLevel      = EmitScalarExpr(E->getArg(2));
5437
8
    Value *RetentionPolicy = EmitScalarExpr(E->getArg(3));
5438
8
    Value *IsData          = EmitScalarExpr(E->getArg(4));
5439
8
5440
8
    Value *Locality = nullptr;
5441
8
    if (cast<llvm::ConstantInt>(RetentionPolicy)->isZero()) {
5442
6
      // Temporal fetch, needs to convert cache level to locality.
5443
6
      Locality = llvm::ConstantInt::get(Int32Ty,
5444
6
        -cast<llvm::ConstantInt>(CacheLevel)->getValue() + 3);
5445
8
    } else {
5446
2
      // Streaming fetch.
5447
2
      Locality = llvm::ConstantInt::get(Int32Ty, 0);
5448
2
    }
5449
8
5450
8
    // FIXME: We need AArch64 specific LLVM intrinsic if we want to specify
5451
8
    // PLDL3STRM or PLDL2STRM.
5452
8
    Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
5453
8
    return Builder.CreateCall(F, {Address, RW, Locality, IsData});
5454
8
  }
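// Editor's note (annotation): the temporal-fetch mapping above in scalar
// form. An ACLE cache-level argument L becomes llvm.prefetch locality 3 - L,
// so the closest cache gets the strongest locality hint; streaming fetches
// get locality 0.
static int locality_for(int cacheLevel, bool streaming) {
  return streaming ? 0 : 3 - cacheLevel;
}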
5455
17.3k
5456
17.3k
  if (BuiltinID == AArch64::BI__builtin_arm_rbit) {
5457
2
    assert((getContext().getTypeSize(E->getType()) == 32) &&
5458
2
           "rbit of unusual size!");
5459
2
    llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
5460
2
    return Builder.CreateCall(
5461
2
        CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
5462
2
  }
5463
17.3k
  if (BuiltinID == AArch64::BI__builtin_arm_rbit64) {
5464
2
    assert((getContext().getTypeSize(E->getType()) == 64) &&
5465
2
           "rbit of unusual size!");
5466
2
    llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
5467
2
    return Builder.CreateCall(
5468
2
        CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
5469
2
  }
5470
17.3k
5471
17.3k
  if (BuiltinID == AArch64::BI__clear_cache) {
5472
1
    assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
5473
1
    const FunctionDecl *FD = E->getDirectCallee();
5474
1
    Value *Ops[2];
5475
3
    for (unsigned i = 0; i < 2; i++)
5476
2
      Ops[i] = EmitScalarExpr(E->getArg(i));
5477
1
    llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
5478
1
    llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
5479
1
    StringRef Name = FD->getName();
5480
1
    return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
5481
1
  }
5482
17.3k
5483
17.3k
  if ((BuiltinID == AArch64::BI__builtin_arm_ldrex ||
5484
17.3k
      BuiltinID == AArch64::BI__builtin_arm_ldaex) &&
5485
17.3k
      getContext().getTypeSize(E->getType()) == 128) {
5486
2
    Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_ldaex
5487
1
                                       ? Intrinsic::aarch64_ldaxp
5488
1
                                       : Intrinsic::aarch64_ldxp);
5489
2
5490
2
    Value *LdPtr = EmitScalarExpr(E->getArg(0));
5491
2
    Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy),
5492
2
                                    "ldxp");
5493
2
5494
2
    Value *Val0 = Builder.CreateExtractValue(Val, 1);
5495
2
    Value *Val1 = Builder.CreateExtractValue(Val, 0);
5496
2
    llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
5497
2
    Val0 = Builder.CreateZExt(Val0, Int128Ty);
5498
2
    Val1 = Builder.CreateZExt(Val1, Int128Ty);
5499
2
5500
2
    Value *ShiftCst = llvm::ConstantInt::get(Int128Ty, 64);
5501
2
    Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
5502
2
    Val = Builder.CreateOr(Val, Val1);
5503
2
    return Builder.CreateBitCast(Val, ConvertType(E->getType()));
5504
17.3k
  } else if (BuiltinID == AArch64::BI__builtin_arm_ldrex ||
5505
17.3k
             BuiltinID == AArch64::BI__builtin_arm_ldaex) {
5506
20
    Value *LoadAddr = EmitScalarExpr(E->getArg(0));
5507
20
5508
20
    QualType Ty = E->getType();
5509
20
    llvm::Type *RealResTy = ConvertType(Ty);
5510
20
    llvm::Type *PtrTy = llvm::IntegerType::get(
5511
20
        getLLVMContext(), getContext().getTypeSize(Ty))->getPointerTo();
5512
20
    LoadAddr = Builder.CreateBitCast(LoadAddr, PtrTy);
5513
20
5514
20
    Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_ldaex
5515
9
                                       ? Intrinsic::aarch64_ldaxr
5516
11
                                       : Intrinsic::aarch64_ldxr,
5517
20
                                   PtrTy);
5518
20
    Value *Val = Builder.CreateCall(F, LoadAddr, "ldxr");
5519
20
5520
20
    if (RealResTy->isPointerTy())
5521
4
      return Builder.CreateIntToPtr(Val, RealResTy);
5522
16
5523
16
    llvm::Type *IntResTy = llvm::IntegerType::get(
5524
16
        getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));
5525
16
    Val = Builder.CreateTruncOrBitCast(Val, IntResTy);
5526
16
    return Builder.CreateBitCast(Val, RealResTy);
5527
16
  }
5528
17.2k
5529
17.2k
  if ((BuiltinID == AArch64::BI__builtin_arm_strex ||
5530
17.2k
       BuiltinID == AArch64::BI__builtin_arm_stlex) &&
5531
17.2k
      getContext().getTypeSize(E->getArg(0)->getType()) == 128) {
5532
2
    Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_stlex
5533
1
                                       ? Intrinsic::aarch64_stlxp
5534
1
                                       : Intrinsic::aarch64_stxp);
5535
2
    llvm::Type *STy = llvm::StructType::get(Int64Ty, Int64Ty);
5536
2
5537
2
    Address Tmp = CreateMemTemp(E->getArg(0)->getType());
5538
2
    EmitAnyExprToMem(E->getArg(0), Tmp, Qualifiers(), /*init*/ true);
5539
2
5540
2
    Tmp = Builder.CreateBitCast(Tmp, llvm::PointerType::getUnqual(STy));
5541
2
    llvm::Value *Val = Builder.CreateLoad(Tmp);
5542
2
5543
2
    Value *Arg0 = Builder.CreateExtractValue(Val, 0);
5544
2
    Value *Arg1 = Builder.CreateExtractValue(Val, 1);
5545
2
    Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)),
5546
2
                                         Int8PtrTy);
5547
2
    return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "stxp");
5548
2
  }
5549
17.2k
5550
17.2k
  if (BuiltinID == AArch64::BI__builtin_arm_strex ||
5551
17.2k
      BuiltinID == AArch64::BI__builtin_arm_stlex) {
5552
16
    Value *StoreVal = EmitScalarExpr(E->getArg(0));
5553
16
    Value *StoreAddr = EmitScalarExpr(E->getArg(1));
5554
16
5555
16
    QualType Ty = E->getArg(0)->getType();
5556
16
    llvm::Type *StoreTy = llvm::IntegerType::get(getLLVMContext(),
5557
16
                                                 getContext().getTypeSize(Ty));
5558
16
    StoreAddr = Builder.CreateBitCast(StoreAddr, StoreTy->getPointerTo());
5559
16
5560
16
    if (StoreVal->getType()->isPointerTy())
5561
2
      StoreVal = Builder.CreatePtrToInt(StoreVal, Int64Ty);
5562
14
    else {
5563
14
      llvm::Type *IntTy = llvm::IntegerType::get(
5564
14
          getLLVMContext(),
5565
14
          CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));
5566
14
      StoreVal = Builder.CreateBitCast(StoreVal, IntTy);
5567
14
      StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int64Ty);
5568
14
    }
5569
16
5570
16
    Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_stlex
5571
7
                                       ? Intrinsic::aarch64_stlxr
5572
9
                                       : Intrinsic::aarch64_stxr,
5573
16
                                   StoreAddr->getType());
5574
16
    return Builder.CreateCall(F, {StoreVal, StoreAddr}, "stxr");
5575
16
  }
5576
17.2k
5577
17.2k
  if (BuiltinID == AArch64::BI__builtin_arm_clrex) {
5578
1
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_clrex);
5579
1
    return Builder.CreateCall(F);
5580
1
  }
5581
17.2k
5582
17.2k
  // CRC32
5583
17.2k
  Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
5584
17.2k
  switch (BuiltinID) {
5585
2
  case AArch64::BI__builtin_arm_crc32b:
5586
2
    CRCIntrinsicID = Intrinsic::aarch64_crc32b; break;
5587
2
  case AArch64::BI__builtin_arm_crc32cb:
5588
2
    CRCIntrinsicID = Intrinsic::aarch64_crc32cb; break;
5589
2
  case AArch64::BI__builtin_arm_crc32h:
5590
2
    CRCIntrinsicID = Intrinsic::aarch64_crc32h; break;
5591
2
  case AArch64::BI__builtin_arm_crc32ch:
5592
2
    CRCIntrinsicID = Intrinsic::aarch64_crc32ch; break;
5593
2
  case AArch64::BI__builtin_arm_crc32w:
5594
2
    CRCIntrinsicID = Intrinsic::aarch64_crc32w; break;
5595
2
  case AArch64::BI__builtin_arm_crc32cw:
5596
2
    CRCIntrinsicID = Intrinsic::aarch64_crc32cw; break;
5597
2
  case AArch64::BI__builtin_arm_crc32d:
5598
2
    CRCIntrinsicID = Intrinsic::aarch64_crc32x; break;
5599
2
  case AArch64::BI__builtin_arm_crc32cd:
5600
2
    CRCIntrinsicID = Intrinsic::aarch64_crc32cx; break;
5601
17.2k
  }
5602
17.2k
5603
17.2k
  if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
5604
16
    Value *Arg0 = EmitScalarExpr(E->getArg(0));
5605
16
    Value *Arg1 = EmitScalarExpr(E->getArg(1));
5606
16
    Function *F = CGM.getIntrinsic(CRCIntrinsicID);
5607
16
5608
16
    llvm::Type *DataTy = F->getFunctionType()->getParamType(1);
5609
16
    Arg1 = Builder.CreateZExtOrBitCast(Arg1, DataTy);
5610
16
5611
16
    return Builder.CreateCall(F, {Arg0, Arg1});
5612
16
  }
5613
17.2k
5614
17.2k
  if (BuiltinID == AArch64::BI__builtin_arm_rsr ||
5615
17.2k
      BuiltinID == AArch64::BI__builtin_arm_rsr64 ||
5616
17.2k
      BuiltinID == AArch64::BI__builtin_arm_rsrp ||
5617
17.2k
      BuiltinID == AArch64::BI__builtin_arm_wsr ||
5618
17.2k
      BuiltinID == AArch64::BI__builtin_arm_wsr64 ||
5619
17.2k
      BuiltinID == AArch64::BI__builtin_arm_wsrp) {
5620
12
5621
12
    bool IsRead = BuiltinID == AArch64::BI__builtin_arm_rsr ||
5622
10
                  BuiltinID == AArch64::BI__builtin_arm_rsr64 ||
5623
8
                  BuiltinID == AArch64::BI__builtin_arm_rsrp;
5624
12
5625
12
    bool IsPointerBuiltin = BuiltinID == AArch64::BI__builtin_arm_rsrp ||
5626
10
                            BuiltinID == AArch64::BI__builtin_arm_wsrp;
5627
12
5628
12
    bool Is64Bit = BuiltinID != AArch64::BI__builtin_arm_rsr &&
5629
10
                   BuiltinID != AArch64::BI__builtin_arm_wsr;
5630
12
5631
12
    llvm::Type *ValueType;
5632
12
    llvm::Type *RegisterType = Int64Ty;
5633
12
    if (IsPointerBuiltin) {
5634
4
      ValueType = VoidPtrTy;
5635
12
    } else if (Is64Bit) {
5636
4
      ValueType = Int64Ty;
5637
8
    } else {
5638
4
      ValueType = Int32Ty;
5639
4
    }
5640
12
5641
12
    return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType, IsRead);
5642
12
  }
5643
17.2k
5644
17.2k
  // Find out if any arguments are required to be integer constant
5645
17.2k
  // expressions.
5646
17.2k
  unsigned ICEArguments = 0;
5647
17.2k
  ASTContext::GetBuiltinTypeError Error;
5648
17.2k
  getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
5649
17.2k
  assert(Error == ASTContext::GE_None && "Should not codegen an error");
5650
17.2k
5651
17.2k
  llvm::SmallVector<Value*, 4> Ops;
5652
42.2k
  for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) {
5653
25.0k
    if ((ICEArguments & (1 << i)) == 0) {
5654
23.4k
      Ops.push_back(EmitScalarExpr(E->getArg(i)));
5655
25.0k
    } else {
5656
1.51k
      // If this is required to be a constant, constant fold it so that we know
5657
1.51k
      // that the generated intrinsic gets a ConstantInt.
5658
1.51k
      llvm::APSInt Result;
5659
1.51k
      bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
5660
1.51k
      assert(IsConst && "Constant arg isn't actually constant?");
5661
1.51k
      (void)IsConst;
5662
1.51k
      Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
5663
1.51k
    }
5664
25.0k
  }
5665
17.2k
5666
17.2k
  auto SISDMap = makeArrayRef(AArch64SISDIntrinsicMap);
5667
17.2k
  const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap(
5668
17.2k
      SISDMap, BuiltinID, AArch64SISDIntrinsicsProvenSorted);
5669
17.2k
5670
17.2k
  if (Builtin) {
5671
582
    Ops.push_back(EmitScalarExpr(E->getArg(E->getNumArgs() - 1)));
5672
582
    Value *Result = EmitCommonNeonSISDBuiltinExpr(*this, *Builtin, Ops, E);
5673
582
    assert(Result && "SISD intrinsic should have been handled");
5674
582
    return Result;
5675
582
  }
5676
16.6k
5677
16.6k
  llvm::APSInt Result;
5678
16.6k
  const Expr *Arg = E->getArg(E->getNumArgs()-1);
5679
16.6k
  NeonTypeFlags Type(0);
5680
16.6k
  if (Arg->isIntegerConstantExpr(Result, getContext()))
5681
16.6k
    // Determine the type of this overloaded NEON intrinsic.
5682
16.3k
    Type = NeonTypeFlags(Result.getZExtValue());
5683
16.6k
5684
16.6k
  bool usgn = Type.isUnsigned();
5685
16.6k
  bool quad = Type.isQuad();
5686
16.6k
5687
16.6k
  // Handle non-overloaded intrinsics first.
5688
16.6k
  switch (BuiltinID) {
5689
5.28k
  default: break;
5690
2
  case NEON::BI__builtin_neon_vldrq_p128: {
5691
2
    llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128);
5692
2
    llvm::Type *Int128PTy = llvm::PointerType::get(Int128Ty, 0);
5693
2
    Value *Ptr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), Int128PTy);
5694
2
    return Builder.CreateAlignedLoad(Int128Ty, Ptr,
5695
2
                                     CharUnits::fromQuantity(16));
5696
16.6k
  }
5697
2
  case NEON::BI__builtin_neon_vstrq_p128: {
5698
2
    llvm::Type *Int128PTy = llvm::Type::getIntNPtrTy(getLLVMContext(), 128);
5699
2
    Value *Ptr = Builder.CreateBitCast(Ops[0], Int128PTy);
5700
2
    return Builder.CreateDefaultAlignedStore(EmitScalarExpr(E->getArg(1)), Ptr);
5701
16.6k
  }
5702
6
  case NEON::BI__builtin_neon_vcvts_u32_f32:
5703
6
  case NEON::BI__builtin_neon_vcvtd_u64_f64:
5704
6
    usgn = true;
5705
6
    // FALL THROUGH
5706
12
  case NEON::BI__builtin_neon_vcvts_s32_f32:
5707
12
  case NEON::BI__builtin_neon_vcvtd_s64_f64: {
5708
12
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
5709
12
    bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64;
5710
12
    llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty;
5711
12
    llvm::Type *FTy = Is64 ? DoubleTy : FloatTy;
5712
12
    Ops[0] = Builder.CreateBitCast(Ops[0], FTy);
5713
12
    if (usgn)
5714
6
      return Builder.CreateFPToUI(Ops[0], InTy);
5715
6
    return Builder.CreateFPToSI(Ops[0], InTy);
5716
6
  }
5717
6
  case NEON::BI__builtin_neon_vcvts_f32_u32:
5718
6
  case NEON::BI__builtin_neon_vcvtd_f64_u64:
5719
6
    usgn = true;
5720
6
    // FALL THROUGH
5721
12
  case NEON::BI__builtin_neon_vcvts_f32_s32:
5722
12
  case NEON::BI__builtin_neon_vcvtd_f64_s64: {
5723
12
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
5724
12
    bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64;
5725
12
    llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty;
5726
12
    llvm::Type *FTy = Is64 ? DoubleTy : FloatTy;
5727
12
    Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
5728
12
    if (usgn)
5729
6
      return Builder.CreateUIToFP(Ops[0], FTy);
5730
6
    return Builder.CreateSIToFP(Ops[0], FTy);
5731
6
  }
5732
0
  case NEON::BI__builtin_neon_vpaddd_s64: {
5733
0
    llvm::Type *Ty = llvm::VectorType::get(Int64Ty, 2);
5734
0
    Value *Vec = EmitScalarExpr(E->getArg(0));
5735
0
    // The vector is v2f64, so make sure it's bitcast to that.
5736
0
    Vec = Builder.CreateBitCast(Vec, Ty, "v2i64");
5737
0
    llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
5738
0
    llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
5739
0
    Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
5740
0
    Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
5741
0
    // Pairwise addition of a v2f64 into a scalar f64.
5742
0
    return Builder.CreateAdd(Op0, Op1, "vpaddd");
5743
6
  }
5744
3
  case NEON::BI__builtin_neon_vpaddd_f64: {
5745
3
    llvm::Type *Ty =
5746
3
      llvm::VectorType::get(DoubleTy, 2);
5747
3
    Value *Vec = EmitScalarExpr(E->getArg(0));
5748
3
    // The vector is v2f64, so make sure it's bitcast to that.
5749
3
    Vec = Builder.CreateBitCast(Vec, Ty, "v2f64");
5750
3
    llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
5751
3
    llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
5752
3
    Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
5753
3
    Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
5754
3
    // Pairwise addition of a v2f64 into a scalar f64.
5755
3
    return Builder.CreateFAdd(Op0, Op1, "vpaddd");
5756
6
  }
5757
3
  case NEON::BI__builtin_neon_vpadds_f32: {
5758
3
    llvm::Type *Ty =
5759
3
      llvm::VectorType::get(FloatTy, 2);
5760
3
    Value *Vec = EmitScalarExpr(E->getArg(0));
5761
3
    // The vector is v2f32, so make sure it's bitcast to that.
5762
3
    Vec = Builder.CreateBitCast(Vec, Ty, "v2f32");
5763
3
    llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
5764
3
    llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
5765
3
    Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
5766
3
    Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
5767
3
    // Pairwise addition of a v2f32 into a scalar f32.
5768
3
    return Builder.CreateFAdd(Op0, Op1, "vpaddd");
5769
6
  }
5770
9
  case NEON::BI__builtin_neon_vceqzd_s64:
5771
9
  case NEON::BI__builtin_neon_vceqzd_f64:
5772
9
  case NEON::BI__builtin_neon_vceqzs_f32:
5773
9
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
5774
9
    return EmitAArch64CompareBuiltinExpr(
5775
9
        Ops[0], ConvertType(E->getCallReturnType(getContext())),
5776
9
        ICmpInst::FCMP_OEQ, ICmpInst::ICMP_EQ, "vceqz");
5777
9
  case NEON::BI__builtin_neon_vcgezd_s64:
5778
9
  case NEON::BI__builtin_neon_vcgezd_f64:
5779
9
  case NEON::BI__builtin_neon_vcgezs_f32:
5780
9
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
5781
9
    return EmitAArch64CompareBuiltinExpr(
5782
9
        Ops[0], ConvertType(E->getCallReturnType(getContext())),
5783
9
        ICmpInst::FCMP_OGE, ICmpInst::ICMP_SGE, "vcgez");
5784
9
  case NEON::BI__builtin_neon_vclezd_s64:
5785
9
  case NEON::BI__builtin_neon_vclezd_f64:
5786
9
  case NEON::BI__builtin_neon_vclezs_f32:
5787
9
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
5788
9
    return EmitAArch64CompareBuiltinExpr(
5789
9
        Ops[0], ConvertType(E->getCallReturnType(getContext())),
5790
9
        ICmpInst::FCMP_OLE, ICmpInst::ICMP_SLE, "vclez");
5791
9
  case NEON::BI__builtin_neon_vcgtzd_s64:
5792
9
  case NEON::BI__builtin_neon_vcgtzd_f64:
5793
9
  case NEON::BI__builtin_neon_vcgtzs_f32:
5794
9
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
5795
9
    return EmitAArch64CompareBuiltinExpr(
5796
9
        Ops[0], ConvertType(E->getCallReturnType(getContext())),
5797
9
        ICmpInst::FCMP_OGT, ICmpInst::ICMP_SGT, "vcgtz");
5798
9
  case NEON::BI__builtin_neon_vcltzd_s64:
5799
9
  case NEON::BI__builtin_neon_vcltzd_f64:
5800
9
  case NEON::BI__builtin_neon_vcltzs_f32:
5801
9
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
5802
9
    return EmitAArch64CompareBuiltinExpr(
5803
9
        Ops[0], ConvertType(E->getCallReturnType(getContext())),
5804
9
        ICmpInst::FCMP_OLT, ICmpInst::ICMP_SLT, "vcltz");
5805
9
5806
3
  case NEON::BI__builtin_neon_vceqzd_u64: {
5807
3
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
5808
3
    Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
5809
3
    Ops[0] =
5810
3
        Builder.CreateICmpEQ(Ops[0], llvm::Constant::getNullValue(Int64Ty));
5811
3
    return Builder.CreateSExt(Ops[0], Int64Ty, "vceqzd");
5812
9
  }
5813
15
  case NEON::BI__builtin_neon_vceqd_f64:
5814
15
  case NEON::BI__builtin_neon_vcled_f64:
5815
15
  case NEON::BI__builtin_neon_vcltd_f64:
5816
15
  case NEON::BI__builtin_neon_vcged_f64:
5817
15
  case NEON::BI__builtin_neon_vcgtd_f64: {
5818
15
    llvm::CmpInst::Predicate P;
5819
15
    switch (BuiltinID) {
5820
0
    default: llvm_unreachable("missing builtin ID in switch!");
5821
3
    case NEON::BI__builtin_neon_vceqd_f64: P = llvm::FCmpInst::FCMP_OEQ; break;
5822
3
    case NEON::BI__builtin_neon_vcled_f64: P = llvm::FCmpInst::FCMP_OLE; break;
5823
3
    case NEON::BI__builtin_neon_vcltd_f64: P = llvm::FCmpInst::FCMP_OLT; break;
5824
3
    case NEON::BI__builtin_neon_vcged_f64: P = llvm::FCmpInst::FCMP_OGE; break;
5825
3
    case NEON::BI__builtin_neon_vcgtd_f64: P = llvm::FCmpInst::FCMP_OGT; break;
5826
15
    }
5827
15
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
5828
15
    Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
5829
15
    Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
5830
15
    Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
5831
15
    return Builder.CreateSExt(Ops[0], Int64Ty, "vcmpd");
5832
15
  }
5833
15
  case NEON::BI__builtin_neon_vceqs_f32:
5834
15
  case NEON::BI__builtin_neon_vcles_f32:
5835
15
  case NEON::BI__builtin_neon_vclts_f32:
5836
15
  case NEON::BI__builtin_neon_vcges_f32:
5837
15
  case NEON::BI__builtin_neon_vcgts_f32: {
5838
15
    llvm::CmpInst::Predicate P;
5839
15
    switch (BuiltinID) {
5840
0
    default: llvm_unreachable("missing builtin ID in switch!");
5841
3
    case NEON::BI__builtin_neon_vceqs_f32: P = llvm::FCmpInst::FCMP_OEQ; break;
5842
3
    case NEON::BI__builtin_neon_vcles_f32: P = llvm::FCmpInst::FCMP_OLE; break;
5843
3
    case NEON::BI__builtin_neon_vclts_f32: P = llvm::FCmpInst::FCMP_OLT; break;
5844
3
    case NEON::BI__builtin_neon_vcges_f32: P = llvm::FCmpInst::FCMP_OGE; break;
5845
3
    case NEON::BI__builtin_neon_vcgts_f32: P = llvm::FCmpInst::FCMP_OGT; break;
5846
15
    }
5847
15
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
5848
15
    Ops[0] = Builder.CreateBitCast(Ops[0], FloatTy);
5849
15
    Ops[1] = Builder.CreateBitCast(Ops[1], FloatTy);
5850
15
    Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
5851
15
    return Builder.CreateSExt(Ops[0], Int32Ty, "vcmpd");
5852
15
  }
5853
30
  case NEON::BI__builtin_neon_vceqd_s64:
5854
30
  case NEON::BI__builtin_neon_vceqd_u64:
5855
30
  case NEON::BI__builtin_neon_vcgtd_s64:
5856
30
  case NEON::BI__builtin_neon_vcgtd_u64:
5857
30
  case NEON::BI__builtin_neon_vcltd_s64:
5858
30
  case NEON::BI__builtin_neon_vcltd_u64:
5859
30
  case NEON::BI__builtin_neon_vcged_u64:
5860
30
  case NEON::BI__builtin_neon_vcged_s64:
5861
30
  case NEON::BI__builtin_neon_vcled_u64:
5862
30
  case NEON::BI__builtin_neon_vcled_s64: {
5863
30
    llvm::CmpInst::Predicate P;
5864
30
    switch (BuiltinID) {
5865
0
    default: llvm_unreachable("missing builtin ID in switch!");
5866
6
    case NEON::BI__builtin_neon_vceqd_s64:
5867
6
    case NEON::BI__builtin_neon_vceqd_u64:P = llvm::ICmpInst::ICMP_EQ;break;
5868
3
    case NEON::BI__builtin_neon_vcgtd_s64:P = llvm::ICmpInst::ICMP_SGT;break;
5869
3
    case NEON::BI__builtin_neon_vcgtd_u64:P = llvm::ICmpInst::ICMP_UGT;break;
5870
3
    case NEON::BI__builtin_neon_vcltd_s64:P = llvm::ICmpInst::ICMP_SLT;break;
5871
3
    case NEON::BI__builtin_neon_vcltd_u64:P = llvm::ICmpInst::ICMP_ULT;break;
5872
3
    case NEON::BI__builtin_neon_vcged_u64:P = llvm::ICmpInst::ICMP_UGE;break;
5873
3
    case NEON::BI__builtin_neon_vcged_s64:P = llvm::ICmpInst::ICMP_SGE;break;
5874
3
    case NEON::BI__builtin_neon_vcled_u64:P = llvm::ICmpInst::ICMP_ULE;break;
5875
3
    case NEON::BI__builtin_neon_vcled_s64:P = llvm::ICmpInst::ICMP_SLE;break;
5876
30
    }
5877
30
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
5878
30
    Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
5879
30
    Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
5880
30
    Ops[0] = Builder.CreateICmp(P, Ops[0], Ops[1]);
5881
30
    return Builder.CreateSExt(Ops[0], Int64Ty, "vceqd");
5882
30
  }
5883
6
  case NEON::BI__builtin_neon_vtstd_s64:
5884
6
  case NEON::BI__builtin_neon_vtstd_u64: {
5885
6
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
5886
6
    Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
5887
6
    Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
5888
6
    Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
5889
6
    Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
5890
6
                                llvm::Constant::getNullValue(Int64Ty));
5891
6
    return Builder.CreateSExt(Ops[0], Int64Ty, "vtstd");
5892
6
  }
5893
182
  case NEON::BI__builtin_neon_vset_lane_i8:
5894
182
  case NEON::BI__builtin_neon_vset_lane_i16:
5895
182
  case NEON::BI__builtin_neon_vset_lane_i32:
5896
182
  case NEON::BI__builtin_neon_vset_lane_i64:
5897
182
  case NEON::BI__builtin_neon_vset_lane_f32:
5898
182
  case NEON::BI__builtin_neon_vsetq_lane_i8:
5899
182
  case NEON::BI__builtin_neon_vsetq_lane_i16:
5900
182
  case NEON::BI__builtin_neon_vsetq_lane_i32:
5901
182
  case NEON::BI__builtin_neon_vsetq_lane_i64:
5902
182
  case NEON::BI__builtin_neon_vsetq_lane_f32:
5903
182
    Ops.push_back(EmitScalarExpr(E->getArg(2)));
5904
182
    return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
5905
15
  case NEON::BI__builtin_neon_vset_lane_f64:
5906
15
    // The vector type needs a cast for the v1f64 variant.
5907
15
    Ops[1] = Builder.CreateBitCast(Ops[1],
5908
15
                                   llvm::VectorType::get(DoubleTy, 1));
5909
15
    Ops.push_back(EmitScalarExpr(E->getArg(2)));
5910
15
    return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
5911
7
  case NEON::BI__builtin_neon_vsetq_lane_f64:
5912
7
    // The vector type needs a cast for the v2f64 variant.
5913
7
    Ops[1] = Builder.CreateBitCast(Ops[1],
5914
7
        llvm::VectorType::get(DoubleTy, 2));
5915
7
    Ops.push_back(EmitScalarExpr(E->getArg(2)));
5916
7
    return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
5917
182
5918
34
  case NEON::BI__builtin_neon_vget_lane_i8:
5919
34
  case NEON::BI__builtin_neon_vdupb_lane_i8:
5920
34
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int8Ty, 8));
5921
34
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5922
34
                                        "vget_lane");
5923
32
  case NEON::BI__builtin_neon_vgetq_lane_i8:
5924
32
  case NEON::BI__builtin_neon_vdupb_laneq_i8:
5925
32
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int8Ty, 16));
5926
32
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5927
32
                                        "vgetq_lane");
5928
46
  case NEON::BI__builtin_neon_vget_lane_i16:
5929
46
  case NEON::BI__builtin_neon_vduph_lane_i16:
5930
46
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int16Ty, 4));
5931
46
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5932
46
                                        "vget_lane");
5933
44
  case NEON::BI__builtin_neon_vgetq_lane_i16:
5934
44
  case NEON::BI__builtin_neon_vduph_laneq_i16:
5935
44
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int16Ty, 8));
5936
44
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5937
44
                                        "vgetq_lane");
5938
35
  case NEON::BI__builtin_neon_vget_lane_i32:
5939
35
  case NEON::BI__builtin_neon_vdups_lane_i32:
5940
35
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 2));
5941
35
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5942
35
                                        "vget_lane");
5943
5
  case NEON::BI__builtin_neon_vdups_lane_f32:
5944
5
    Ops[0] = Builder.CreateBitCast(Ops[0],
5945
5
        llvm::VectorType::get(FloatTy, 2));
5946
5
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5947
5
                                        "vdups_lane");
5948
33
  case NEON::BI__builtin_neon_vgetq_lane_i32:
5949
33
  case NEON::BI__builtin_neon_vdups_laneq_i32:
5950
33
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 4));
5951
33
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5952
33
                                        "vgetq_lane");
5953
2.60k
  case NEON::BI__builtin_neon_vget_lane_i64:
5954
2.60k
  case NEON::BI__builtin_neon_vdupd_lane_i64:
5955
2.60k
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 1));
5956
2.60k
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5957
2.60k
                                        "vget_lane");
5958
5
  case NEON::BI__builtin_neon_vdupd_lane_f64:
5959
5
    Ops[0] = Builder.CreateBitCast(Ops[0],
5960
5
        llvm::VectorType::get(DoubleTy, 1));
5961
5
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5962
5
                                        "vdupd_lane");
5963
6.05k
  case NEON::BI__builtin_neon_vgetq_lane_i64:
5964
6.05k
  case NEON::BI__builtin_neon_vdupd_laneq_i64:
5965
6.05k
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2));
5966
6.05k
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5967
6.05k
                                        "vgetq_lane");
5968
497
  case NEON::BI__builtin_neon_vget_lane_f32:
5969
497
    Ops[0] = Builder.CreateBitCast(Ops[0],
5970
497
        llvm::VectorType::get(FloatTy, 2));
5971
497
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5972
497
                                        "vget_lane");
5973
191
  case NEON::BI__builtin_neon_vget_lane_f64:
5974
191
    Ops[0] = Builder.CreateBitCast(Ops[0],
5975
191
        llvm::VectorType::get(DoubleTy, 1));
5976
191
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5977
191
                                        "vget_lane");
5978
969
  case NEON::BI__builtin_neon_vgetq_lane_f32:
5979
969
  case NEON::BI__builtin_neon_vdups_laneq_f32:
5980
969
    Ops[0] = Builder.CreateBitCast(Ops[0],
5981
969
        llvm::VectorType::get(FloatTy, 4));
5982
969
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5983
969
                                        "vgetq_lane");
5984
397
  case NEON::BI__builtin_neon_vgetq_lane_f64:
5985
397
  case NEON::BI__builtin_neon_vdupd_laneq_f64:
5986
397
    Ops[0] = Builder.CreateBitCast(Ops[0],
5987
397
        llvm::VectorType::get(DoubleTy, 2));
5988
397
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5989
397
                                        "vgetq_lane");
5990
6
  case NEON::BI__builtin_neon_vaddd_s64:
5991
6
  case NEON::BI__builtin_neon_vaddd_u64:
5992
6
    return Builder.CreateAdd(Ops[0], EmitScalarExpr(E->getArg(1)), "vaddd");
5993
6
  case NEON::BI__builtin_neon_vsubd_s64:
5994
6
  case NEON::BI__builtin_neon_vsubd_u64:
5995
6
    return Builder.CreateSub(Ops[0], EmitScalarExpr(E->getArg(1)), "vsubd");
5996
6
  case NEON::BI__builtin_neon_vqdmlalh_s16:
5997
6
  case NEON::BI__builtin_neon_vqdmlslh_s16: {
5998
6
    SmallVector<Value *, 2> ProductOps;
5999
6
    ProductOps.push_back(vectorWrapScalar16(Ops[1]));
6000
6
    ProductOps.push_back(vectorWrapScalar16(EmitScalarExpr(E->getArg(2))));
6001
6
    llvm::Type *VTy = llvm::VectorType::get(Int32Ty, 4);
6002
6
    Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
6003
6
                          ProductOps, "vqdmlXl");
6004
6
    Constant *CI = ConstantInt::get(SizeTy, 0);
6005
6
    Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
6006
6
6007
6
    unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlalh_s16
6008
3
                                        ? Intrinsic::aarch64_neon_sqadd
6009
3
                                        : Intrinsic::aarch64_neon_sqsub;
6010
6
    return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int32Ty), Ops, "vqdmlXl");
6011
6
  }
6012
3
  case NEON::BI__builtin_neon_vqshlud_n_s64: {
6013
3
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
6014
3
    Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
6015
3
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqshlu, Int64Ty),
6016
3
                        Ops, "vqshlu_n");
6017
6
  }
6018
6
  case NEON::BI__builtin_neon_vqshld_n_u64:
6019
6
  case NEON::BI__builtin_neon_vqshld_n_s64: {
6020
6
    unsigned Int = BuiltinID == NEON::BI__builtin_neon_vqshld_n_u64
6021
3
                                   ? Intrinsic::aarch64_neon_uqshl
6022
3
                                   : Intrinsic::aarch64_neon_sqshl;
6023
6
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
6024
6
    Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
6025
6
    return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vqshl_n");
6026
6
  }
6027
6
  case NEON::BI__builtin_neon_vrshrd_n_u64:
6028
6
  case NEON::BI__builtin_neon_vrshrd_n_s64: {
6029
6
    unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrshrd_n_u64
6030
3
                                   ? Intrinsic::aarch64_neon_urshl
6031
3
                                   : Intrinsic::aarch64_neon_srshl;
6032
6
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
6033
6
    int SV = cast<ConstantInt>(Ops[1])->getSExtValue();
6034
6
    Ops[1] = ConstantInt::get(Int64Ty, -SV);
6035
6
    return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vrshr_n");
6036
6
  }
6037
6
  case NEON::BI__builtin_neon_vrsrad_n_u64:
6038
6
  case NEON::BI__builtin_neon_vrsrad_n_s64: {
6039
6
    unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrsrad_n_u64
6040
3
                                   ? Intrinsic::aarch64_neon_urshl
6041
3
                                   : Intrinsic::aarch64_neon_srshl;
6042
6
    Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
6043
6
    Ops.push_back(Builder.CreateNeg(EmitScalarExpr(E->getArg(2))));
6044
6
    Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Int64Ty),
6045
6
                                {Ops[1], Builder.CreateSExt(Ops[2], Int64Ty)});
6046
6
    return Builder.CreateAdd(Ops[0], Builder.CreateBitCast(Ops[1], Int64Ty));
6047
6
  }
6048
6
  case NEON::BI__builtin_neon_vshld_n_s64:
6049
6
  case NEON::BI__builtin_neon_vshld_n_u64: {
6050
6
    llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
6051
6
    return Builder.CreateShl(
6052
6
        Ops[0], ConstantInt::get(Int64Ty, Amt->getZExtValue()), "shld_n");
6053
6
  }
6054
3
  case NEON::BI__builtin_neon_vshrd_n_s64: {
6055
3
    llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
6056
3
    return Builder.CreateAShr(
6057
3
        Ops[0], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
6058
3
                                                   Amt->getZExtValue())),
6059
3
        "shrd_n");
6060
6
  }
6061
4
  case NEON::BI__builtin_neon_vshrd_n_u64: {
6062
4
    llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
6063
4
    uint64_t ShiftAmt = Amt->getZExtValue();
6064
4
    // Right-shifting an unsigned value by its size yields 0.
6065
4
    if (ShiftAmt == 64)
6066
2
      return ConstantInt::get(Int64Ty, 0);
6067
2
    return Builder.CreateLShr(Ops[0], ConstantInt::get(Int64Ty, ShiftAmt),
6068
2
                              "shrd_n");
6069
2
  }
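// Worked example of the ShiftAmt == 64 case (illustrative): LLVM's lshr by
// the full bit width is poison, so the lowering folds it to the constant 0
// that the ACLE semantics require.
#include <arm_neon.h>
uint64_t shift_out(uint64_t x) {
  return vshrd_n_u64(x, 64);  // compile-time 0, no lshr is emitted
}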
6070
3
  case NEON::BI__builtin_neon_vsrad_n_s64: {
6071
3
    llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
6072
3
    Ops[1] = Builder.CreateAShr(
6073
3
        Ops[1], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
6074
3
                                                   Amt->getZExtValue())),
6075
3
        "shrd_n");
6076
3
    return Builder.CreateAdd(Ops[0], Ops[1]);
6077
2
  }
6078
4
  case NEON::BI__builtin_neon_vsrad_n_u64: {
6079
4
    llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
6080
4
    uint64_t ShiftAmt = Amt->getZExtValue();
6081
4
    // Right-shifting an unsigned value by its size yields 0.
6082
4
    // As Op + 0 = Op, return Ops[0] directly.
6083
4
    if (ShiftAmt == 64)
6084
1
      return Ops[0];
6085
3
    Ops[1] = Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, ShiftAmt),
6086
3
                                "shrd_n");
6087
3
    return Builder.CreateAdd(Ops[0], Ops[1]);
6088
3
  }
6089
12
  case NEON::BI__builtin_neon_vqdmlalh_lane_s16:
6090
12
  case NEON::BI__builtin_neon_vqdmlalh_laneq_s16:
6091
12
  case NEON::BI__builtin_neon_vqdmlslh_lane_s16:
6092
12
  case NEON::BI__builtin_neon_vqdmlslh_laneq_s16: {
6093
12
    Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
6094
12
                                          "lane");
6095
12
    SmallVector<Value *, 2> ProductOps;
6096
12
    ProductOps.push_back(vectorWrapScalar16(Ops[1]));
6097
12
    ProductOps.push_back(vectorWrapScalar16(Ops[2]));
6098
12
    llvm::Type *VTy = llvm::VectorType::get(Int32Ty, 4);
6099
12
    Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
6100
12
                          ProductOps, "vqdmlXl");
6101
12
    Constant *CI = ConstantInt::get(SizeTy, 0);
6102
12
    Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
6103
12
    Ops.pop_back();
6104
12
6105
12
    unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlalh_lane_s16 ||
6106
9
                       BuiltinID == NEON::BI__builtin_neon_vqdmlalh_laneq_s16)
6107
6
                          ? Intrinsic::aarch64_neon_sqadd
6108
6
                          : Intrinsic::aarch64_neon_sqsub;
6109
12
    return EmitNeonCall(CGM.getIntrinsic(AccInt, Int32Ty), Ops, "vqdmlXl");
6110
12
  }
6111
6
  case NEON::BI__builtin_neon_vqdmlals_s32:
6112
6
  case NEON::BI__builtin_neon_vqdmlsls_s32: {
6113
6
    SmallVector<Value *, 2> ProductOps;
6114
6
    ProductOps.push_back(Ops[1]);
6115
6
    ProductOps.push_back(EmitScalarExpr(E->getArg(2)));
6116
6
    Ops[1] =
6117
6
        EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
6118
6
                     ProductOps, "vqdmlXl");
6119
6
6120
6
    unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlals_s32
6121
3
                                        ? Intrinsic::aarch64_neon_sqadd
6122
3
                                        : Intrinsic::aarch64_neon_sqsub;
6123
6
    return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int64Ty), Ops, "vqdmlXl");
6124
6
  }
6125
12
  case NEON::BI__builtin_neon_vqdmlals_lane_s32:
6126
12
  case NEON::BI__builtin_neon_vqdmlals_laneq_s32:
6127
12
  case NEON::BI__builtin_neon_vqdmlsls_lane_s32:
6128
12
  case NEON::BI__builtin_neon_vqdmlsls_laneq_s32: {
6129
12
    Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
6130
12
                                          "lane");
6131
12
    SmallVector<Value *, 2> ProductOps;
6132
12
    ProductOps.push_back(Ops[1]);
6133
12
    ProductOps.push_back(Ops[2]);
6134
12
    Ops[1] =
6135
12
        EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
6136
12
                     ProductOps, "vqdmlXl");
6137
12
    Ops.pop_back();
6138
12
6139
12
    unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlals_lane_s32 ||
6140
9
                       BuiltinID == NEON::BI__builtin_neon_vqdmlals_laneq_s32)
6141
6
                          ? Intrinsic::aarch64_neon_sqadd
6142
6
                          : Intrinsic::aarch64_neon_sqsub;
6143
12
    return EmitNeonCall(CGM.getIntrinsic(AccInt, Int64Ty), Ops, "vqdmlXl");
6144
5.28k
  }
6145
5.28k
  }
6146
5.28k
6147
5.28k
  llvm::VectorType *VTy = GetNeonType(this, Type);
6148
5.28k
  llvm::Type *Ty = VTy;
6149
5.28k
  if (!Ty)
6150
0
    return nullptr;
6151
5.28k
6152
5.28k
  // Not all intrinsics handled by the common case work for AArch64 yet, so only
6153
5.28k
  // defer to common code if it's been added to our special map.
6154
5.28k
  Builtin = findNeonIntrinsicInMap(AArch64SIMDIntrinsicMap, BuiltinID,
6155
5.28k
                                   AArch64SIMDIntrinsicsProvenSorted);
6156
5.28k
6157
5.28k
  if (Builtin)
6158
1.57k
    return EmitCommonNeonBuiltinExpr(
6159
1.57k
        Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
6160
1.57k
        Builtin->NameHint, Builtin->TypeModifier, E, Ops,
6161
1.57k
        /*never use addresses*/ Address::invalid(), Address::invalid());
6162
3.70k
6163
3.70k
  if (Value *V = EmitAArch64TblBuiltinExpr(*this, BuiltinID, E, Ops))
6164
216
    return V;
6165
3.48k
6166
3.48k
  unsigned Int;
6167
3.48k
  switch (BuiltinID) {
6168
0
  default: return nullptr;
6169
73
  case NEON::BI__builtin_neon_vbsl_v:
6170
73
  case NEON::BI__builtin_neon_vbslq_v: {
6171
73
    llvm::Type *BitTy = llvm::VectorType::getInteger(VTy);
6172
73
    Ops[0] = Builder.CreateBitCast(Ops[0], BitTy, "vbsl");
6173
73
    Ops[1] = Builder.CreateBitCast(Ops[1], BitTy, "vbsl");
6174
73
    Ops[2] = Builder.CreateBitCast(Ops[2], BitTy, "vbsl");
6175
73
6176
73
    Ops[1] = Builder.CreateAnd(Ops[0], Ops[1], "vbsl");
6177
73
    Ops[2] = Builder.CreateAnd(Builder.CreateNot(Ops[0]), Ops[2], "vbsl");
6178
73
    Ops[0] = Builder.CreateOr(Ops[1], Ops[2], "vbsl");
6179
73
    return Builder.CreateBitCast(Ops[0], Ty);
6180
73
  }
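// The block above open-codes the bitwise select identity
//   result = (mask & a) | (~mask & b).
// A hedged usage sketch (arm_neon.h assumed):
#include <arm_neon.h>
uint8x8_t blend(uint8x8_t mask, uint8x8_t a, uint8x8_t b) {
  return vbsl_u8(mask, a, b);  // each result bit picks a where mask is 1
}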
6181
28
  case NEON::BI__builtin_neon_vfma_lane_v:
6182
28
  case NEON::BI__builtin_neon_vfmaq_lane_v: { // Only used for FP types
6183
28
    // The ARM builtins (and instructions) have the addend as the first
6184
28
    // operand, but the 'fma' intrinsics have it last. Swap it around here.
6185
28
    Value *Addend = Ops[0];
6186
28
    Value *Multiplicand = Ops[1];
6187
28
    Value *LaneSource = Ops[2];
6188
28
    Ops[0] = Multiplicand;
6189
28
    Ops[1] = LaneSource;
6190
28
    Ops[2] = Addend;
6191
28
6192
28
    // Now adjust things to handle the lane access.
6193
28
    llvm::Type *SourceTy = BuiltinID == NEON::BI__builtin_neon_vfmaq_lane_v ?
6194
14
      llvm::VectorType::get(VTy->getElementType(), VTy->getNumElements() / 2) :
6195
14
      VTy;
6196
28
    llvm::Constant *cst = cast<Constant>(Ops[3]);
6197
28
    Value *SV = llvm::ConstantVector::getSplat(VTy->getNumElements(), cst);
6198
28
    Ops[1] = Builder.CreateBitCast(Ops[1], SourceTy);
6199
28
    Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV, "lane");
6200
28
6201
28
    Ops.pop_back();
6202
28
    Int = Intrinsic::fma;
6203
28
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fmla");
6204
28
  }
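// Sketch of a lane-FMA caller (names are assumptions): the builtin passes
// the addend first, and the code above rotates it into llvm.fma's last slot.
#include <arm_neon.h>
float32x2_t fma_lane0(float32x2_t acc, float32x2_t a, float32x2_t v) {
  return vfma_lane_f32(acc, a, v, 0);  // acc + a * v[0]
}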
6205
14
  case NEON::BI__builtin_neon_vfma_laneq_v: {
6206
14
    llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
6207
14
    // v1f64 fma should be mapped to Neon scalar f64 fma
6208
14
    if (VTy && VTy->getElementType() == DoubleTy) {
6209
6
      Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
6210
6
      Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
6211
6
      llvm::Type *VTy = GetNeonType(this,
6212
6
        NeonTypeFlags(NeonTypeFlags::Float64, false, true));
6213
6
      Ops[2] = Builder.CreateBitCast(Ops[2], VTy);
6214
6
      Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
6215
6
      Value *F = CGM.getIntrinsic(Intrinsic::fma, DoubleTy);
6216
6
      Value *Result = Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]});
6217
6
      return Builder.CreateBitCast(Result, Ty);
6218
6
    }
6219
8
    Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
6220
8
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6221
8
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6222
8
6223
8
    llvm::Type *STy = llvm::VectorType::get(VTy->getElementType(),
6224
8
                                            VTy->getNumElements() * 2);
6225
8
    Ops[2] = Builder.CreateBitCast(Ops[2], STy);
6226
8
    Value* SV = llvm::ConstantVector::getSplat(VTy->getNumElements(),
6227
8
                                               cast<ConstantInt>(Ops[3]));
6228
8
    Ops[2] = Builder.CreateShuffleVector(Ops[2], Ops[2], SV, "lane");
6229
8
6230
8
    return Builder.CreateCall(F, {Ops[2], Ops[1], Ops[0]});
6231
8
  }
6232
16
  case NEON::BI__builtin_neon_vfmaq_laneq_v: {
6233
16
    Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
6234
16
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6235
16
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6236
16
6237
16
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6238
16
    Ops[2] = EmitNeonSplat(Ops[2], cast<ConstantInt>(Ops[3]));
6239
16
    return Builder.CreateCall(F, {Ops[2], Ops[1], Ops[0]});
6240
8
  }
6241
24
  case NEON::BI__builtin_neon_vfmas_lane_f32:
6242
24
  case NEON::BI__builtin_neon_vfmas_laneq_f32:
6243
24
  case NEON::BI__builtin_neon_vfmad_lane_f64:
6244
24
  case NEON::BI__builtin_neon_vfmad_laneq_f64: {
6245
24
    Ops.push_back(EmitScalarExpr(E->getArg(3)));
6246
24
    llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
6247
24
    Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
6248
24
    Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
6249
24
    return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]});
6250
24
  }
6251
37
  case NEON::BI__builtin_neon_vmull_v:
6252
37
    // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6253
37
    Int = usgn ? Intrinsic::aarch64_neon_umull : Intrinsic::aarch64_neon_smull;
6254
37
    if (Type.isPoly()) Int = Intrinsic::aarch64_neon_pmull;
6255
37
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
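// Illustrative widening multiply that selects the smull path above; the
// unsigned and polynomial variants pick umull/pmull instead (assumption:
// arm_neon.h is available).
#include <arm_neon.h>
int32x4_t widen_mul(int16x4_t a, int16x4_t b) {
  return vmull_s16(a, b);  // llvm.aarch64.neon.smull
}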
6256
48
  case NEON::BI__builtin_neon_vmax_v:
6257
48
  case NEON::BI__builtin_neon_vmaxq_v:
6258
48
    // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6259
48
    Int = usgn ? Intrinsic::aarch64_neon_umax : Intrinsic::aarch64_neon_smax;
6260
48
    if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmax;
6261
48
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmax");
6262
48
  case NEON::BI__builtin_neon_vmin_v:
6263
48
  case NEON::BI__builtin_neon_vminq_v:
6264
48
    // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6265
48
    Int = usgn ? Intrinsic::aarch64_neon_umin : Intrinsic::aarch64_neon_smin;
6266
48
    if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmin;
6267
48
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmin");
6268
48
  case NEON::BI__builtin_neon_vabd_v:
6269
48
  case NEON::BI__builtin_neon_vabdq_v:
6270
48
    // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6271
48
    Int = usgn ? Intrinsic::aarch64_neon_uabd : Intrinsic::aarch64_neon_sabd;
6272
48
    if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fabd;
6273
48
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vabd");
6274
36
  case NEON::BI__builtin_neon_vpadal_v:
6275
36
  case NEON::BI__builtin_neon_vpadalq_v: {
6276
36
    unsigned ArgElts = VTy->getNumElements();
6277
36
    llvm::IntegerType *EltTy = cast<IntegerType>(VTy->getElementType());
6278
36
    unsigned BitWidth = EltTy->getBitWidth();
6279
36
    llvm::Type *ArgTy = llvm::VectorType::get(
6280
36
        llvm::IntegerType::get(getLLVMContext(), BitWidth/2), 2*ArgElts);
6281
36
    llvm::Type* Tys[2] = { VTy, ArgTy };
6282
36
    Int = usgn ? Intrinsic::aarch64_neon_uaddlp : Intrinsic::aarch64_neon_saddlp;
6283
36
    SmallVector<llvm::Value*, 1> TmpOps;
6284
36
    TmpOps.push_back(Ops[1]);
6285
36
    Function *F = CGM.getIntrinsic(Int, Tys);
6286
36
    llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vpadal");
6287
36
    llvm::Value *addend = Builder.CreateBitCast(Ops[0], tmp->getType());
6288
36
    return Builder.CreateAdd(tmp, addend);
6289
36
  }
6290
45
  case NEON::BI__builtin_neon_vpmin_v:
6291
45
  case NEON::BI__builtin_neon_vpminq_v:
6292
45
    // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6293
45
    Int = usgn ? Intrinsic::aarch64_neon_uminp : Intrinsic::aarch64_neon_sminp;
6294
45
    if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fminp;
6295
45
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin");
6296
45
  case NEON::BI__builtin_neon_vpmax_v:
6297
45
  case NEON::BI__builtin_neon_vpmaxq_v:
6298
45
    // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6299
45
    Int = usgn ? Intrinsic::aarch64_neon_umaxp : Intrinsic::aarch64_neon_smaxp;
6300
45
    if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmaxp;
6301
45
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax");
6302
12
  case NEON::BI__builtin_neon_vminnm_v:
6303
12
  case NEON::BI__builtin_neon_vminnmq_v:
6304
12
    Int = Intrinsic::aarch64_neon_fminnm;
6305
12
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vminnm");
6306
12
  case NEON::BI__builtin_neon_vmaxnm_v:
6307
12
  case NEON::BI__builtin_neon_vmaxnmq_v:
6308
12
    Int = Intrinsic::aarch64_neon_fmaxnm;
6309
12
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmaxnm");
6310
3
  case NEON::BI__builtin_neon_vrecpss_f32: {
6311
3
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
6312
3
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, FloatTy),
6313
3
                        Ops, "vrecps");
6314
12
  }
6315
3
  case NEON::BI__builtin_neon_vrecpsd_f64: {
6316
3
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
6317
3
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, DoubleTy),
6318
3
                        Ops, "vrecps");
6319
12
  }
6320
18
  case NEON::BI__builtin_neon_vqshrun_n_v:
6321
18
    Int = Intrinsic::aarch64_neon_sqshrun;
6322
18
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrun_n");
6323
18
  case NEON::BI__builtin_neon_vqrshrun_n_v:
6324
18
    Int = Intrinsic::aarch64_neon_sqrshrun;
6325
18
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrun_n");
6326
36
  case NEON::BI__builtin_neon_vqshrn_n_v:
6327
36
    Int = usgn ? Intrinsic::aarch64_neon_uqshrn : Intrinsic::aarch64_neon_sqshrn;
6328
36
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n");
6329
36
  case NEON::BI__builtin_neon_vrshrn_n_v:
6330
36
    Int = Intrinsic::aarch64_neon_rshrn;
6331
36
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshrn_n");
6332
37
  case NEON::BI__builtin_neon_vqrshrn_n_v:
6333
37
    Int = usgn ? Intrinsic::aarch64_neon_uqrshrn : Intrinsic::aarch64_neon_sqrshrn;
6334
37
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n");
6335
15
  case NEON::BI__builtin_neon_vrnda_v:
6336
15
  case NEON::BI__builtin_neon_vrndaq_v: {
6337
15
    Int = Intrinsic::round;
6338
15
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnda");
6339
15
  }
6340
12
  case NEON::BI__builtin_neon_vrndi_v:
6341
12
  case NEON::BI__builtin_neon_vrndiq_v: {
6342
12
    Int = Intrinsic::nearbyint;
6343
12
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndi");
6344
12
  }
6345
15
  case NEON::BI__builtin_neon_vrndm_v:
6346
15
  case NEON::BI__builtin_neon_vrndmq_v: {
6347
15
    Int = Intrinsic::floor;
6348
15
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndm");
6349
15
  }
6350
15
  case NEON::BI__builtin_neon_vrndn_v:
6351
15
  case NEON::BI__builtin_neon_vrndnq_v: {
6352
15
    Int = Intrinsic::aarch64_neon_frintn;
6353
15
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndn");
6354
15
  }
6355
15
  case NEON::BI__builtin_neon_vrndp_v:
6356
15
  case NEON::BI__builtin_neon_vrndpq_v: {
6357
15
    Int = Intrinsic::ceil;
6358
15
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndp");
6359
15
  }
6360
15
  case NEON::BI__builtin_neon_vrndx_v:
6361
15
  case NEON::BI__builtin_neon_vrndxq_v: {
6362
15
    Int = Intrinsic::rint;
6363
15
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndx");
6364
15
  }
6365
15
  case NEON::BI__builtin_neon_vrnd_v:
6366
15
  case NEON::BI__builtin_neon_vrndq_v: {
6367
15
    Int = Intrinsic::trunc;
6368
15
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndz");
6369
15
  }
6370
70
  case NEON::BI__builtin_neon_vceqz_v:
6371
70
  case NEON::BI__builtin_neon_vceqzq_v:
6372
70
    return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OEQ,
6373
70
                                         ICmpInst::ICMP_EQ, "vceqz");
6374
36
  case NEON::BI__builtin_neon_vcgez_v:
6375
36
  case NEON::BI__builtin_neon_vcgezq_v:
6376
36
    return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGE,
6377
36
                                         ICmpInst::ICMP_SGE, "vcgez");
6378
36
  case NEON::BI__builtin_neon_vclez_v:
6379
36
  case NEON::BI__builtin_neon_vclezq_v:
6380
36
    return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLE,
6381
36
                                         ICmpInst::ICMP_SLE, "vclez");
6382
36
  case NEON::BI__builtin_neon_vcgtz_v:
6383
36
  case NEON::BI__builtin_neon_vcgtzq_v:
6384
36
    return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGT,
6385
36
                                         ICmpInst::ICMP_SGT, "vcgtz");
6386
36
  case NEON::BI__builtin_neon_vcltz_v:
6387
36
  case NEON::BI__builtin_neon_vcltzq_v:
6388
36
    return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLT,
6389
36
                                         ICmpInst::ICMP_SLT, "vcltz");
6390
12
  case NEON::BI__builtin_neon_vcvt_f64_v:
6391
12
  case NEON::BI__builtin_neon_vcvtq_f64_v:
6392
12
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6393
12
    Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad));
6394
6
    return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
6395
6
                : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
6396
3
  case NEON::BI__builtin_neon_vcvt_f64_f32: {
6397
3
    assert(Type.getEltType() == NeonTypeFlags::Float64 && quad &&
6398
3
           "unexpected vcvt_f64_f32 builtin");
6399
3
    NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float32, false, false);
6400
3
    Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
6401
3
6402
3
    return Builder.CreateFPExt(Ops[0], Ty, "vcvt");
6403
12
  }
6404
3
  case NEON::BI__builtin_neon_vcvt_f32_f64: {
6405
3
    assert(Type.getEltType() == NeonTypeFlags::Float32 &&
6406
3
           "unexpected vcvt_f32_f64 builtin");
6407
3
    NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float64, false, true);
6408
3
    Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
6409
3
6410
3
    return Builder.CreateFPTrunc(Ops[0], Ty, "vcvt");
6411
12
  }
6412
24
  case NEON::BI__builtin_neon_vcvt_s32_v:
6413
24
  case NEON::BI__builtin_neon_vcvt_u32_v:
6414
24
  case NEON::BI__builtin_neon_vcvt_s64_v:
6415
24
  case NEON::BI__builtin_neon_vcvt_u64_v:
6416
24
  case NEON::BI__builtin_neon_vcvtq_s32_v:
6417
24
  case NEON::BI__builtin_neon_vcvtq_u32_v:
6418
24
  case NEON::BI__builtin_neon_vcvtq_s64_v:
6419
24
  case NEON::BI__builtin_neon_vcvtq_u64_v: {
6420
24
    Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
6421
24
    if (usgn)
6422
12
      return Builder.CreateFPToUI(Ops[0], Ty);
6423
12
    return Builder.CreateFPToSI(Ops[0], Ty);
6424
12
  }
6425
24
  case NEON::BI__builtin_neon_vcvta_s32_v:
6426
24
  case NEON::BI__builtin_neon_vcvtaq_s32_v:
6427
24
  case NEON::BI__builtin_neon_vcvta_u32_v:
6428
24
  case NEON::BI__builtin_neon_vcvtaq_u32_v:
6429
24
  case NEON::BI__builtin_neon_vcvta_s64_v:
6430
24
  case NEON::BI__builtin_neon_vcvtaq_s64_v:
6431
24
  case NEON::BI__builtin_neon_vcvta_u64_v:
6432
24
  case NEON::BI__builtin_neon_vcvtaq_u64_v: {
6433
24
    Int = usgn ? Intrinsic::aarch64_neon_fcvtau : Intrinsic::aarch64_neon_fcvtas;
6434
24
    llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
6435
24
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvta");
6436
24
  }
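// Hedged example of the "round to nearest, ties away" conversion chosen
// above (fcvtas for signed, fcvtau when usgn is set):
#include <arm_neon.h>
int32x2_t to_int_rna(float32x2_t v) {
  return vcvta_s32_f32(v);  // llvm.aarch64.neon.fcvtas on <2 x float>
}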
6437
24
  case NEON::BI__builtin_neon_vcvtm_s32_v:
6438
24
  case NEON::BI__builtin_neon_vcvtmq_s32_v:
6439
24
  case NEON::BI__builtin_neon_vcvtm_u32_v:
6440
24
  case NEON::BI__builtin_neon_vcvtmq_u32_v:
6441
24
  case NEON::BI__builtin_neon_vcvtm_s64_v:
6442
24
  case NEON::BI__builtin_neon_vcvtmq_s64_v:
6443
24
  case NEON::BI__builtin_neon_vcvtm_u64_v:
6444
24
  case NEON::BI__builtin_neon_vcvtmq_u64_v: {
6445
24
    Int = usgn ? Intrinsic::aarch64_neon_fcvtmu : Intrinsic::aarch64_neon_fcvtms;
6446
24
    llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
6447
24
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtm");
6448
24
  }
6449
24
  case NEON::BI__builtin_neon_vcvtn_s32_v:
6450
24
  case NEON::BI__builtin_neon_vcvtnq_s32_v:
6451
24
  case NEON::BI__builtin_neon_vcvtn_u32_v:
6452
24
  case NEON::BI__builtin_neon_vcvtnq_u32_v:
6453
24
  case NEON::BI__builtin_neon_vcvtn_s64_v:
6454
24
  case NEON::BI__builtin_neon_vcvtnq_s64_v:
6455
24
  case NEON::BI__builtin_neon_vcvtn_u64_v:
6456
24
  case NEON::BI__builtin_neon_vcvtnq_u64_v: {
6457
24
    Int = usgn ? Intrinsic::aarch64_neon_fcvtnu : Intrinsic::aarch64_neon_fcvtns;
6458
24
    llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
6459
24
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtn");
6460
24
  }
6461
24
  case NEON::BI__builtin_neon_vcvtp_s32_v:
6462
24
  case NEON::BI__builtin_neon_vcvtpq_s32_v:
6463
24
  case NEON::BI__builtin_neon_vcvtp_u32_v:
6464
24
  case NEON::BI__builtin_neon_vcvtpq_u32_v:
6465
24
  case NEON::BI__builtin_neon_vcvtp_s64_v:
6466
24
  case NEON::BI__builtin_neon_vcvtpq_s64_v:
6467
24
  case NEON::BI__builtin_neon_vcvtp_u64_v:
6468
24
  case NEON::BI__builtin_neon_vcvtpq_u64_v: {
6469
24
    Int = usgn ? Intrinsic::aarch64_neon_fcvtpu : Intrinsic::aarch64_neon_fcvtps;
6470
24
    llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
6471
24
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtp");
6472
24
  }
6473
15
  case NEON::BI__builtin_neon_vmulx_v:
6474
15
  case NEON::BI__builtin_neon_vmulxq_v: {
6475
15
    Int = Intrinsic::aarch64_neon_fmulx;
6476
15
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmulx");
6477
15
  }
6478
7
  case NEON::BI__builtin_neon_vmul_lane_v:
6479
7
  case NEON::BI__builtin_neon_vmul_laneq_v: {
6480
7
    // v1f64 vmul_lane should be mapped to Neon scalar mul lane
6481
7
    bool Quad = false;
6482
7
    if (BuiltinID == NEON::BI__builtin_neon_vmul_laneq_v)
6483
4
      Quad = true;
6484
7
    Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
6485
7
    llvm::Type *VTy = GetNeonType(this,
6486
7
      NeonTypeFlags(NeonTypeFlags::Float64, false, Quad));
6487
7
    Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
6488
7
    Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
6489
7
    Value *Result = Builder.CreateFMul(Ops[0], Ops[1]);
6490
7
    return Builder.CreateBitCast(Result, Ty);
6491
7
  }
6492
1
  case NEON::BI__builtin_neon_vnegd_s64:
6493
1
    return Builder.CreateNeg(EmitScalarExpr(E->getArg(0)), "vnegd");
6494
9
  case NEON::BI__builtin_neon_vpmaxnm_v:
6495
9
  case NEON::BI__builtin_neon_vpmaxnmq_v: {
6496
9
    Int = Intrinsic::aarch64_neon_fmaxnmp;
6497
9
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmaxnm");
6498
9
  }
6499
9
  case NEON::BI__builtin_neon_vpminnm_v:
6500
9
  case NEON::BI__builtin_neon_vpminnmq_v: {
6501
9
    Int = Intrinsic::aarch64_neon_fminnmp;
6502
9
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpminnm");
6503
9
  }
6504
12
  case NEON::BI__builtin_neon_vsqrt_v:
6505
12
  case NEON::BI__builtin_neon_vsqrtq_v: {
6506
12
    Int = Intrinsic::sqrt;
6507
12
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6508
12
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqrt");
6509
12
  }
6510
18
  case NEON::BI__builtin_neon_vrbit_v:
6511
18
  case NEON::BI__builtin_neon_vrbitq_v: {
6512
18
    Int = Intrinsic::aarch64_neon_rbit;
6513
18
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrbit");
6514
18
  }
6515
3
  case NEON::BI__builtin_neon_vaddv_u8:
6516
3
    // FIXME: These are handled by the AArch64 scalar code.
6517
3
    usgn = true;
6518
3
    // FALLTHROUGH
6519
6
  case NEON::BI__builtin_neon_vaddv_s8: {
6520
6
    Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
6521
6
    Ty = Int32Ty;
6522
6
    VTy = llvm::VectorType::get(Int8Ty, 8);
6523
6
    llvm::Type *Tys[2] = { Ty, VTy };
6524
6
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
6525
6
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
6526
6
    return Builder.CreateTrunc(Ops[0], Int8Ty);
6527
3
  }
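// Illustrative across-vector reduction reaching the saddv path above; the
// intrinsic produces an i32 that is truncated back to the element width.
#include <arm_neon.h>
int8_t sum_across(int8x8_t v) {
  return vaddv_s8(v);  // llvm.aarch64.neon.saddv, then trunc to i8
}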
6528
3
  case NEON::BI__builtin_neon_vaddv_u16:
6529
3
    usgn = true;
6530
3
    // FALLTHROUGH
6531
6
  case NEON::BI__builtin_neon_vaddv_s16: {
6532
6
    Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
6533
6
    Ty = Int32Ty;
6534
6
    VTy = llvm::VectorType::get(Int16Ty, 4);
6535
6
    llvm::Type *Tys[2] = { Ty, VTy };
6536
6
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
6537
6
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
6538
6
    return Builder.CreateTrunc(Ops[0], Int16Ty);
6539
3
  }
6540
3
  case NEON::BI__builtin_neon_vaddvq_u8:
6541
3
    usgn = true;
6542
3
    // FALLTHROUGH
6543
6
  case NEON::BI__builtin_neon_vaddvq_s8: {
6544
6
    Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
6545
6
    Ty = Int32Ty;
6546
6
    VTy = llvm::VectorType::get(Int8Ty, 16);
6547
6
    llvm::Type *Tys[2] = { Ty, VTy };
6548
6
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
6549
6
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
6550
6
    return Builder.CreateTrunc(Ops[0], Int8Ty);
6551
3
  }
6552
3
  case NEON::BI__builtin_neon_vaddvq_u16:
6553
3
    usgn = true;
6554
3
    // FALLTHROUGH
6555
6
  case NEON::BI__builtin_neon_vaddvq_s16: {
6556
6
    Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
6557
6
    Ty = Int32Ty;
6558
6
    VTy = llvm::VectorType::get(Int16Ty, 8);
6559
6
    llvm::Type *Tys[2] = { Ty, VTy };
6560
6
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
6561
6
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
6562
6
    return Builder.CreateTrunc(Ops[0], Int16Ty);
6563
3
  }
6564
3
  case NEON::BI__builtin_neon_vmaxv_u8: {
6565
3
    Int = Intrinsic::aarch64_neon_umaxv;
6566
3
    Ty = Int32Ty;
6567
3
    VTy = llvm::VectorType::get(Int8Ty, 8);
6568
3
    llvm::Type *Tys[2] = { Ty, VTy };
6569
3
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
6570
3
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6571
3
    return Builder.CreateTrunc(Ops[0], Int8Ty);
6572
3
  }
6573
3
  case NEON::BI__builtin_neon_vmaxv_u16: {
6574
3
    Int = Intrinsic::aarch64_neon_umaxv;
6575
3
    Ty = Int32Ty;
6576
3
    VTy = llvm::VectorType::get(Int16Ty, 4);
6577
3
    llvm::Type *Tys[2] = { Ty, VTy };
6578
3
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
6579
3
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6580
3
    return Builder.CreateTrunc(Ops[0], Int16Ty);
6581
3
  }
6582
3
  case NEON::BI__builtin_neon_vmaxvq_u8: {
6583
3
    Int = Intrinsic::aarch64_neon_umaxv;
6584
3
    Ty = Int32Ty;
6585
3
    VTy = llvm::VectorType::get(Int8Ty, 16);
6586
3
    llvm::Type *Tys[2] = { Ty, VTy };
6587
3
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
6588
3
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6589
3
    return Builder.CreateTrunc(Ops[0], Int8Ty);
6590
3
  }
6591
3
  case NEON::BI__builtin_neon_vmaxvq_u16: {
6592
3
    Int = Intrinsic::aarch64_neon_umaxv;
6593
3
    Ty = Int32Ty;
6594
3
    VTy = llvm::VectorType::get(Int16Ty, 8);
6595
3
    llvm::Type *Tys[2] = { Ty, VTy };
6596
3
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
6597
3
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6598
3
    return Builder.CreateTrunc(Ops[0], Int16Ty);
6599
3
  }
6600
3
  case NEON::BI__builtin_neon_vmaxv_s8: {
6601
3
    Int = Intrinsic::aarch64_neon_smaxv;
6602
3
    Ty = Int32Ty;
6603
3
    VTy = llvm::VectorType::get(Int8Ty, 8);
6604
3
    llvm::Type *Tys[2] = { Ty, VTy };
6605
3
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
6606
3
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6607
3
    return Builder.CreateTrunc(Ops[0], Int8Ty);
6608
3
  }
6609
3
  case NEON::BI__builtin_neon_vmaxv_s16: {
6610
3
    Int = Intrinsic::aarch64_neon_smaxv;
6611
3
    Ty = Int32Ty;
6612
3
    VTy = llvm::VectorType::get(Int16Ty, 4);
6613
3
    llvm::Type *Tys[2] = { Ty, VTy };
6614
3
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
6615
3
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6616
3
    return Builder.CreateTrunc(Ops[0], Int16Ty);
6617
3
  }
6618
3
  case NEON::BI__builtin_neon_vmaxvq_s8: {
6619
3
    Int = Intrinsic::aarch64_neon_smaxv;
6620
3
    Ty = Int32Ty;
6621
3
    VTy = llvm::VectorType::get(Int8Ty, 16);
6622
3
    llvm::Type *Tys[2] = { Ty, VTy };
6623
3
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
6624
3
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6625
3
    return Builder.CreateTrunc(Ops[0], Int8Ty);
6626
3
  }
6627
3
  case NEON::BI__builtin_neon_vmaxvq_s16: {
6628
3
    Int = Intrinsic::aarch64_neon_smaxv;
6629
3
    Ty = Int32Ty;
6630
3
    VTy = llvm::VectorType::get(Int16Ty, 8);
6631
3
    llvm::Type *Tys[2] = { Ty, VTy };
6632
3
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
6633
3
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6634
3
    return Builder.CreateTrunc(Ops[0], Int16Ty);
6635
3
  }
6636
3
  case NEON::BI__builtin_neon_vminv_u8: {
6637
3
    Int = Intrinsic::aarch64_neon_uminv;
6638
3
    Ty = Int32Ty;
6639
3
    VTy = llvm::VectorType::get(Int8Ty, 8);
6640
3
    llvm::Type *Tys[2] = { Ty, VTy };
6641
3
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
6642
3
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6643
3
    return Builder.CreateTrunc(Ops[0], Int8Ty);
6644
3
  }
6645
3
  case NEON::BI__builtin_neon_vminv_u16: {
6646
3
    Int = Intrinsic::aarch64_neon_uminv;
6647
3
    Ty = Int32Ty;
6648
3
    VTy = llvm::VectorType::get(Int16Ty, 4);
6649
3
    llvm::Type *Tys[2] = { Ty, VTy };
6650
3
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
6651
3
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6652
3
    return Builder.CreateTrunc(Ops[0], Int16Ty);
6653
3
  }
6654
3
  case NEON::BI__builtin_neon_vminvq_u8: {
6655
3
    Int = Intrinsic::aarch64_neon_uminv;
6656
3
    Ty = Int32Ty;
6657
3
    VTy = llvm::VectorType::get(Int8Ty, 16);
6658
3
    llvm::Type *Tys[2] = { Ty, VTy };
6659
3
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
6660
3
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6661
3
    return Builder.CreateTrunc(Ops[0], Int8Ty);
6662
3
  }
6663
3
  case NEON::BI__builtin_neon_vminvq_u16: {
6664
3
    Int = Intrinsic::aarch64_neon_uminv;
6665
3
    Ty = Int32Ty;
6666
3
    VTy = llvm::VectorType::get(Int16Ty, 8);
6667
3
    llvm::Type *Tys[2] = { Ty, VTy };
6668
3
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
6669
3
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6670
3
    return Builder.CreateTrunc(Ops[0], Int16Ty);
6671
3
  }
6672
3
  case NEON::BI__builtin_neon_vminv_s8: {
6673
3
    Int = Intrinsic::aarch64_neon_sminv;
6674
3
    Ty = Int32Ty;
6675
3
    VTy = llvm::VectorType::get(Int8Ty, 8);
6676
3
    llvm::Type *Tys[2] = { Ty, VTy };
6677
3
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
6678
3
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6679
3
    return Builder.CreateTrunc(Ops[0], Int8Ty);
6680
3
  }
6681
3
  case NEON::BI__builtin_neon_vminv_s16: {
6682
3
    Int = Intrinsic::aarch64_neon_sminv;
6683
3
    Ty = Int32Ty;
6684
3
    VTy = llvm::VectorType::get(Int16Ty, 4);
6685
3
    llvm::Type *Tys[2] = { Ty, VTy };
6686
3
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
6687
3
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6688
3
    return Builder.CreateTrunc(Ops[0], Int16Ty);
6689
3
  }
6690
3
  case NEON::BI__builtin_neon_vminvq_s8: {
6691
3
    Int = Intrinsic::aarch64_neon_sminv;
6692
3
    Ty = Int32Ty;
6693
3
    VTy = llvm::VectorType::get(Int8Ty, 16);
6694
3
    llvm::Type *Tys[2] = { Ty, VTy };
6695
3
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
6696
3
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6697
3
    return Builder.CreateTrunc(Ops[0], Int8Ty);
6698
3
  }
6699
3
  case NEON::BI__builtin_neon_vminvq_s16: {
6700
3
    Int = Intrinsic::aarch64_neon_sminv;
6701
3
    Ty = Int32Ty;
6702
3
    VTy = llvm::VectorType::get(Int16Ty, 8);
6703
3
    llvm::Type *Tys[2] = { Ty, VTy };
6704
3
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
6705
3
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6706
3
    return Builder.CreateTrunc(Ops[0], Int16Ty);
6707
3
  }
6708
3
  case NEON::BI__builtin_neon_vmul_n_f64: {
6709
3
    Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
6710
3
    Value *RHS = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), DoubleTy);
6711
3
    return Builder.CreateFMul(Ops[0], RHS);
6712
3
  }
6713
3
  case NEON::BI__builtin_neon_vaddlv_u8: {
6714
3
    Int = Intrinsic::aarch64_neon_uaddlv;
6715
3
    Ty = Int32Ty;
6716
3
    VTy = llvm::VectorType::get(Int8Ty, 8);
6717
3
    llvm::Type *Tys[2] = { Ty, VTy };
6718
3
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
6719
3
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6720
3
    return Builder.CreateTrunc(Ops[0], Int16Ty);
6721
3
  }
6722
3
  case NEON::BI__builtin_neon_vaddlv_u16: {
6723
3
    Int = Intrinsic::aarch64_neon_uaddlv;
6724
3
    Ty = Int32Ty;
6725
3
    VTy = llvm::VectorType::get(Int16Ty, 4);
6726
3
    llvm::Type *Tys[2] = { Ty, VTy };
6727
3
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
6728
3
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6729
3
  }
6730
3
  case NEON::BI__builtin_neon_vaddlvq_u8: {
6731
3
    Int = Intrinsic::aarch64_neon_uaddlv;
6732
3
    Ty = Int32Ty;
6733
3
    VTy = llvm::VectorType::get(Int8Ty, 16);
6734
3
    llvm::Type *Tys[2] = { Ty, VTy };
6735
3
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
6736
3
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6737
3
    return Builder.CreateTrunc(Ops[0], Int16Ty);
6738
3
  }
6739
3
  case NEON::BI__builtin_neon_vaddlvq_u16: {
6740
3
    Int = Intrinsic::aarch64_neon_uaddlv;
6741
3
    Ty = Int32Ty;
6742
3
    VTy = llvm::VectorType::get(Int16Ty, 8);
6743
3
    llvm::Type *Tys[2] = { Ty, VTy };
6744
3
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
6745
3
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6746
3
  }
6747
3
  case NEON::BI__builtin_neon_vaddlv_s8: {
6748
3
    Int = Intrinsic::aarch64_neon_saddlv;
6749
3
    Ty = Int32Ty;
6750
3
    VTy = llvm::VectorType::get(Int8Ty, 8);
6751
3
    llvm::Type *Tys[2] = { Ty, VTy };
6752
3
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
6753
3
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6754
3
    return Builder.CreateTrunc(Ops[0], Int16Ty);
6755
3
  }
6756
3
  case NEON::BI__builtin_neon_vaddlv_s16: {
6757
3
    Int = Intrinsic::aarch64_neon_saddlv;
6758
3
    Ty = Int32Ty;
6759
3
    VTy = llvm::VectorType::get(Int16Ty, 4);
6760
3
    llvm::Type *Tys[2] = { Ty, VTy };
6761
3
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
6762
3
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6763
3
  }
6764
3
  case NEON::BI__builtin_neon_vaddlvq_s8: {
6765
3
    Int = Intrinsic::aarch64_neon_saddlv;
6766
3
    Ty = Int32Ty;
6767
3
    VTy = llvm::VectorType::get(Int8Ty, 16);
6768
3
    llvm::Type *Tys[2] = { Ty, VTy };
6769
3
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
6770
3
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6771
3
    return Builder.CreateTrunc(Ops[0], Int16Ty);
6772
3
  }
6773
3
  case NEON::BI__builtin_neon_vaddlvq_s16: {
6774
3
    Int = Intrinsic::aarch64_neon_saddlv;
6775
3
    Ty = Int32Ty;
6776
3
    VTy = llvm::VectorType::get(Int16Ty, 8);
6777
3
    llvm::Type *Tys[2] = { Ty, VTy };
6778
3
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
6779
3
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6780
3
  }
6781
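
All of these across-vector reductions share one shape: call an aarch64.neon.{u,s}minv or {u,s}addlv intrinsic that collapses the whole vector into an i32, then truncate to the result width. For reference, a minimal scalar sketch of the vaddlv_u8 semantics (illustrative only; the model_* name is hypothetical and not part of this file):

#include <cstdint>
#include <cstdio>

// Scalar model of vaddlv_u8: every u8 lane is widened and summed; the
// emitted IR receives an i32 from the intrinsic and truncates it to i16.
static uint16_t model_vaddlv_u8(const uint8_t v[8]) {
  uint32_t acc = 0;                   // the intrinsic's i32 result
  for (int i = 0; i < 8; ++i)
    acc += v[i];
  return static_cast<uint16_t>(acc);  // Builder.CreateTrunc(..., Int16Ty)
}

int main() {
  uint8_t v[8] = {250, 250, 250, 250, 1, 2, 3, 4};
  printf("%u\n", model_vaddlv_u8(v)); // prints 1010, which no u8 could hold
}
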
62
  case NEON::BI__builtin_neon_vsri_n_v:
6782
62
  case NEON::BI__builtin_neon_vsriq_n_v: {
6783
62
    Int = Intrinsic::aarch64_neon_vsri;
6784
62
    llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
6785
62
    return EmitNeonCall(Intrin, Ops, "vsri_n");
6786
62
  }
6787
62
  case NEON::BI__builtin_neon_vsli_n_v:
6788
62
  case NEON::BI__builtin_neon_vsliq_n_v: {
6789
62
    Int = Intrinsic::aarch64_neon_vsli;
6790
62
    llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
6791
62
    return EmitNeonCall(Intrin, Ops, "vsli_n");
6792
62
  }
6793
51
  case NEON::BI__builtin_neon_vsra_n_v:
6794
51
  case NEON::BI__builtin_neon_vsraq_n_v:
6795
51
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6796
51
    Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
6797
51
    return Builder.CreateAdd(Ops[0], Ops[1]);
6798
48
  case NEON::BI__builtin_neon_vrsra_n_v:
6799
48
  case NEON::BI__builtin_neon_vrsraq_n_v: {
6800
48
    Int = usgn ? Intrinsic::aarch64_neon_urshl : Intrinsic::aarch64_neon_srshl;
6801
48
    SmallVector<llvm::Value*,2> TmpOps;
6802
48
    TmpOps.push_back(Ops[1]);
6803
48
    TmpOps.push_back(Ops[2]);
6804
48
    Function* F = CGM.getIntrinsic(Int, Ty);
6805
48
    llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vrshr_n", 1, true);
6806
48
    Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
6807
48
    return Builder.CreateAdd(Ops[0], tmp);
6808
48
  }
6809
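
The vrsra_n lowering splits the work in two: a rounding right shift through the urshl/srshl intrinsic (EmitNeonCall negates the shift amount, per its trailing two arguments), then a plain add onto the accumulator. A scalar sketch of one unsigned lane, assuming the usual ARM definition of a rounding right shift (model_* is a hypothetical name):

#include <cstdint>
#include <cstdio>

// Scalar model of one lane of vrsra_n_u32: rounding shift right, then add.
static uint32_t model_vrsra_n_u32(uint32_t acc, uint32_t v, unsigned n) {
  // Rounding shift right: add half the divisor before shifting.
  uint64_t rounded = (static_cast<uint64_t>(v) + (1ull << (n - 1))) >> n;
  return acc + static_cast<uint32_t>(rounded); // Builder.CreateAdd(Ops[0], tmp)
}

int main() {
  printf("%u\n", model_vrsra_n_u32(100, 7, 2)); // 7 >> 2 rounds to 2, so 102
}
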
48
    // FIXME: Sharing loads & stores with 32-bit is complicated by the absence
6810
48
    // of an Align parameter here.
6811
84
  case NEON::BI__builtin_neon_vld1_x2_v:
6812
84
  case NEON::BI__builtin_neon_vld1q_x2_v:
6813
84
  case NEON::BI__builtin_neon_vld1_x3_v:
6814
84
  case NEON::BI__builtin_neon_vld1q_x3_v:
6815
84
  case NEON::BI__builtin_neon_vld1_x4_v:
6816
84
  case NEON::BI__builtin_neon_vld1q_x4_v: {
6817
84
    llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getVectorElementType());
6818
84
    Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
6819
84
    llvm::Type *Tys[2] = { VTy, PTy };
6820
84
    unsigned Int;
6821
84
    switch (BuiltinID) {
6822
28
    case NEON::BI__builtin_neon_vld1_x2_v:
6823
28
    case NEON::BI__builtin_neon_vld1q_x2_v:
6824
28
      Int = Intrinsic::aarch64_neon_ld1x2;
6825
28
      break;
6826
28
    case NEON::BI__builtin_neon_vld1_x3_v:
6827
28
    case NEON::BI__builtin_neon_vld1q_x3_v:
6828
28
      Int = Intrinsic::aarch64_neon_ld1x3;
6829
28
      break;
6830
28
    case NEON::BI__builtin_neon_vld1_x4_v:
6831
28
    case NEON::BI__builtin_neon_vld1q_x4_v:
6832
28
      Int = Intrinsic::aarch64_neon_ld1x4;
6833
28
      break;
6834
84
    }
6835
84
    Function *F = CGM.getIntrinsic(Int, Tys);
6836
84
    Ops[1] = Builder.CreateCall(F, Ops[1], "vld1xN");
6837
84
    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
6838
84
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6839
84
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6840
84
  }
6841
84
  case NEON::BI__builtin_neon_vst1_x2_v:
6842
84
  case NEON::BI__builtin_neon_vst1q_x2_v:
6843
84
  case NEON::BI__builtin_neon_vst1_x3_v:
6844
84
  case NEON::BI__builtin_neon_vst1q_x3_v:
6845
84
  case NEON::BI__builtin_neon_vst1_x4_v:
6846
84
  case NEON::BI__builtin_neon_vst1q_x4_v: {
6847
84
    llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getVectorElementType());
6848
84
    llvm::Type *Tys[2] = { VTy, PTy };
6849
84
    unsigned Int;
6850
84
    switch (BuiltinID) {
6851
28
    case NEON::BI__builtin_neon_vst1_x2_v:
6852
28
    case NEON::BI__builtin_neon_vst1q_x2_v:
6853
28
      Int = Intrinsic::aarch64_neon_st1x2;
6854
28
      break;
6855
28
    case NEON::BI__builtin_neon_vst1_x3_v:
6856
28
    case NEON::BI__builtin_neon_vst1q_x3_v:
6857
28
      Int = Intrinsic::aarch64_neon_st1x3;
6858
28
      break;
6859
28
    case NEON::BI__builtin_neon_vst1_x4_v:
6860
28
    case NEON::BI__builtin_neon_vst1q_x4_v:
6861
28
      Int = Intrinsic::aarch64_neon_st1x4;
6862
28
      break;
6863
84
    }
6864
84
    std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
6865
84
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "");
6866
84
  }
6867
88
  case NEON::BI__builtin_neon_vld1_v:
6868
88
  case NEON::BI__builtin_neon_vld1q_v: {
6869
88
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy));
6870
88
    auto Alignment = CharUnits::fromQuantity(
6871
88
        BuiltinID == NEON::BI__builtin_neon_vld1_v ? 8 : 16);
6872
88
    return Builder.CreateAlignedLoad(VTy, Ops[0], Alignment);
6873
88
  }
6874
86
  case NEON::BI__builtin_neon_vst1_v:
6875
86
  case NEON::BI__builtin_neon_vst1q_v:
6876
86
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy));
6877
86
    Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
6878
86
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6879
80
  case NEON::BI__builtin_neon_vld1_lane_v:
6880
80
  case NEON::BI__builtin_neon_vld1q_lane_v: {
6881
80
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6882
80
    Ty = llvm::PointerType::getUnqual(VTy->getElementType());
6883
80
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6884
80
    auto Alignment = CharUnits::fromQuantity(
6885
80
        BuiltinID == NEON::BI__builtin_neon_vld1_lane_v ? 8 : 16);
6886
80
    Ops[0] =
6887
80
        Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0], Alignment);
6888
80
    return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vld1_lane");
6889
80
  }
6890
80
  case NEON::BI__builtin_neon_vld1_dup_v:
6891
80
  case NEON::BI__builtin_neon_vld1q_dup_v: {
6892
80
    Value *V = UndefValue::get(Ty);
6893
80
    Ty = llvm::PointerType::getUnqual(VTy->getElementType());
6894
80
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6895
80
    auto Alignment = CharUnits::fromQuantity(
6896
80
        BuiltinID == NEON::BI__builtin_neon_vld1_dup_v ? 8 : 16);
6897
80
    Ops[0] =
6898
80
        Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0], Alignment);
6899
80
    llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
6900
80
    Ops[0] = Builder.CreateInsertElement(V, Ops[0], CI);
6901
80
    return EmitNeonSplat(Ops[0], CI);
6902
80
  }
6903
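
So vld1_dup loads one scalar, inserts it at lane 0 of an undef vector, and splats lane 0 into every position. A scalar sketch of the same semantics (hypothetical model_* helper):

#include <cstdint>
#include <cstdio>

// Scalar model of vld1_dup: load a single element and replicate it into
// every lane (insertelement at lane 0 followed by an all-zero-index splat).
static void model_vld1_dup_u16(const uint16_t *p, uint16_t out[4]) {
  uint16_t x = *p;          // CreateAlignedLoad of one element
  for (int i = 0; i < 4; ++i)
    out[i] = x;             // EmitNeonSplat: shuffle with an all-zero mask
}

int main() {
  uint16_t v = 42, out[4];
  model_vld1_dup_u16(&v, out);
  printf("%u %u %u %u\n", out[0], out[1], out[2], out[3]); // 42 42 42 42
}
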
80
  case NEON::BI__builtin_neon_vst1_lane_v:
6904
80
  case NEON::BI__builtin_neon_vst1q_lane_v:
6905
80
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6906
80
    Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
6907
80
    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
6908
80
    return Builder.CreateDefaultAlignedStore(Ops[1],
6909
80
                                             Builder.CreateBitCast(Ops[0], Ty));
6910
80
  case NEON::BI__builtin_neon_vld2_v:
6911
80
  case NEON::BI__builtin_neon_vld2q_v: {
6912
80
    llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
6913
80
    Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
6914
80
    llvm::Type *Tys[2] = { VTy, PTy };
6915
80
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2, Tys);
6916
80
    Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
6917
80
    Ops[0] = Builder.CreateBitCast(Ops[0],
6918
80
                llvm::PointerType::getUnqual(Ops[1]->getType()));
6919
80
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6920
80
  }
6921
80
  case NEON::BI__builtin_neon_vld3_v:
6922
80
  case NEON::BI__builtin_neon_vld3q_v: {
6923
80
    llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
6924
80
    Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
6925
80
    llvm::Type *Tys[2] = { VTy, PTy };
6926
80
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3, Tys);
6927
80
    Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
6928
80
    Ops[0] = Builder.CreateBitCast(Ops[0],
6929
80
                llvm::PointerType::getUnqual(Ops[1]->getType()));
6930
80
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6931
80
  }
6932
80
  case NEON::BI__builtin_neon_vld4_v:
6933
80
  case NEON::BI__builtin_neon_vld4q_v: {
6934
80
    llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
6935
80
    Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
6936
80
    llvm::Type *Tys[2] = { VTy, PTy };
6937
80
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4, Tys);
6938
80
    Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
6939
80
    Ops[0] = Builder.CreateBitCast(Ops[0],
6940
80
                llvm::PointerType::getUnqual(Ops[1]->getType()));
6941
80
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6942
80
  }
6943
80
  case NEON::BI__builtin_neon_vld2_dup_v:
6944
80
  case NEON::BI__builtin_neon_vld2q_dup_v: {
6945
80
    llvm::Type *PTy =
6946
80
      llvm::PointerType::getUnqual(VTy->getElementType());
6947
80
    Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
6948
80
    llvm::Type *Tys[2] = { VTy, PTy };
6949
80
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2r, Tys);
6950
80
    Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
6951
80
    Ops[0] = Builder.CreateBitCast(Ops[0],
6952
80
                llvm::PointerType::getUnqual(Ops[1]->getType()));
6953
80
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6954
80
  }
6955
80
  case NEON::BI__builtin_neon_vld3_dup_v:
6956
80
  case NEON::BI__builtin_neon_vld3q_dup_v: {
6957
80
    llvm::Type *PTy =
6958
80
      llvm::PointerType::getUnqual(VTy->getElementType());
6959
80
    Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
6960
80
    llvm::Type *Tys[2] = { VTy, PTy };
6961
80
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3r, Tys);
6962
80
    Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
6963
80
    Ops[0] = Builder.CreateBitCast(Ops[0],
6964
80
                llvm::PointerType::getUnqual(Ops[1]->getType()));
6965
80
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6966
80
  }
6967
80
  case NEON::BI__builtin_neon_vld4_dup_v:
6968
80
  case NEON::BI__builtin_neon_vld4q_dup_v: {
6969
80
    llvm::Type *PTy =
6970
80
      llvm::PointerType::getUnqual(VTy->getElementType());
6971
80
    Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
6972
80
    llvm::Type *Tys[2] = { VTy, PTy };
6973
80
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4r, Tys);
6974
80
    Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
6975
80
    Ops[0] = Builder.CreateBitCast(Ops[0],
6976
80
                llvm::PointerType::getUnqual(Ops[1]->getType()));
6977
80
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6978
80
  }
6979
80
  case NEON::BI__builtin_neon_vld2_lane_v:
6980
80
  case NEON::BI__builtin_neon_vld2q_lane_v: {
6981
80
    llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
6982
80
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2lane, Tys);
6983
80
    Ops.push_back(Ops[1]);
6984
80
    Ops.erase(Ops.begin()+1);
6985
80
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6986
80
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6987
80
    Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
6988
80
    Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld2_lane");
6989
80
    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
6990
80
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6991
80
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6992
80
  }
6993
80
  case NEON::BI__builtin_neon_vld3_lane_v:
6994
80
  case NEON::BI__builtin_neon_vld3q_lane_v: {
6995
80
    llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
6996
80
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3lane, Tys);
6997
80
    Ops.push_back(Ops[1]);
6998
80
    Ops.erase(Ops.begin()+1);
6999
80
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7000
80
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
7001
80
    Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
7002
80
    Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
7003
80
    Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld3_lane");
7004
80
    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
7005
80
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
7006
80
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7007
80
  }
7008
80
  case NEON::BI__builtin_neon_vld4_lane_v:
7009
80
  case NEON::BI__builtin_neon_vld4q_lane_v: {
7010
80
    llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
7011
80
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4lane, Tys);
7012
80
    Ops.push_back(Ops[1]);
7013
80
    Ops.erase(Ops.begin()+1);
7014
80
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7015
80
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
7016
80
    Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
7017
80
    Ops[4] = Builder.CreateBitCast(Ops[4], Ty);
7018
80
    Ops[5] = Builder.CreateZExt(Ops[5], Int64Ty);
7019
80
    Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld4_lane");
7020
80
    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
7021
80
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
7022
80
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7023
80
  }
7024
80
  case NEON::BI__builtin_neon_vst2_v:
7025
80
  case NEON::BI__builtin_neon_vst2q_v: {
7026
80
    Ops.push_back(Ops[0]);
7027
80
    Ops.erase(Ops.begin());
7028
80
    llvm::Type *Tys[2] = { VTy, Ops[2]->getType() };
7029
80
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2, Tys),
7030
80
                        Ops, "");
7031
80
  }
7032
80
  case NEON::BI__builtin_neon_vst2_lane_v:
7033
80
  case NEON::BI__builtin_neon_vst2q_lane_v: {
7034
80
    Ops.push_back(Ops[0]);
7035
80
    Ops.erase(Ops.begin());
7036
80
    Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
7037
80
    llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
7038
80
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2lane, Tys),
7039
80
                        Ops, "");
7040
80
  }
7041
80
  case NEON::BI__builtin_neon_vst3_v:
7042
80
  case NEON::BI__builtin_neon_vst3q_v: {
7043
80
    Ops.push_back(Ops[0]);
7044
80
    Ops.erase(Ops.begin());
7045
80
    llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
7046
80
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3, Tys),
7047
80
                        Ops, "");
7048
80
  }
7049
80
  case NEON::BI__builtin_neon_vst3_lane_v:
7050
80
  case NEON::BI__builtin_neon_vst3q_lane_v: {
7051
80
    Ops.push_back(Ops[0]);
7052
80
    Ops.erase(Ops.begin());
7053
80
    Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
7054
80
    llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
7055
80
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3lane, Tys),
7056
80
                        Ops, "");
7057
80
  }
7058
80
  case NEON::BI__builtin_neon_vst4_v:
7059
80
  case NEON::BI__builtin_neon_vst4q_v: {
7060
80
    Ops.push_back(Ops[0]);
7061
80
    Ops.erase(Ops.begin());
7062
80
    llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
7063
80
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4, Tys),
7064
80
                        Ops, "");
7065
80
  }
7066
80
  case NEON::BI__builtin_neon_vst4_lane_v:
7067
80
  case NEON::BI__builtin_neon_vst4q_lane_v: {
7068
80
    Ops.push_back(Ops[0]);
7069
80
    Ops.erase(Ops.begin());
7070
80
    Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
7071
80
    llvm::Type *Tys[2] = { VTy, Ops[5]->getType() };
7072
80
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4lane, Tys),
7073
80
                        Ops, "");
7074
80
  }
7075
54
  case NEON::BI__builtin_neon_vtrn_v:
7076
54
  case NEON::BI__builtin_neon_vtrnq_v: {
7077
54
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
7078
54
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7079
54
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
7080
54
    Value *SV = nullptr;
7081
54
7082
162
    for (unsigned vi = 0; vi != 2; ++vi) {
7083
108
      SmallVector<uint32_t, 16> Indices;
7084
486
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
7085
378
        Indices.push_back(i+vi);
7086
378
        Indices.push_back(i+e+vi);
7087
378
      }
7088
108
      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
7089
108
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
7090
108
      SV = Builder.CreateDefaultAlignedStore(SV, Addr);
7091
108
    }
7092
54
    return SV;
7093
54
  }
7094
60
  case NEON::BI__builtin_neon_vuzp_v:
7095
60
  case NEON::BI__builtin_neon_vuzpq_v: {
7096
60
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
7097
60
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7098
60
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
7099
60
    Value *SV = nullptr;
7100
60
7101
180
    for (unsigned vi = 0; vi != 2; ++vi) {
7102
120
      SmallVector<uint32_t, 16> Indices;
7103
988
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
7104
868
        Indices.push_back(2*i+vi);
7105
120
7106
120
      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
7107
120
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
7108
120
      SV = Builder.CreateDefaultAlignedStore(SV, Addr);
7109
120
    }
7110
60
    return SV;
7111
60
  }
7112
61
  case NEON::BI__builtin_neon_vzip_v:
7113
61
  case NEON::BI__builtin_neon_vzipq_v: {
7114
61
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
7115
61
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7116
61
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
7117
61
    Value *SV = nullptr;
7118
61
7119
183
    for (unsigned vi = 0; vi != 2; ++vi) {
7120
122
      SmallVector<uint32_t, 16> Indices;
7121
572
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
7122
450
        Indices.push_back((i + vi*e) >> 1);
7123
450
        Indices.push_back(((i + vi*e) >> 1)+e);
7124
450
      }
7125
122
      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
7126
122
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
7127
122
      SV = Builder.CreateDefaultAlignedStore(SV, Addr);
7128
122
    }
7129
61
    return SV;
7130
61
  }
7131
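
The three interleave builtins differ only in the shuffle masks they build; running the index formulas above for a 4-element vector makes the patterns visible. A self-contained sketch that just prints the two masks each builtin stores through Ops[0] (not part of this file):

#include <cstdio>
#include <vector>

int main() {
  const unsigned e = 4; // VTy->getNumElements()
  for (unsigned vi = 0; vi != 2; ++vi) {
    std::vector<unsigned> trn, uzp, zip;
    for (unsigned i = 0; i != e; i += 2) {
      trn.push_back(i + vi);            // vtrn: even/odd rows of a transpose
      trn.push_back(i + e + vi);
    }
    for (unsigned i = 0; i != e; ++i)
      uzp.push_back(2 * i + vi);        // vuzp: de-interleave even/odd lanes
    for (unsigned i = 0; i != e; i += 2) {
      zip.push_back((i + vi * e) >> 1); // vzip: interleave low/high halves
      zip.push_back(((i + vi * e) >> 1) + e);
    }
    printf("vi=%u  vtrn:", vi);
    for (unsigned x : trn) printf(" %u", x);
    printf("  vuzp:");
    for (unsigned x : uzp) printf(" %u", x);
    printf("  vzip:");
    for (unsigned x : zip) printf(" %u", x);
    printf("\n"); // vi=0: vtrn 0 4 2 6, vuzp 0 2 4 6, vzip 0 4 1 5
  }
}
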
0
  case NEON::BI__builtin_neon_vqtbl1q_v: {
7132
0
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl1, Ty),
7133
0
                        Ops, "vtbl1");
7134
61
  }
7135
0
  case NEON::BI__builtin_neon_vqtbl2q_v: {
7136
0
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl2, Ty),
7137
0
                        Ops, "vtbl2");
7138
61
  }
7139
0
  case NEON::BI__builtin_neon_vqtbl3q_v: {
7140
0
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl3, Ty),
7141
0
                        Ops, "vtbl3");
7142
61
  }
7143
0
  case NEON::BI__builtin_neon_vqtbl4q_v: {
7144
0
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl4, Ty),
7145
0
                        Ops, "vtbl4");
7146
61
  }
7147
0
  case NEON::BI__builtin_neon_vqtbx1q_v: {
7148
0
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx1, Ty),
7149
0
                        Ops, "vtbx1");
7150
61
  }
7151
0
  case NEON::BI__builtin_neon_vqtbx2q_v: {
7152
0
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx2, Ty),
7153
0
                        Ops, "vtbx2");
7154
61
  }
7155
0
  case NEON::BI__builtin_neon_vqtbx3q_v: {
7156
0
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx3, Ty),
7157
0
                        Ops, "vtbx3");
7158
61
  }
7159
0
  case NEON::BI__builtin_neon_vqtbx4q_v: {
7160
0
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx4, Ty),
7161
0
                        Ops, "vtbx4");
7162
61
  }
7163
24
  case NEON::BI__builtin_neon_vsqadd_v:
7164
24
  case NEON::BI__builtin_neon_vsqaddq_v: {
7165
24
    Int = Intrinsic::aarch64_neon_usqadd;
7166
24
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqadd");
7167
24
  }
7168
24
  case NEON::BI__builtin_neon_vuqadd_v:
7169
24
  case NEON::BI__builtin_neon_vuqaddq_v: {
7170
24
    Int = Intrinsic::aarch64_neon_suqadd;
7171
24
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd");
7172
0
  }
7173
17.3k
  }
7174
17.3k
}
7175
7176
llvm::Value *CodeGenFunction::
7177
8
BuildVector(ArrayRef<llvm::Value*> Ops) {
7178
8
  assert((Ops.size() & (Ops.size() - 1)) == 0 &&
7179
8
         "Not a power-of-two sized vector!");
7180
8
  bool AllConstants = true;
7181
16
  for (unsigned i = 0, e = Ops.size(); i != e && AllConstants; ++i)
7182
8
    AllConstants &= isa<Constant>(Ops[i]);
7183
8
7184
8
  // If this is a constant vector, create a ConstantVector.
7185
8
  if (AllConstants) {
7186
0
    SmallVector<llvm::Constant*, 16> CstOps;
7187
0
    for (unsigned i = 0, e = Ops.size(); i != e; ++i)
7188
0
      CstOps.push_back(cast<Constant>(Ops[i]));
7189
0
    return llvm::ConstantVector::get(CstOps);
7190
0
  }
7191
8
7192
8
  // Otherwise, insertelement the values to build the vector.
7193
8
  Value *Result =
7194
8
    llvm::UndefValue::get(llvm::VectorType::get(Ops[0]->getType(), Ops.size()));
7195
8
7196
40
  for (unsigned i = 0, e = Ops.size(); i != e; ++i)
7197
32
    Result = Builder.CreateInsertElement(Result, Ops[i], Builder.getInt32(i));
7198
8
7199
8
  return Result;
7200
8
}
7201
7202
// Convert the mask from an integer type to a vector of i1.
7203
static Value *getMaskVecValue(CodeGenFunction &CGF, Value *Mask,
7204
1.78k
                              unsigned NumElts) {
7205
1.78k
7206
1.78k
  llvm::VectorType *MaskTy = llvm::VectorType::get(CGF.Builder.getInt1Ty(),
7207
1.78k
                         cast<IntegerType>(Mask->getType())->getBitWidth());
7208
1.78k
  Value *MaskVec = CGF.Builder.CreateBitCast(Mask, MaskTy);
7209
1.78k
7210
1.78k
  // If we have less than 8 elements, then the starting mask was an i8 and
7211
1.78k
  // we need to extract down to the right number of elements.
7212
1.78k
  if (NumElts < 8) {
7213
444
    uint32_t Indices[4];
7214
1.93k
    for (unsigned i = 0; i != NumElts; ++i)
7215
1.48k
      Indices[i] = i;
7216
444
    MaskVec = CGF.Builder.CreateShuffleVector(MaskVec, MaskVec,
7217
444
                                             makeArrayRef(Indices, NumElts),
7218
444
                                             "extract");
7219
444
  }
7220
1.78k
  return MaskVec;
7221
1.78k
}
7222
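
In other words, bit i of the integer mask becomes lane i of the i1 vector, and for fewer than 8 elements only the low bits survive the extracting shuffle. A scalar sketch (hypothetical model_* name):

#include <cstdint>
#include <cstdio>

// Scalar model of getMaskVecValue: an i8 mask becomes <8 x i1> by bitcast,
// and for NumElts < 8 a shufflevector keeps only the low NumElts lanes.
static void model_mask_vec(uint8_t mask, unsigned NumElts, bool out[]) {
  for (unsigned i = 0; i != NumElts; ++i)
    out[i] = (mask >> i) & 1;   // bit i of the mask drives lane i
}

int main() {
  bool lanes[4];
  model_mask_vec(0b0101, 4, lanes);
  printf("%d %d %d %d\n", lanes[0], lanes[1], lanes[2], lanes[3]); // 1 0 1 0
}
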
7223
static Value *EmitX86MaskedStore(CodeGenFunction &CGF,
7224
                                 SmallVectorImpl<Value *> &Ops,
7225
39
                                 unsigned Align) {
7226
39
  // Cast the pointer to the right type.
7227
39
  Ops[0] = CGF.Builder.CreateBitCast(Ops[0],
7228
39
                               llvm::PointerType::getUnqual(Ops[1]->getType()));
7229
39
7230
39
  // If the mask is all ones just emit a regular store.
7231
39
  if (const auto *C = dyn_cast<Constant>(Ops[2]))
7232
3
    if (C->isAllOnesValue())
7233
3
      return CGF.Builder.CreateAlignedStore(Ops[1], Ops[0], Align);
7234
36
7235
36
  Value *MaskVec = getMaskVecValue(CGF, Ops[2],
7236
36
                                   Ops[1]->getType()->getVectorNumElements());
7237
36
7238
36
  return CGF.Builder.CreateMaskedStore(Ops[1], Ops[0], Align, MaskVec);
7239
36
}
7240
7241
static Value *EmitX86MaskedLoad(CodeGenFunction &CGF,
7242
75
                                SmallVectorImpl<Value *> &Ops, unsigned Align) {
7243
75
  // Cast the pointer to the right type.
7244
75
  Ops[0] = CGF.Builder.CreateBitCast(Ops[0],
7245
75
                               llvm::PointerType::getUnqual(Ops[1]->getType()));
7246
75
7247
75
  // If the mask is all ones just emit a regular load.
7248
75
  if (const auto *C = dyn_cast<Constant>(Ops[2]))
7249
3
    if (C->isAllOnesValue())
7250
3
      return CGF.Builder.CreateAlignedLoad(Ops[0], Align);
7251
72
7252
72
  Value *MaskVec = getMaskVecValue(CGF, Ops[2],
7253
72
                                   Ops[1]->getType()->getVectorNumElements());
7254
72
7255
72
  return CGF.Builder.CreateMaskedLoad(Ops[0], Align, MaskVec, Ops[1]);
7256
72
}
7257
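
The masked load emitted here reads memory only in lanes whose mask bit is set and keeps the pass-through value (Ops[1]) elsewhere; a scalar sketch of that semantics (model_* is hypothetical):

#include <cstdint>
#include <cstdio>

// Scalar model of a masked load: enabled lanes read memory, disabled lanes
// keep the pass-through value supplied alongside the mask.
static void model_masked_load(const int32_t *p, uint8_t mask,
                              int32_t passthru[], unsigned n) {
  for (unsigned i = 0; i != n; ++i)
    if ((mask >> i) & 1)
      passthru[i] = p[i];
}

int main() {
  int32_t mem[4] = {10, 20, 30, 40};
  int32_t dst[4] = {-1, -1, -1, -1};
  model_masked_load(mem, 0b1010, dst, 4);
  printf("%d %d %d %d\n", dst[0], dst[1], dst[2], dst[3]); // -1 20 -1 40
}
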
7258
static Value *EmitX86SubVectorBroadcast(CodeGenFunction &CGF,
7259
                                        SmallVectorImpl<Value *> &Ops,
7260
                                        llvm::Type *DstTy,
7261
                                        unsigned SrcSizeInBits,
7262
6
                                        unsigned Align) {
7263
6
  // Load the subvector.
7264
6
  Ops[0] = CGF.Builder.CreateAlignedLoad(Ops[0], Align);
7265
6
7266
6
  // Create broadcast mask.
7267
6
  unsigned NumDstElts = DstTy->getVectorNumElements();
7268
6
  unsigned NumSrcElts = SrcSizeInBits / DstTy->getScalarSizeInBits();
7269
6
7270
6
  SmallVector<uint32_t, 8> Mask;
7271
18
  for (unsigned i = 0; i != NumDstElts; i += NumSrcElts)
7272
48
    for (unsigned j = 0; j != NumSrcElts; ++j)
7273
36
      Mask.push_back(j);
7274
6
7275
6
  return CGF.Builder.CreateShuffleVector(Ops[0], Ops[0], Mask, "subvecbcst");
7276
6
}
7277
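
The mask built above simply repeats 0..NumSrcElts-1 until the destination is full; e.g. broadcasting a 128-bit, 4-element subvector into 256 bits yields {0,1,2,3,0,1,2,3}. A sketch that prints it:

#include <cstdio>

int main() {
  const unsigned NumDstElts = 8, NumSrcElts = 4;
  for (unsigned i = 0; i != NumDstElts; i += NumSrcElts)
    for (unsigned j = 0; j != NumSrcElts; ++j)
      printf("%u ", j);   // prints 0 1 2 3 0 1 2 3
  printf("\n");
}
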
7278
static Value *EmitX86Select(CodeGenFunction &CGF,
7279
1.46k
                            Value *Mask, Value *Op0, Value *Op1) {
7280
1.46k
7281
1.46k
  // If the mask is all ones just return first argument.
7282
1.46k
  if (const auto *C = dyn_cast<Constant>(Mask))
7283
53
    if (C->isAllOnesValue())
7284
53
      return Op0;
7285
1.41k
7286
1.41k
  Mask = getMaskVecValue(CGF, Mask, Op0->getType()->getVectorNumElements());
7287
1.41k
7288
1.41k
  return CGF.Builder.CreateSelect(Mask, Op0, Op1);
7289
1.41k
}
7290
7291
static Value *EmitX86MaskedCompare(CodeGenFunction &CGF, unsigned CC,
7292
500
                                   bool Signed, SmallVectorImpl<Value *> &Ops) {
7293
500
  unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
7294
500
  Value *Cmp;
7295
500
7296
500
  if (CC == 3) {
7297
0
    Cmp = Constant::getNullValue(
7298
0
                       llvm::VectorType::get(CGF.Builder.getInt1Ty(), NumElts));
7299
500
  } else if (CC == 7) {
7300
0
    Cmp = Constant::getAllOnesValue(
7301
0
                       llvm::VectorType::get(CGF.Builder.getInt1Ty(), NumElts));
7302
500
  } else {
7303
500
    ICmpInst::Predicate Pred;
7304
500
    switch (CC) {
7305
0
    default: llvm_unreachable("Unknown condition code");
7306
137
    case 0: Pred = ICmpInst::ICMP_EQ;  break;
7307
74
    case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
7308
73
    case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
7309
72
    case 4: Pred = ICmpInst::ICMP_NE;  break;
7310
72
    case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
7311
72
    case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
7312
500
    }
7313
500
    Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]);
7314
500
  }
7315
500
7316
500
  const auto *C = dyn_cast<Constant>(Ops.back());
7317
500
  if (!C || !C->isAllOnesValue())
7318
250
    Cmp = CGF.Builder.CreateAnd(Cmp, getMaskVecValue(CGF, Ops.back(), NumElts));
7319
500
7320
500
  if (NumElts < 8) {
7321
82
    uint32_t Indices[8];
7322
354
    for (unsigned i = 0; i != NumElts; ++i)
7323
272
      Indices[i] = i;
7324
466
    for (unsigned i = NumElts; i != 8; ++i)
7325
384
      Indices[i] = i % NumElts + NumElts;
7326
82
    Cmp = CGF.Builder.CreateShuffleVector(
7327
82
        Cmp, llvm::Constant::getNullValue(Cmp->getType()), Indices);
7328
82
  }
7329
500
  return CGF.Builder.CreateBitCast(Cmp,
7330
500
                                   IntegerType::get(CGF.getLLVMContext(),
7331
500
                                                    std::max(NumElts, 8U)));
7332
500
}
7333
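
So the condition code encodes eq/lt/le/false/ne/ge/gt/true as 0..7, with Signed picking the S or U predicate family; the trailing shuffle pads a narrow result with zero lanes so the final bitcast target is at least i8. A sketch of just the CC decoding (decode_cc is a hypothetical name):

#include <cstdio>

static const char *decode_cc(unsigned CC, bool Signed) {
  switch (CC) {
  case 0: return "eq";
  case 1: return Signed ? "slt" : "ult";
  case 2: return Signed ? "sle" : "ule";
  case 3: return "false"; // all-zeros result, no compare emitted
  case 4: return "ne";
  case 5: return Signed ? "sge" : "uge";
  case 6: return Signed ? "sgt" : "ugt";
  case 7: return "true";  // all-ones result, no compare emitted
  }
  return "?";
}

int main() {
  printf("%s %s\n", decode_cc(1, true), decode_cc(6, false)); // slt ugt
}
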
7334
44
static Value *EmitX86Abs(CodeGenFunction &CGF, ArrayRef<Value *> Ops) {
7335
44
7336
44
  llvm::Type *Ty = Ops[0]->getType();
7337
44
  Value *Zero = llvm::Constant::getNullValue(Ty);
7338
44
  Value *Sub = CGF.Builder.CreateSub(Zero, Ops[0]);
7339
44
  Value *Cmp = CGF.Builder.CreateICmp(ICmpInst::ICMP_SGT, Ops[0], Zero);
7340
44
  Value *Res = CGF.Builder.CreateSelect(Cmp, Ops[0], Sub);
7341
44
  if (Ops.size() == 1)
7342
22
    return Res;
7343
22
  return EmitX86Select(CGF, Ops[2], Res, Ops[1]);
7344
22
}
7345
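
This is the classic branchless abs: compare against zero and select between x and 0 - x. A scalar sketch; note that INT32_MIN maps to itself, matching the wrapping sub in the IR (model_abs is hypothetical):

#include <cstdint>
#include <cstdio>

static int32_t model_abs(int32_t x) {
  // 0 - x with wrapping semantics, as CreateSub produces.
  int32_t sub = static_cast<int32_t>(0u - static_cast<uint32_t>(x));
  return x > 0 ? x : sub;   // CreateICmp(SGT) feeding CreateSelect
}

int main() {
  printf("%d %d\n", model_abs(-7), model_abs(7)); // 7 7
}
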
7346
static Value *EmitX86MinMax(CodeGenFunction &CGF, ICmpInst::Predicate Pred,
7347
195
                            ArrayRef<Value *> Ops) {
7348
195
  Value *Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]);
7349
195
  Value *Res = CGF.Builder.CreateSelect(Cmp, Ops[0], Ops[1]);
7350
195
7351
195
  if (Ops.size() == 2)
7352
92
    return Res;
7353
103
7354
195
  assert(Ops.size() == 4);
7355
103
  return EmitX86Select(CGF, Ops[3], Res, Ops[2]);
7356
103
}
7357
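
Likewise min/max is a single compare feeding a select, with the predicate (e.g. ICMP_SLT for a signed min) doing all the work. A one-lane sketch (hypothetical model_* name):

#include <cstdint>
#include <cstdio>

static int32_t model_min_s32(int32_t a, int32_t b) {
  bool cmp = a < b;          // CreateICmp(Pred, Ops[0], Ops[1])
  return cmp ? a : b;        // CreateSelect(Cmp, Ops[0], Ops[1])
}

int main() {
  printf("%d\n", model_min_s32(-3, 5)); // -3
}
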
7358
static Value *EmitX86SExtMask(CodeGenFunction &CGF, Value *Op, 
7359
18
                              llvm::Type *DstTy) {
7360
18
  unsigned NumberOfElements = DstTy->getVectorNumElements();
7361
18
  Value *Mask = getMaskVecValue(CGF, Op, NumberOfElements);
7362
18
  return CGF.Builder.CreateSExt(Mask, DstTy, "vpmovm2");
7363
18
}
7364
7365
35
Value *CodeGenFunction::EmitX86CpuIs(const CallExpr *E) {
7366
35
  const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts();
7367
35
  StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString();
7368
35
  return EmitX86CpuIs(CPUStr);
7369
35
}
7370
7371
35
Value *CodeGenFunction::EmitX86CpuIs(StringRef CPUStr) {
7372
35
7373
35
  // This enum contains the vendor, type, and subtype enums from the
7374
35
  // runtime library concatenated together. The _START labels mark
7375
35
  // the start and are used to adjust the value into the correct
7376
35
  // encoding space.
7377
35
  enum X86CPUs {
7378
35
    INTEL = 1,
7379
35
    AMD,
7380
35
    CPU_TYPE_START,
7381
35
    INTEL_BONNELL,
7382
35
    INTEL_CORE2,
7383
35
    INTEL_COREI7,
7384
35
    AMDFAM10H,
7385
35
    AMDFAM15H,
7386
35
    INTEL_SILVERMONT,
7387
35
    INTEL_KNL,
7388
35
    AMD_BTVER1,
7389
35
    AMD_BTVER2,
7390
35
    CPU_SUBTYPE_START,
7391
35
    INTEL_COREI7_NEHALEM,
7392
35
    INTEL_COREI7_WESTMERE,
7393
35
    INTEL_COREI7_SANDYBRIDGE,
7394
35
    AMDFAM10H_BARCELONA,
7395
35
    AMDFAM10H_SHANGHAI,
7396
35
    AMDFAM10H_ISTANBUL,
7397
35
    AMDFAM15H_BDVER1,
7398
35
    AMDFAM15H_BDVER2,
7399
35
    AMDFAM15H_BDVER3,
7400
35
    AMDFAM15H_BDVER4,
7401
35
    AMDFAM17H_ZNVER1,
7402
35
    INTEL_COREI7_IVYBRIDGE,
7403
35
    INTEL_COREI7_HASWELL,
7404
35
    INTEL_COREI7_BROADWELL,
7405
35
    INTEL_COREI7_SKYLAKE,
7406
35
    INTEL_COREI7_SKYLAKE_AVX512,
7407
35
  };
7408
35
7409
35
  X86CPUs CPU =
7410
35
    StringSwitch<X86CPUs>(CPUStr)
7411
35
      .Case("amd", AMD)
7412
35
      .Case("amdfam10h", AMDFAM10H)
7413
35
      .Case("amdfam10", AMDFAM10H)
7414
35
      .Case("amdfam15h", AMDFAM15H)
7415
35
      .Case("amdfam15", AMDFAM15H)
7416
35
      .Case("atom", INTEL_BONNELL)
7417
35
      .Case("barcelona", AMDFAM10H_BARCELONA)
7418
35
      .Case("bdver1", AMDFAM15H_BDVER1)
7419
35
      .Case("bdver2", AMDFAM15H_BDVER2)
7420
35
      .Case("bdver3", AMDFAM15H_BDVER3)
7421
35
      .Case("bdver4", AMDFAM15H_BDVER4)
7422
35
      .Case("bonnell", INTEL_BONNELL)
7423
35
      .Case("broadwell", INTEL_COREI7_BROADWELL)
7424
35
      .Case("btver1", AMD_BTVER1)
7425
35
      .Case("btver2", AMD_BTVER2)
7426
35
      .Case("core2", INTEL_CORE2)
7427
35
      .Case("corei7", INTEL_COREI7)
7428
35
      .Case("haswell", INTEL_COREI7_HASWELL)
7429
35
      .Case("intel", INTEL)
7430
35
      .Case("istanbul", AMDFAM10H_ISTANBUL)
7431
35
      .Case("ivybridge", INTEL_COREI7_IVYBRIDGE)
7432
35
      .Case("knl", INTEL_KNL)
7433
35
      .Case("nehalem", INTEL_COREI7_NEHALEM)
7434
35
      .Case("sandybridge", INTEL_COREI7_SANDYBRIDGE)
7435
35
      .Case("shanghai", AMDFAM10H_SHANGHAI)
7436
35
      .Case("silvermont", INTEL_SILVERMONT)
7437
35
      .Case("skylake", INTEL_COREI7_SKYLAKE)
7438
35
      .Case("skylake-avx512", INTEL_COREI7_SKYLAKE_AVX512)
7439
35
      .Case("slm", INTEL_SILVERMONT)
7440
35
      .Case("westmere", INTEL_COREI7_WESTMERE)
7441
35
      .Case("znver1", AMDFAM17H_ZNVER1);
7442
35
7443
35
  llvm::Type *Int32Ty = Builder.getInt32Ty();
7444
35
7445
35
  // Matching the struct layout from the compiler-rt/libgcc structure that is
7446
35
  // filled in:
7447
35
  // unsigned int __cpu_vendor;
7448
35
  // unsigned int __cpu_type;
7449
35
  // unsigned int __cpu_subtype;
7450
35
  // unsigned int __cpu_features[1];
7451
35
  llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty,
7452
35
                                          llvm::ArrayType::get(Int32Ty, 1));
7453
35
7454
35
  // Grab the global __cpu_model.
7455
35
  llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model");
7456
35
7457
35
  // Calculate the index needed to access the correct field based on the
7458
35
  // range. Also adjust the expected value.
7459
35
  unsigned Index;
7460
35
  unsigned Value;
7461
35
  if (CPU > CPU_SUBTYPE_START) {
7462
18
    Index = 2;
7463
18
    Value = CPU - CPU_SUBTYPE_START;
7464
35
  } else if (CPU > CPU_TYPE_START) {
7465
13
    Index = 1;
7466
13
    Value = CPU - CPU_TYPE_START;
7467
17
  } else {
7468
4
    Index = 0;
7469
4
    Value = CPU;
7470
4
  }
7471
35
7472
35
  // Grab the appropriate field from __cpu_model.
7473
35
  llvm::Value *Idxs[] = {
7474
35
    ConstantInt::get(Int32Ty, 0),
7475
35
    ConstantInt::get(Int32Ty, Index)
7476
35
  };
7477
35
  llvm::Value *CpuValue = Builder.CreateGEP(STy, CpuModel, Idxs);
7478
35
  CpuValue = Builder.CreateAlignedLoad(CpuValue, CharUnits::fromQuantity(4));
7479
35
7480
35
  // Check the value of the field against the requested value.
7481
35
  return Builder.CreateICmpEQ(CpuValue,
7482
35
                                  llvm::ConstantInt::get(Int32Ty, Value));
7483
35
}
7484
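
The concatenated enum is split back apart at runtime: anything past CPU_SUBTYPE_START tests __cpu_subtype (field 2), anything past CPU_TYPE_START tests __cpu_type (field 1), and the rest tests __cpu_vendor (field 0), after subtracting the sentinel so the value matches the runtime library's encoding. A sketch of that adjustment, with sentinel positions taken from the enum above:

#include <cstdio>

enum { CPU_TYPE_START = 3, CPU_SUBTYPE_START = 13 }; // positions in X86CPUs

static void locate(int cpu, unsigned &index, unsigned &value) {
  if (cpu > CPU_SUBTYPE_START)   { index = 2; value = cpu - CPU_SUBTYPE_START; }
  else if (cpu > CPU_TYPE_START) { index = 1; value = cpu - CPU_TYPE_START; }
  else                           { index = 0; value = cpu; }
}

int main() {
  unsigned idx, val;
  locate(14, idx, val); // 14 = INTEL_COREI7_NEHALEM, the first subtype
  printf("field %u, value %u\n", idx, val); // __cpu_subtype (field 2), value 1
}
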
7485
34
Value *CodeGenFunction::EmitX86CpuSupports(const CallExpr *E) {
7486
34
  const Expr *FeatureExpr = E->getArg(0)->IgnoreParenCasts();
7487
34
  StringRef FeatureStr = cast<StringLiteral>(FeatureExpr)->getString();
7488
34
  return EmitX86CpuSupports(FeatureStr);
7489
34
}
7490
7491
34
Value *CodeGenFunction::EmitX86CpuSupports(ArrayRef<StringRef> FeatureStrs) {
7492
34
  // TODO: When/if this becomes more than x86 specific then use a TargetInfo
7493
34
  // based mapping.
7494
34
  // Processor features and mapping to processor feature value.
7495
34
  enum X86Features {
7496
34
    CMOV = 0,
7497
34
    MMX,
7498
34
    POPCNT,
7499
34
    SSE,
7500
34
    SSE2,
7501
34
    SSE3,
7502
34
    SSSE3,
7503
34
    SSE4_1,
7504
34
    SSE4_2,
7505
34
    AVX,
7506
34
    AVX2,
7507
34
    SSE4_A,
7508
34
    FMA4,
7509
34
    XOP,
7510
34
    FMA,
7511
34
    AVX512F,
7512
34
    BMI,
7513
34
    BMI2,
7514
34
    AES,
7515
34
    PCLMUL,
7516
34
    AVX512VL,
7517
34
    AVX512BW,
7518
34
    AVX512DQ,
7519
34
    AVX512CD,
7520
34
    AVX512ER,
7521
34
    AVX512PF,
7522
34
    AVX512VBMI,
7523
34
    AVX512IFMA,
7524
34
    AVX5124VNNIW,
7525
34
    AVX5124FMAPS,
7526
34
    AVX512VPOPCNTDQ,
7527
34
    MAX
7528
34
  };
7529
34
7530
34
  uint32_t FeaturesMask = 0;
7531
34
7532
34
  for (const StringRef &FeatureStr : FeatureStrs) {
7533
34
    X86Features Feature =
7534
34
        StringSwitch<X86Features>(FeatureStr)
7535
34
            .Case("cmov", X86Features::CMOV)
7536
34
            .Case("mmx", X86Features::MMX)
7537
34
            .Case("popcnt", X86Features::POPCNT)
7538
34
            .Case("sse", X86Features::SSE)
7539
34
            .Case("sse2", X86Features::SSE2)
7540
34
            .Case("sse3", X86Features::SSE3)
7541
34
            .Case("ssse3", X86Features::SSSE3)
7542
34
            .Case("sse4.1", X86Features::SSE4_1)
7543
34
            .Case("sse4.2", X86Features::SSE4_2)
7544
34
            .Case("avx", X86Features::AVX)
7545
34
            .Case("avx2", X86Features::AVX2)
7546
34
            .Case("sse4a", X86Features::SSE4_A)
7547
34
            .Case("fma4", X86Features::FMA4)
7548
34
            .Case("xop", X86Features::XOP)
7549
34
            .Case("fma", X86Features::FMA)
7550
34
            .Case("avx512f", X86Features::AVX512F)
7551
34
            .Case("bmi", X86Features::BMI)
7552
34
            .Case("bmi2", X86Features::BMI2)
7553
34
            .Case("aes", X86Features::AES)
7554
34
            .Case("pclmul", X86Features::PCLMUL)
7555
34
            .Case("avx512vl", X86Features::AVX512VL)
7556
34
            .Case("avx512bw", X86Features::AVX512BW)
7557
34
            .Case("avx512dq", X86Features::AVX512DQ)
7558
34
            .Case("avx512cd", X86Features::AVX512CD)
7559
34
            .Case("avx512er", X86Features::AVX512ER)
7560
34
            .Case("avx512pf", X86Features::AVX512PF)
7561
34
            .Case("avx512vbmi", X86Features::AVX512VBMI)
7562
34
            .Case("avx512ifma", X86Features::AVX512IFMA)
7563
34
            .Case("avx5124vnniw", X86Features::AVX5124VNNIW)
7564
34
            .Case("avx5124fmaps", X86Features::AVX5124FMAPS)
7565
34
            .Case("avx512vpopcntdq", X86Features::AVX512VPOPCNTDQ)
7566
34
            .Default(X86Features::MAX);
7567
34
    assert(Feature != X86Features::MAX && "Invalid feature!");
7568
34
    FeaturesMask |= (1U << Feature);
7569
34
  }
7570
34
7571
34
  // Matching the struct layout from the compiler-rt/libgcc structure that is
7572
34
  // filled in:
7573
34
  // unsigned int __cpu_vendor;
7574
34
  // unsigned int __cpu_type;
7575
34
  // unsigned int __cpu_subtype;
7576
34
  // unsigned int __cpu_features[1];
7577
34
  llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty,
7578
34
                                          llvm::ArrayType::get(Int32Ty, 1));
7579
34
7580
34
  // Grab the global __cpu_model.
7581
34
  llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model");
7582
34
7583
34
  // Grab the first (0th) element from the field __cpu_features off of the
7584
34
  // global in the struct STy.
7585
34
  Value *Idxs[] = {ConstantInt::get(Int32Ty, 0), ConstantInt::get(Int32Ty, 3),
7586
34
                   ConstantInt::get(Int32Ty, 0)};
7587
34
  Value *CpuFeatures = Builder.CreateGEP(STy, CpuModel, Idxs);
7588
34
  Value *Features =
7589
34
      Builder.CreateAlignedLoad(CpuFeatures, CharUnits::fromQuantity(4));
7590
34
7591
34
  // Check the value of the bit corresponding to the feature requested.
7592
34
  Value *Bitset = Builder.CreateAnd(
7593
34
      Features, llvm::ConstantInt::get(Int32Ty, FeaturesMask));
7594
34
  return Builder.CreateICmpNE(Bitset, llvm::ConstantInt::get(Int32Ty, 0));
7595
34
}
7596
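
The resulting check is a single and-plus-compare against __cpu_features[0], with each feature contributing one bit (per the X86Features enum above, AVX is bit 9 and AVX2 bit 10). A sketch of the test (the feature-word contents here are made up):

#include <cstdint>
#include <cstdio>

int main() {
  uint32_t features = (1u << 9) | (1u << 10); // pretend AVX and AVX2 are set
  uint32_t mask = 1u << 10;                   // __builtin_cpu_supports("avx2")
  printf("%s\n", (features & mask) != 0 ? "supported" : "not supported");
}
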
7597
Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
7598
2.70k
                                           const CallExpr *E) {
7599
2.70k
  if (BuiltinID == X86::BI__builtin_cpu_is)
7600
35
    return EmitX86CpuIs(E);
7601
2.67k
  if (BuiltinID == X86::BI__builtin_cpu_supports)
7602
34
    return EmitX86CpuSupports(E);
7603
2.63k
7604
2.63k
  SmallVector<Value*, 4> Ops;
7605
2.63k
7606
2.63k
  // Find out if any arguments are required to be integer constant expressions.
7607
2.63k
  unsigned ICEArguments = 0;
7608
2.63k
  ASTContext::GetBuiltinTypeError Error;
7609
2.63k
  getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
7610
2.63k
  assert(Error == ASTContext::GE_None && "Should not codegen an error");
7611
2.63k
7612
10.3k
  for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
7613
7.69k
    // If this is a normal argument, just emit it as a scalar.
7614
7.69k
    if ((ICEArguments & (1 << i)) == 0) {
7615
7.17k
      Ops.push_back(EmitScalarExpr(E->getArg(i)));
7616
7.17k
      continue;
7617
7.17k
    }
7618
521
7619
521
    // If this is required to be a constant, constant fold it so that we know
7620
521
    // that the generated intrinsic gets a ConstantInt.
7621
521
    llvm::APSInt Result;
7622
521
    bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
7623
521
    assert(IsConst && "Constant arg isn't actually constant?"); (void)IsConst;
7624
521
    Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
7625
521
  }
7626
2.63k
7627
2.63k
  // These exist so that the builtin that takes an immediate can be bounds
7628
2.63k
  // checked by clang to avoid passing bad immediates to the backend. Since
7629
2.63k
  // AVX has a larger immediate than SSE we would need separate builtins to
7630
2.63k
  // do the different bounds checking. Rather than create a clang specific
7631
2.63k
  // SSE-only builtin, this implements eight separate builtins to match the gcc
7632
2.63k
  // implementation.
7633
44
  auto getCmpIntrinsicCall = [this, &Ops](Intrinsic::ID ID, unsigned Imm) {
7634
44
    Ops.push_back(llvm::ConstantInt::get(Int8Ty, Imm));
7635
44
    llvm::Function *F = CGM.getIntrinsic(ID);
7636
44
    return Builder.CreateCall(F, Ops);
7637
44
  };
7638
2.63k
7639
2.63k
  // For the vector forms of FP comparisons, translate the builtins directly to
7640
2.63k
  // IR.
7641
2.63k
  // TODO: The builtins could be removed if the SSE header files used vector
7642
2.63k
  // extension comparisons directly (vector ordered/unordered may need
7643
2.63k
  // additional support via __builtin_isnan()).
7644
66
  auto getVectorFCmpIR = [this, &Ops](CmpInst::Predicate Pred) {
7645
66
    Value *Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]);
7646
66
    llvm::VectorType *FPVecTy = cast<llvm::VectorType>(Ops[0]->getType());
7647
66
    llvm::VectorType *IntVecTy = llvm::VectorType::getInteger(FPVecTy);
7648
66
    Value *Sext = Builder.CreateSExt(Cmp, IntVecTy);
7649
66
    return Builder.CreateBitCast(Sext, FPVecTy);
7650
66
  };
7651
2.63k
7652
2.63k
  switch (BuiltinID) {
7653
0
  default: return nullptr;
7654
1
  case X86::BI__builtin_cpu_init: {
7655
1
    llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy,
7656
1
                                                      /*Variadic*/false);
7657
1
    llvm::Constant *Func = CGM.CreateRuntimeFunction(FTy,
7658
1
                                                     "__cpu_indicator_init");
7659
1
    return Builder.CreateCall(Func);
7660
2.63k
  }
7661
0
  case X86::BI_mm_prefetch: {
7662
0
    Value *Address = Ops[0];
7663
0
    Value *RW = ConstantInt::get(Int32Ty, 0);
7664
0
    Value *Locality = Ops[1];
7665
0
    Value *Data = ConstantInt::get(Int32Ty, 1);
7666
0
    Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
7667
0
    return Builder.CreateCall(F, {Address, RW, Locality, Data});
7668
2.63k
  }
7669
3
  case X86::BI_mm_clflush: {
7670
3
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_clflush),
7671
3
                              Ops[0]);
7672
2.63k
  }
7673
3
  case X86::BI_mm_lfence: {
7674
3
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_lfence));
7675
2.63k
  }
7676
3
  case X86::BI_mm_mfence: {
7677
3
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_mfence));
7678
2.63k
  }
7679
2
  case X86::BI_mm_sfence: {
7680
2
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_sfence));
7681
2.63k
  }
7682
5
  case X86::BI_mm_pause: {
7683
5
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_pause));
7684
2.63k
  }
7685
2
  case X86::BI__rdtsc: {
7686
2
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_rdtsc));
7687
2.63k
  }
7688
73
  case X86::BI__builtin_ia32_undef128:
7689
73
  case X86::BI__builtin_ia32_undef256:
7690
73
  case X86::BI__builtin_ia32_undef512:
7691
73
    // The x86 definition of "undef" is not the same as the LLVM definition
7692
73
    // (PR32176). We leave optimizing away an unnecessary zero constant to the
7693
73
    // IR optimizer and backend.
7694
73
    // TODO: If we had a "freeze" IR instruction to generate a fixed undef
7695
73
    // value, we should use that here instead of a zero.
7696
73
    return llvm::Constant::getNullValue(ConvertType(E->getType()));
7697
8
  case X86::BI__builtin_ia32_vec_init_v8qi:
7698
8
  case X86::BI__builtin_ia32_vec_init_v4hi:
7699
8
  case X86::BI__builtin_ia32_vec_init_v2si:
7700
8
    return Builder.CreateBitCast(BuildVector(Ops),
7701
8
                                 llvm::Type::getX86_MMXTy(getLLVMContext()));
7702
4
  case X86::BI__builtin_ia32_vec_ext_v2si:
7703
4
    return Builder.CreateExtractElement(Ops[0],
7704
4
                                  llvm::ConstantInt::get(Ops[1]->getType(), 0));
7705
7
  case X86::BI_mm_setcsr:
7706
7
  case X86::BI__builtin_ia32_ldmxcsr: {
7707
7
    Address Tmp = CreateMemTemp(E->getArg(0)->getType());
7708
7
    Builder.CreateStore(Ops[0], Tmp);
7709
7
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_ldmxcsr),
7710
7
                          Builder.CreateBitCast(Tmp.getPointer(), Int8PtrTy));
7711
7
  }
7712
11
  case X86::BI_mm_getcsr:
7713
11
  case X86::BI__builtin_ia32_stmxcsr: {
7714
11
    Address Tmp = CreateMemTemp(E->getType());
7715
11
    Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_stmxcsr),
7716
11
                       Builder.CreateBitCast(Tmp.getPointer(), Int8PtrTy));
7717
11
    return Builder.CreateLoad(Tmp, "stmxcsr");
7718
11
  }
7719
48
  case X86::BI__builtin_ia32_xsave:
7720
48
  case X86::BI__builtin_ia32_xsave64:
7721
48
  case X86::BI__builtin_ia32_xrstor:
7722
48
  case X86::BI__builtin_ia32_xrstor64:
7723
48
  case X86::BI__builtin_ia32_xsaveopt:
7724
48
  case X86::BI__builtin_ia32_xsaveopt64:
7725
48
  case X86::BI__builtin_ia32_xrstors:
7726
48
  case X86::BI__builtin_ia32_xrstors64:
7727
48
  case X86::BI__builtin_ia32_xsavec:
7728
48
  case X86::BI__builtin_ia32_xsavec64:
7729
48
  case X86::BI__builtin_ia32_xsaves:
7730
48
  case X86::BI__builtin_ia32_xsaves64: {
7731
48
    Intrinsic::ID ID;
7732
48
#define INTRINSIC_X86_XSAVE_ID(NAME) \
7733
48
    case X86::BI__builtin_ia32_##NAME: \
7734
48
      ID = Intrinsic::x86_##NAME; \
7735
48
      break
7736
48
    switch (BuiltinID) {
7737
0
    default: llvm_unreachable("Unsupported intrinsic!");
7738
5
    INTRINSIC_X86_XSAVE_ID(xsave);
7739
3
    INTRINSIC_X86_XSAVE_ID(xsave64);
7740
5
    INTRINSIC_X86_XSAVE_ID(xrstor);
7741
3
    INTRINSIC_X86_XSAVE_ID(xrstor64);
7742
5
    INTRINSIC_X86_XSAVE_ID(xsaveopt);
7743
3
    INTRINSIC_X86_XSAVE_ID(xsaveopt64);
7744
5
    INTRINSIC_X86_XSAVE_ID(xrstors);
7745
3
    INTRINSIC_X86_XSAVE_ID(xrstors64);
7746
5
    INTRINSIC_X86_XSAVE_ID(xsavec);
7747
3
    INTRINSIC_X86_XSAVE_ID(xsavec64);
7748
5
    INTRINSIC_X86_XSAVE_ID(xsaves);
7749
3
    INTRINSIC_X86_XSAVE_ID(xsaves64);
7750
48
    }
7751
48
#undef INTRINSIC_X86_XSAVE_ID
7752
48
    Value *Mhi = Builder.CreateTrunc(
7753
48
      Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, 32)), Int32Ty);
7754
48
    Value *Mlo = Builder.CreateTrunc(Ops[1], Int32Ty);
7755
48
    Ops[1] = Mhi;
7756
48
    Ops.push_back(Mlo);
7757
48
    return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
7758
48
  }
7759
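
All the XSAVE-family builtins take a 64-bit feature mask, but the underlying instructions consume it as EDX:EAX, so the code above splits it into high and low i32 halves. A sketch of the split:

#include <cstdint>
#include <cstdio>

int main() {
  uint64_t mask = 0x0000000700000003ull;
  uint32_t mhi = static_cast<uint32_t>(mask >> 32); // CreateLShr + CreateTrunc
  uint32_t mlo = static_cast<uint32_t>(mask);       // CreateTrunc
  printf("hi=%u lo=%u\n", mhi, mlo); // hi=7 lo=3
}
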
25
  case X86::BI__builtin_ia32_storedqudi128_mask:
7760
25
  case X86::BI__builtin_ia32_storedqusi128_mask:
7761
25
  case X86::BI__builtin_ia32_storedquhi128_mask:
7762
25
  case X86::BI__builtin_ia32_storedquqi128_mask:
7763
25
  case X86::BI__builtin_ia32_storeupd128_mask:
7764
25
  case X86::BI__builtin_ia32_storeups128_mask:
7765
25
  case X86::BI__builtin_ia32_storedqudi256_mask:
7766
25
  case X86::BI__builtin_ia32_storedqusi256_mask:
7767
25
  case X86::BI__builtin_ia32_storedquhi256_mask:
7768
25
  case X86::BI__builtin_ia32_storedquqi256_mask:
7769
25
  case X86::BI__builtin_ia32_storeupd256_mask:
7770
25
  case X86::BI__builtin_ia32_storeups256_mask:
7771
25
  case X86::BI__builtin_ia32_storedqudi512_mask:
7772
25
  case X86::BI__builtin_ia32_storedqusi512_mask:
7773
25
  case X86::BI__builtin_ia32_storedquhi512_mask:
7774
25
  case X86::BI__builtin_ia32_storedquqi512_mask:
7775
25
  case X86::BI__builtin_ia32_storeupd512_mask:
7776
25
  case X86::BI__builtin_ia32_storeups512_mask:
7777
25
    return EmitX86MaskedStore(*this, Ops, 1);
7778
25
7779
2
  case X86::BI__builtin_ia32_storess128_mask:
7780
2
  case X86::BI__builtin_ia32_storesd128_mask: {
7781
2
    return EmitX86MaskedStore(*this, Ops, 16);
7782
2
  }
7783
2
  case X86::BI__builtin_ia32_vpopcntd_512:
7784
2
  case X86::BI__builtin_ia32_vpopcntq_512: {
7785
2
    llvm::Type *ResultType = ConvertType(E->getType());
7786
2
    llvm::Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
7787
2
    return Builder.CreateCall(F, Ops);
7788
2
  }
7789
18
  case X86::BI__builtin_ia32_cvtmask2b128:
7790
18
  case X86::BI__builtin_ia32_cvtmask2b256:
7791
18
  case X86::BI__builtin_ia32_cvtmask2b512:
7792
18
  case X86::BI__builtin_ia32_cvtmask2w128:
7793
18
  case X86::BI__builtin_ia32_cvtmask2w256:
7794
18
  case X86::BI__builtin_ia32_cvtmask2w512:
7795
18
  case X86::BI__builtin_ia32_cvtmask2d128:
7796
18
  case X86::BI__builtin_ia32_cvtmask2d256:
7797
18
  case X86::BI__builtin_ia32_cvtmask2d512:
7798
18
  case X86::BI__builtin_ia32_cvtmask2q128:
7799
18
  case X86::BI__builtin_ia32_cvtmask2q256:
7800
18
  case X86::BI__builtin_ia32_cvtmask2q512:
7801
18
    return EmitX86SExtMask(*this, Ops[0], ConvertType(E->getType()));
7802
18
7803
12
  case X86::BI__builtin_ia32_movdqa32store128_mask:
7804
12
  case X86::BI__builtin_ia32_movdqa64store128_mask:
7805
12
  case X86::BI__builtin_ia32_storeaps128_mask:
7806
12
  case X86::BI__builtin_ia32_storeapd128_mask:
7807
12
  case X86::BI__builtin_ia32_movdqa32store256_mask:
7808
12
  case X86::BI__builtin_ia32_movdqa64store256_mask:
7809
12
  case X86::BI__builtin_ia32_storeaps256_mask:
7810
12
  case X86::BI__builtin_ia32_storeapd256_mask:
7811
12
  case X86::BI__builtin_ia32_movdqa32store512_mask:
7812
12
  case X86::BI__builtin_ia32_movdqa64store512_mask:
7813
12
  case X86::BI__builtin_ia32_storeaps512_mask:
7814
12
  case X86::BI__builtin_ia32_storeapd512_mask: {
7815
12
    unsigned Align =
7816
12
      getContext().getTypeAlignInChars(E->getArg(1)->getType()).getQuantity();
7817
12
    return EmitX86MaskedStore(*this, Ops, Align);
7818
12
  }
7819
45
  case X86::BI__builtin_ia32_loadups128_mask:
7820
45
  case X86::BI__builtin_ia32_loadups256_mask:
7821
45
  case X86::BI__builtin_ia32_loadups512_mask:
7822
45
  case X86::BI__builtin_ia32_loadupd128_mask:
7823
45
  case X86::BI__builtin_ia32_loadupd256_mask:
7824
45
  case X86::BI__builtin_ia32_loadupd512_mask:
7825
45
  case X86::BI__builtin_ia32_loaddquqi128_mask:
7826
45
  case X86::BI__builtin_ia32_loaddquqi256_mask:
7827
45
  case X86::BI__builtin_ia32_loaddquqi512_mask:
7828
45
  case X86::BI__builtin_ia32_loaddquhi128_mask:
7829
45
  case X86::BI__builtin_ia32_loaddquhi256_mask:
7830
45
  case X86::BI__builtin_ia32_loaddquhi512_mask:
7831
45
  case X86::BI__builtin_ia32_loaddqusi128_mask:
7832
45
  case X86::BI__builtin_ia32_loaddqusi256_mask:
7833
45
  case X86::BI__builtin_ia32_loaddqusi512_mask:
7834
45
  case X86::BI__builtin_ia32_loaddqudi128_mask:
7835
45
  case X86::BI__builtin_ia32_loaddqudi256_mask:
7836
45
  case X86::BI__builtin_ia32_loaddqudi512_mask:
7837
45
    return EmitX86MaskedLoad(*this, Ops, 1);
7838
45
7839
4
  case X86::BI__builtin_ia32_loadss128_mask:
7840
4
  case X86::BI__builtin_ia32_loadsd128_mask:
7841
4
    return EmitX86MaskedLoad(*this, Ops, 16);
7842
4
7843
26
  case X86::BI__builtin_ia32_loadaps128_mask:
7844
26
  case X86::BI__builtin_ia32_loadaps256_mask:
7845
26
  case X86::BI__builtin_ia32_loadaps512_mask:
7846
26
  case X86::BI__builtin_ia32_loadapd128_mask:
7847
26
  case X86::BI__builtin_ia32_loadapd256_mask:
7848
26
  case X86::BI__builtin_ia32_loadapd512_mask:
7849
26
  case X86::BI__builtin_ia32_movdqa32load128_mask:
7850
26
  case X86::BI__builtin_ia32_movdqa32load256_mask:
7851
26
  case X86::BI__builtin_ia32_movdqa32load512_mask:
7852
26
  case X86::BI__builtin_ia32_movdqa64load128_mask:
7853
26
  case X86::BI__builtin_ia32_movdqa64load256_mask:
7854
26
  case X86::BI__builtin_ia32_movdqa64load512_mask: {
7855
26
    unsigned Align =
7856
26
      getContext().getTypeAlignInChars(E->getArg(1)->getType()).getQuantity();
7857
26
    return EmitX86MaskedLoad(*this, Ops, Align);
7858
26
  }
7859
26
7860
6
  case X86::BI__builtin_ia32_vbroadcastf128_pd256:
7861
6
  case X86::BI__builtin_ia32_vbroadcastf128_ps256: {
7862
6
    llvm::Type *DstTy = ConvertType(E->getType());
7863
6
    return EmitX86SubVectorBroadcast(*this, Ops, DstTy, 128, 1);
7864
6
  }
7865
6
7866
4
  case X86::BI__builtin_ia32_storehps:
7867
4
  case X86::BI__builtin_ia32_storelps: {
7868
4
    llvm::Type *PtrTy = llvm::PointerType::getUnqual(Int64Ty);
7869
4
    llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 2);
7870
4
7871
4
    // cast val to v2i64
7872
4
    Ops[1] = Builder.CreateBitCast(Ops[1], VecTy, "cast");
7873
4
7874
4
    // extract (0, 1)
7875
4
    unsigned Index = BuiltinID == X86::BI__builtin_ia32_storelps ? 0 : 1;
7876
4
    llvm::Value *Idx = llvm::ConstantInt::get(SizeTy, Index);
7877
4
    Ops[1] = Builder.CreateExtractElement(Ops[1], Idx, "extract");
7878
4
7879
4
    // cast pointer to i64 & store
7880
4
    Ops[0] = Builder.CreateBitCast(Ops[0], PtrTy);
7881
4
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7882
4
  }
7883
25
  case X86::BI__builtin_ia32_palignr128:
7884
25
  case X86::BI__builtin_ia32_palignr256:
7885
25
  case X86::BI__builtin_ia32_palignr512_mask: {
7886
25
    unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
7887
25
7888
25
    unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
7889
25
    assert(NumElts % 16 == 0);
7890
25
7891
25
    // If palignr is shifting the pair of vectors more than the size of two
7892
25
    // lanes, emit zero.
7893
25
    if (ShiftVal >= 32)
7894
2
      return llvm::Constant::getNullValue(ConvertType(E->getType()));
7895
23
7896
23
    // If palignr is shifting the pair of input vectors more than one lane,
7897
23
    // but less than two lanes, convert to shifting in zeroes.
7898
23
    if (ShiftVal > 16) {
7899
4
      ShiftVal -= 16;
7900
4
      Ops[1] = Ops[0];
7901
4
      Ops[0] = llvm::Constant::getNullValue(Ops[0]->getType());
7902
4
    }
7903
23
7904
23
    uint32_t Indices[64];
7905
23
    // 256-bit palignr operates on 128-bit lanes, so handle each lane separately.
7906
72
    for (unsigned l = 0; l != NumElts; l += 16) {
7907
833
      for (unsigned i = 0; i != 16; ++i) {
7908
784
        unsigned Idx = ShiftVal + i;
7909
784
        if (Idx >= 16)
7910
119
          Idx += NumElts - 16; // End of lane, switch operand.
7911
784
        Indices[l + i] = Idx + l;
7912
784
      }
7913
49
    }
7914
23
7915
23
    Value *Align = Builder.CreateShuffleVector(Ops[1], Ops[0],
7916
23
                                               makeArrayRef(Indices, NumElts),
7917
23
                                               "palignr");
7918
23
7919
23
    // If this isn't a masked builtin, just return the align operation.
7920
23
    if (Ops.size() == 3)
7921
17
      return Align;
7922
6
7923
6
    return EmitX86Select(*this, Ops[4], Align, Ops[3]);
7924
6
  }
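  // Worked example: for the 128-bit form (NumElts == 16) with ShiftVal == 4,
  // Indices becomes {4..15, 16..19}: bytes 4..15 of Ops[1] followed by bytes
  // 0..3 of Ops[0], i.e. the concatenation Ops[0]:Ops[1] shifted right by 4
  // bytes, matching the PALIGNR instruction's semantics.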
7925
6
7926
14
  case X86::BI__builtin_ia32_vperm2f128_pd256:
7927
14
  case X86::BI__builtin_ia32_vperm2f128_ps256:
7928
14
  case X86::BI__builtin_ia32_vperm2f128_si256:
7929
14
  case X86::BI__builtin_ia32_permti256: {
7930
14
    unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
7931
14
    unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
7932
14
7933
14
    // This takes a very simple approach since there are two lanes and a
7934
14
    // shuffle can have 2 inputs. So we reserve the first input for the first
7935
14
    // lane and the second input for the second lane. This may result in
7936
14
    // duplicate sources, but this can be dealt with in the backend.
7937
14
7938
14
    Value *OutOps[2];
7939
14
    uint32_t Indices[8];
7940
42
    for (unsigned l = 0; l != 2; ++l) {
7941
28
      // Determine the source for this lane.
7942
28
      if (Imm & (1 << ((l * 4) + 3)))
7943
2
        OutOps[l] = llvm::ConstantAggregateZero::get(Ops[0]->getType());
7944
26
      else if (Imm & (1 << ((l * 4) + 1)))
7945
14
        OutOps[l] = Ops[1];
7946
26
      else
7947
12
        OutOps[l] = Ops[0];
7948
28
7949
116
      for (unsigned i = 0; i != NumElts/2; ++i) {
7950
88
        // Start with ith element of the source for this lane.
7951
88
        unsigned Idx = (l * NumElts) + i;
7952
88
        // If bit 0 of the immediate half is set, switch to the high half of
7953
88
        // the source.
7954
88
        if (Imm & (1 << (l * 4)))
7955
50
          Idx += NumElts/2;
7956
88
        Indices[(l * (NumElts/2)) + i] = Idx;
7957
88
      }
7958
28
    }
7959
14
7960
14
    return Builder.CreateShuffleVector(OutOps[0], OutOps[1],
7961
14
                                       makeArrayRef(Indices, NumElts),
7962
14
                                       "vperm");
7963
14
  }
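  // Worked example: Imm == 0x31 decodes as lane 0 <- high half of Ops[0]
  // (low nibble 0x1: bit 1 clear picks Ops[0], bit 0 set picks its high
  // half) and lane 1 <- high half of Ops[1] (high nibble 0x3: bit 1 set
  // picks Ops[1], bit 0 set picks its high half).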
7964
14
7965
8
  case X86::BI__builtin_ia32_movnti:
7966
8
  case X86::BI__builtin_ia32_movnti64:
7967
8
  case X86::BI__builtin_ia32_movntsd:
7968
8
  case X86::BI__builtin_ia32_movntss: {
7969
8
    llvm::MDNode *Node = llvm::MDNode::get(
7970
8
        getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));
7971
8
7972
8
    Value *Ptr = Ops[0];
7973
8
    Value *Src = Ops[1];
7974
8
7975
8
    // Extract the 0'th element of the source vector.
7976
8
    if (BuiltinID == X86::BI__builtin_ia32_movntsd ||
7977
7
        BuiltinID == X86::BI__builtin_ia32_movntss)
7978
2
      Src = Builder.CreateExtractElement(Src, (uint64_t)0, "extract");
7979
8
7980
8
    // Convert the type of the pointer to a pointer to the stored type.
7981
8
    Value *BC = Builder.CreateBitCast(
7982
8
        Ptr, llvm::PointerType::getUnqual(Src->getType()), "cast");
7983
8
7984
8
    // Unaligned nontemporal store of the scalar value.
7985
8
    StoreInst *SI = Builder.CreateDefaultAlignedStore(Src, BC);
7986
8
    SI->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node);
7987
8
    SI->setAlignment(1);
7988
8
    return SI;
7989
8
  }
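  // The !nontemporal metadata (whose single operand must be i32 1) is what
  // asks the backend for a non-temporal store such as MOVNTI; the store is
  // otherwise ordinary, unaligned IR.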
7990
8
7991
1.31k
  case X86::BI__builtin_ia32_selectb_128:
7992
1.31k
  case X86::BI__builtin_ia32_selectb_256:
7993
1.31k
  case X86::BI__builtin_ia32_selectb_512:
7994
1.31k
  case X86::BI__builtin_ia32_selectw_128:
7995
1.31k
  case X86::BI__builtin_ia32_selectw_256:
7996
1.31k
  case X86::BI__builtin_ia32_selectw_512:
7997
1.31k
  case X86::BI__builtin_ia32_selectd_128:
7998
1.31k
  case X86::BI__builtin_ia32_selectd_256:
7999
1.31k
  case X86::BI__builtin_ia32_selectd_512:
8000
1.31k
  case X86::BI__builtin_ia32_selectq_128:
8001
1.31k
  case X86::BI__builtin_ia32_selectq_256:
8002
1.31k
  case X86::BI__builtin_ia32_selectq_512:
8003
1.31k
  case X86::BI__builtin_ia32_selectps_128:
8004
1.31k
  case X86::BI__builtin_ia32_selectps_256:
8005
1.31k
  case X86::BI__builtin_ia32_selectps_512:
8006
1.31k
  case X86::BI__builtin_ia32_selectpd_128:
8007
1.31k
  case X86::BI__builtin_ia32_selectpd_256:
8008
1.31k
  case X86::BI__builtin_ia32_selectpd_512:
8009
1.31k
    return EmitX86Select(*this, Ops[0], Ops[1], Ops[2]);
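  // EmitX86Select (defined earlier in this file) presumably bitcasts the
  // integer mask in Ops[0] to an <N x i1> vector and emits a plain IR
  // 'select': element i of the result is Ops[1][i] where the mask bit is
  // set and Ops[2][i] otherwise.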
8010
36
  case X86::BI__builtin_ia32_pcmpeqb128_mask:
8011
36
  case X86::BI__builtin_ia32_pcmpeqb256_mask:
8012
36
  case X86::BI__builtin_ia32_pcmpeqb512_mask:
8013
36
  case X86::BI__builtin_ia32_pcmpeqw128_mask:
8014
36
  case X86::BI__builtin_ia32_pcmpeqw256_mask:
8015
36
  case X86::BI__builtin_ia32_pcmpeqw512_mask:
8016
36
  case X86::BI__builtin_ia32_pcmpeqd128_mask:
8017
36
  case X86::BI__builtin_ia32_pcmpeqd256_mask:
8018
36
  case X86::BI__builtin_ia32_pcmpeqd512_mask:
8019
36
  case X86::BI__builtin_ia32_pcmpeqq128_mask:
8020
36
  case X86::BI__builtin_ia32_pcmpeqq256_mask:
8021
36
  case X86::BI__builtin_ia32_pcmpeqq512_mask:
8022
36
    return EmitX86MaskedCompare(*this, 0, false, Ops);
8023
36
  case X86::BI__builtin_ia32_pcmpgtb128_mask:
8024
36
  case X86::BI__builtin_ia32_pcmpgtb256_mask:
8025
36
  case X86::BI__builtin_ia32_pcmpgtb512_mask:
8026
36
  case X86::BI__builtin_ia32_pcmpgtw128_mask:
8027
36
  case X86::BI__builtin_ia32_pcmpgtw256_mask:
8028
36
  case X86::BI__builtin_ia32_pcmpgtw512_mask:
8029
36
  case X86::BI__builtin_ia32_pcmpgtd128_mask:
8030
36
  case X86::BI__builtin_ia32_pcmpgtd256_mask:
8031
36
  case X86::BI__builtin_ia32_pcmpgtd512_mask:
8032
36
  case X86::BI__builtin_ia32_pcmpgtq128_mask:
8033
36
  case X86::BI__builtin_ia32_pcmpgtq256_mask:
8034
36
  case X86::BI__builtin_ia32_pcmpgtq512_mask:
8035
36
    return EmitX86MaskedCompare(*this, 6, true, Ops);
8036
180
  case X86::BI__builtin_ia32_cmpb128_mask:
8037
180
  case X86::BI__builtin_ia32_cmpb256_mask:
8038
180
  case X86::BI__builtin_ia32_cmpb512_mask:
8039
180
  case X86::BI__builtin_ia32_cmpw128_mask:
8040
180
  case X86::BI__builtin_ia32_cmpw256_mask:
8041
180
  case X86::BI__builtin_ia32_cmpw512_mask:
8042
180
  case X86::BI__builtin_ia32_cmpd128_mask:
8043
180
  case X86::BI__builtin_ia32_cmpd256_mask:
8044
180
  case X86::BI__builtin_ia32_cmpd512_mask:
8045
180
  case X86::BI__builtin_ia32_cmpq128_mask:
8046
180
  case X86::BI__builtin_ia32_cmpq256_mask:
8047
180
  case X86::BI__builtin_ia32_cmpq512_mask: {
8048
180
    unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
8049
180
    return EmitX86MaskedCompare(*this, CC, true, Ops);
8050
180
  }
8051
248
  case X86::BI__builtin_ia32_ucmpb128_mask:
8052
248
  case X86::BI__builtin_ia32_ucmpb256_mask:
8053
248
  case X86::BI__builtin_ia32_ucmpb512_mask:
8054
248
  case X86::BI__builtin_ia32_ucmpw128_mask:
8055
248
  case X86::BI__builtin_ia32_ucmpw256_mask:
8056
248
  case X86::BI__builtin_ia32_ucmpw512_mask:
8057
248
  case X86::BI__builtin_ia32_ucmpd128_mask:
8058
248
  case X86::BI__builtin_ia32_ucmpd256_mask:
8059
248
  case X86::BI__builtin_ia32_ucmpd512_mask:
8060
248
  case X86::BI__builtin_ia32_ucmpq128_mask:
8061
248
  case X86::BI__builtin_ia32_ucmpq256_mask:
8062
248
  case X86::BI__builtin_ia32_ucmpq512_mask: {
8063
248
    unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
8064
248
    return EmitX86MaskedCompare(*this, CC, false, Ops);
8065
248
  }
8066
248
8067
18
  case X86::BI__builtin_ia32_vplzcntd_128_mask:
8068
18
  case X86::BI__builtin_ia32_vplzcntd_256_mask:
8069
18
  case X86::BI__builtin_ia32_vplzcntd_512_mask:
8070
18
  case X86::BI__builtin_ia32_vplzcntq_128_mask:
8071
18
  case X86::BI__builtin_ia32_vplzcntq_256_mask:
8072
18
  case X86::BI__builtin_ia32_vplzcntq_512_mask: {
8073
18
    Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType());
8074
18
    return EmitX86Select(*this, Ops[2],
8075
18
                         Builder.CreateCall(F, {Ops[0],Builder.getInt1(false)}),
8076
18
                         Ops[1]);
8077
18
  }
8078
18
8079
44
  case X86::BI__builtin_ia32_pabsb128:
8080
44
  case X86::BI__builtin_ia32_pabsw128:
8081
44
  case X86::BI__builtin_ia32_pabsd128:
8082
44
  case X86::BI__builtin_ia32_pabsb256:
8083
44
  case X86::BI__builtin_ia32_pabsw256:
8084
44
  case X86::BI__builtin_ia32_pabsd256:
8085
44
  case X86::BI__builtin_ia32_pabsq128_mask:
8086
44
  case X86::BI__builtin_ia32_pabsq256_mask:
8087
44
  case X86::BI__builtin_ia32_pabsb512_mask:
8088
44
  case X86::BI__builtin_ia32_pabsw512_mask:
8089
44
  case X86::BI__builtin_ia32_pabsd512_mask:
8090
44
  case X86::BI__builtin_ia32_pabsq512_mask:
8091
44
    return EmitX86Abs(*this, Ops);
8092
44
8093
49
  case X86::BI__builtin_ia32_pmaxsb128:
8094
49
  case X86::BI__builtin_ia32_pmaxsw128:
8095
49
  case X86::BI__builtin_ia32_pmaxsd128:
8096
49
  case X86::BI__builtin_ia32_pmaxsq128_mask:
8097
49
  case X86::BI__builtin_ia32_pmaxsb256:
8098
49
  case X86::BI__builtin_ia32_pmaxsw256:
8099
49
  case X86::BI__builtin_ia32_pmaxsd256:
8100
49
  case X86::BI__builtin_ia32_pmaxsq256_mask:
8101
49
  case X86::BI__builtin_ia32_pmaxsb512_mask:
8102
49
  case X86::BI__builtin_ia32_pmaxsw512_mask:
8103
49
  case X86::BI__builtin_ia32_pmaxsd512_mask:
8104
49
  case X86::BI__builtin_ia32_pmaxsq512_mask:
8105
49
    return EmitX86MinMax(*this, ICmpInst::ICMP_SGT, Ops);
8106
49
  case X86::BI__builtin_ia32_pmaxub128:
8107
49
  case X86::BI__builtin_ia32_pmaxuw128:
8108
49
  case X86::BI__builtin_ia32_pmaxud128:
8109
49
  case X86::BI__builtin_ia32_pmaxuq128_mask:
8110
49
  case X86::BI__builtin_ia32_pmaxub256:
8111
49
  case X86::BI__builtin_ia32_pmaxuw256:
8112
49
  case X86::BI__builtin_ia32_pmaxud256:
8113
49
  case X86::BI__builtin_ia32_pmaxuq256_mask:
8114
49
  case X86::BI__builtin_ia32_pmaxub512_mask:
8115
49
  case X86::BI__builtin_ia32_pmaxuw512_mask:
8116
49
  case X86::BI__builtin_ia32_pmaxud512_mask:
8117
49
  case X86::BI__builtin_ia32_pmaxuq512_mask:
8118
49
    return EmitX86MinMax(*this, ICmpInst::ICMP_UGT, Ops);
8119
49
  case X86::BI__builtin_ia32_pminsb128:
8120
49
  case X86::BI__builtin_ia32_pminsw128:
8121
49
  case X86::BI__builtin_ia32_pminsd128:
8122
49
  case X86::BI__builtin_ia32_pminsq128_mask:
8123
49
  case X86::BI__builtin_ia32_pminsb256:
8124
49
  case X86::BI__builtin_ia32_pminsw256:
8125
49
  case X86::BI__builtin_ia32_pminsd256:
8126
49
  case X86::BI__builtin_ia32_pminsq256_mask:
8127
49
  case X86::BI__builtin_ia32_pminsb512_mask:
8128
49
  case X86::BI__builtin_ia32_pminsw512_mask:
8129
49
  case X86::BI__builtin_ia32_pminsd512_mask:
8130
49
  case X86::BI__builtin_ia32_pminsq512_mask:
8131
49
    return EmitX86MinMax(*this, ICmpInst::ICMP_SLT, Ops);
8132
48
  case X86::BI__builtin_ia32_pminub128:
8133
48
  case X86::BI__builtin_ia32_pminuw128:
8134
48
  case X86::BI__builtin_ia32_pminud128:
8135
48
  case X86::BI__builtin_ia32_pminuq128_mask:
8136
48
  case X86::BI__builtin_ia32_pminub256:
8137
48
  case X86::BI__builtin_ia32_pminuw256:
8138
48
  case X86::BI__builtin_ia32_pminud256:
8139
48
  case X86::BI__builtin_ia32_pminuq256_mask:
8140
48
  case X86::BI__builtin_ia32_pminub512_mask:
8141
48
  case X86::BI__builtin_ia32_pminuw512_mask:
8142
48
  case X86::BI__builtin_ia32_pminud512_mask:
8143
48
  case X86::BI__builtin_ia32_pminuq512_mask:
8144
48
    return EmitX86MinMax(*this, ICmpInst::ICMP_ULT, Ops);
8145
48
8146
48
  // 3DNow!
8147
4
  case X86::BI__builtin_ia32_pswapdsf:
8148
4
  case X86::BI__builtin_ia32_pswapdsi: {
8149
4
    llvm::Type *MMXTy = llvm::Type::getX86_MMXTy(getLLVMContext());
8150
4
    Ops[0] = Builder.CreateBitCast(Ops[0], MMXTy, "cast");
8151
4
    llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_3dnowa_pswapd);
8152
4
    return Builder.CreateCall(F, Ops, "pswapd");
8153
4
  }
8154
6
  case X86::BI__builtin_ia32_rdrand16_step:
8155
6
  case X86::BI__builtin_ia32_rdrand32_step:
8156
6
  case X86::BI__builtin_ia32_rdrand64_step:
8157
6
  case X86::BI__builtin_ia32_rdseed16_step:
8158
6
  case X86::BI__builtin_ia32_rdseed32_step:
8159
6
  case X86::BI__builtin_ia32_rdseed64_step: {
8160
6
    Intrinsic::ID ID;
8161
6
    switch (BuiltinID) {
8162
0
    default: llvm_unreachable("Unsupported intrinsic!");
8163
1
    case X86::BI__builtin_ia32_rdrand16_step:
8164
1
      ID = Intrinsic::x86_rdrand_16;
8165
1
      break;
8166
1
    case X86::BI__builtin_ia32_rdrand32_step:
8167
1
      ID = Intrinsic::x86_rdrand_32;
8168
1
      break;
8169
1
    case X86::BI__builtin_ia32_rdrand64_step:
8170
1
      ID = Intrinsic::x86_rdrand_64;
8171
1
      break;
8172
1
    case X86::BI__builtin_ia32_rdseed16_step:
8173
1
      ID = Intrinsic::x86_rdseed_16;
8174
1
      break;
8175
1
    case X86::BI__builtin_ia32_rdseed32_step:
8176
1
      ID = Intrinsic::x86_rdseed_32;
8177
1
      break;
8178
1
    case X86::BI__builtin_ia32_rdseed64_step:
8179
1
      ID = Intrinsic::x86_rdseed_64;
8180
1
      break;
8181
6
    }
8182
6
8183
6
    Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID));
8184
6
    Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 0),
8185
6
                                      Ops[0]);
8186
6
    return Builder.CreateExtractValue(Call, 1);
8187
6
  }
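  // Each of these builtins stores the hardware random value through the
  // pointer in Ops[0] and returns the intrinsic's second result, the
  // carry-flag-derived success bit (1 if a random value was delivered,
  // 0 otherwise).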
8188
6
8189
6
  // SSE packed comparison intrinsics
8190
7
  case X86::BI__builtin_ia32_cmpeqps:
8191
7
  case X86::BI__builtin_ia32_cmpeqpd:
8192
7
    return getVectorFCmpIR(CmpInst::FCMP_OEQ);
8193
6
  case X86::BI__builtin_ia32_cmpltps:
8194
6
  case X86::BI__builtin_ia32_cmpltpd:
8195
6
    return getVectorFCmpIR(CmpInst::FCMP_OLT);
8196
6
  case X86::BI__builtin_ia32_cmpleps:
8197
6
  case X86::BI__builtin_ia32_cmplepd:
8198
6
    return getVectorFCmpIR(CmpInst::FCMP_OLE);
8199
3
  case X86::BI__builtin_ia32_cmpunordps:
8200
3
  case X86::BI__builtin_ia32_cmpunordpd:
8201
3
    return getVectorFCmpIR(CmpInst::FCMP_UNO);
8202
3
  case X86::BI__builtin_ia32_cmpneqps:
8203
3
  case X86::BI__builtin_ia32_cmpneqpd:
8204
3
    return getVectorFCmpIR(CmpInst::FCMP_UNE);
8205
6
  case X86::BI__builtin_ia32_cmpnltps:
8206
6
  case X86::BI__builtin_ia32_cmpnltpd:
8207
6
    return getVectorFCmpIR(CmpInst::FCMP_UGE);
8208
6
  case X86::BI__builtin_ia32_cmpnleps:
8209
6
  case X86::BI__builtin_ia32_cmpnlepd:
8210
6
    return getVectorFCmpIR(CmpInst::FCMP_UGT);
8211
3
  case X86::BI__builtin_ia32_cmpordps:
8212
3
  case X86::BI__builtin_ia32_cmpordpd:
8213
3
    return getVectorFCmpIR(CmpInst::FCMP_ORD);
8214
54
  case X86::BI__builtin_ia32_cmpps:
8215
54
  case X86::BI__builtin_ia32_cmpps256:
8216
54
  case X86::BI__builtin_ia32_cmppd:
8217
54
  case X86::BI__builtin_ia32_cmppd256: {
8218
54
    unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
8219
54
    // If this is one of the SSE immediates, we can use native IR.
8220
54
    if (CC < 8) {
8221
26
      FCmpInst::Predicate Pred;
8222
26
      switch (CC) {
8223
12
      case 0: Pred = FCmpInst::FCMP_OEQ; break;
8224
2
      case 1: Pred = FCmpInst::FCMP_OLT; break;
8225
2
      case 2: Pred = FCmpInst::FCMP_OLE; break;
8226
2
      case 3: Pred = FCmpInst::FCMP_UNO; break;
8227
2
      case 4: Pred = FCmpInst::FCMP_UNE; break;
8228
2
      case 5: Pred = FCmpInst::FCMP_UGE; break;
8229
2
      case 6: Pred = FCmpInst::FCMP_UGT; break;
8230
2
      case 7: Pred = FCmpInst::FCMP_ORD; break;
8231
26
      }
8232
26
      return getVectorFCmpIR(Pred);
8233
26
    }
8234
28
8235
28
    // We can't handle 8-31 immediates with native IR, so use the intrinsic,
8236
28
    // except for predicates that fold to constants, handled below.
8237
28
    Intrinsic::ID ID;
8238
28
    switch (BuiltinID) {
8239
0
    default: llvm_unreachable("Unsupported intrinsic!");
8240
3
    case X86::BI__builtin_ia32_cmpps:
8241
3
      ID = Intrinsic::x86_sse_cmp_ps;
8242
3
      break;
8243
11
    case X86::BI__builtin_ia32_cmpps256:
8244
11
      // _CMP_TRUE_UQ, _CMP_TRUE_US produce -1,-1... vector
8245
11
      // on any input and _CMP_FALSE_OQ, _CMP_FALSE_OS produce 0, 0...
8246
11
      if (CC == 0xf || CC == 0xb || CC == 0x1b || CC == 0x1f) {
8247
6
         Value *Constant = (CC == 0xf || CC == 0x1f) ?
8248
4
                llvm::Constant::getAllOnesValue(Builder.getInt32Ty()) :
8249
4
                llvm::Constant::getNullValue(Builder.getInt32Ty());
8250
8
         Value *Vec = Builder.CreateVectorSplat(
8251
8
                        Ops[0]->getType()->getVectorNumElements(), Constant);
8252
8
         return Builder.CreateBitCast(Vec, Ops[0]->getType());
8253
8
      }
8254
3
      ID = Intrinsic::x86_avx_cmp_ps_256;
8255
3
      break;
8256
3
    case X86::BI__builtin_ia32_cmppd:
8257
3
      ID = Intrinsic::x86_sse2_cmp_pd;
8258
3
      break;
8259
11
    case X86::BI__builtin_ia32_cmppd256:
8260
11
      // _CMP_TRUE_UQ, _CMP_TRUE_US produce -1,-1... vector
8261
11
      // on any input and _CMP_FALSE_OQ, _CMP_FALSE_OS produce 0, 0...
8262
11
      if (CC == 0xf || CC == 0xb || CC == 0x1b || CC == 0x1f) {
8263
6
         Value *Constant = (CC == 0xf || CC == 0x1f) ?
8264
4
                llvm::Constant::getAllOnesValue(Builder.getInt64Ty()) :
8265
4
                llvm::Constant::getNullValue(Builder.getInt64Ty());
8266
8
         Value *Vec = Builder.CreateVectorSplat(
8267
8
                        Ops[0]->getType()->getVectorNumElements(), Constant);
8268
8
         return Builder.CreateBitCast(Vec, Ops[0]->getType());
8269
8
      }
8270
3
      ID = Intrinsic::x86_avx_cmp_pd_256;
8271
3
      break;
8272
12
    }
8273
12
8274
12
    return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
8275
12
  }
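  // In the 256-bit cases, _CMP_TRUE_UQ (0xf) and _CMP_TRUE_US (0x1f) fold to
  // an all-ones splat and _CMP_FALSE_OQ (0xb) and _CMP_FALSE_OS (0x1b) to a
  // zero splat; every other 8-31 predicate goes through the target intrinsic.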
8276
12
8277
12
  // SSE scalar comparison intrinsics
8278
1
  case X86::BI__builtin_ia32_cmpeqss:
8279
1
    return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 0);
8280
3
  case X86::BI__builtin_ia32_cmpltss:
8281
3
    return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 1);
8282
3
  case X86::BI__builtin_ia32_cmpless:
8283
3
    return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 2);
8284
1
  case X86::BI__builtin_ia32_cmpunordss:
8285
1
    return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 3);
8286
1
  case X86::BI__builtin_ia32_cmpneqss:
8287
1
    return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 4);
8288
3
  case X86::BI__builtin_ia32_cmpnltss:
8289
3
    return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 5);
8290
3
  case X86::BI__builtin_ia32_cmpnless:
8291
3
    return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 6);
8292
1
  case X86::BI__builtin_ia32_cmpordss:
8293
1
    return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 7);
8294
2
  case X86::BI__builtin_ia32_cmpeqsd:
8295
2
    return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 0);
8296
5
  case X86::BI__builtin_ia32_cmpltsd:
8297
5
    return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 1);
8298
5
  case X86::BI__builtin_ia32_cmplesd:
8299
5
    return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 2);
8300
2
  case X86::BI__builtin_ia32_cmpunordsd:
8301
2
    return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 3);
8302
2
  case X86::BI__builtin_ia32_cmpneqsd:
8303
2
    return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 4);
8304
5
  case X86::BI__builtin_ia32_cmpnltsd:
8305
5
    return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 5);
8306
5
  case X86::BI__builtin_ia32_cmpnlesd:
8307
5
    return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 6);
8308
2
  case X86::BI__builtin_ia32_cmpordsd:
8309
2
    return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 7);
8310
12
8311
4
  case X86::BI__emul:
8312
4
  case X86::BI__emulu: {
8313
4
    llvm::Type *Int64Ty = llvm::IntegerType::get(getLLVMContext(), 64);
8314
4
    bool isSigned = (BuiltinID == X86::BI__emul);
8315
4
    Value *LHS = Builder.CreateIntCast(Ops[0], Int64Ty, isSigned);
8316
4
    Value *RHS = Builder.CreateIntCast(Ops[1], Int64Ty, isSigned);
8317
4
    return Builder.CreateMul(LHS, RHS, "", !isSigned, isSigned);
8318
4
  }
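  // Worked example: __emul sign-extends, so __emul(-2147483648, 2) yields
  // -4294967296, while __emulu zero-extends, so __emulu(0x80000000u, 2)
  // yields 0x100000000. The nsw/nuw flags are safe because a 32x32->64
  // multiply of extended operands cannot overflow.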
8319
4
  case X86::BI__mulh:
8320
4
  case X86::BI__umulh:
8321
4
  case X86::BI_mul128:
8322
4
  case X86::BI_umul128: {
8323
4
    llvm::Type *ResType = ConvertType(E->getType());
8324
4
    llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
8325
4
8326
3
    bool IsSigned = (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI_mul128);
8327
4
    Value *LHS = Builder.CreateIntCast(Ops[0], Int128Ty, IsSigned);
8328
4
    Value *RHS = Builder.CreateIntCast(Ops[1], Int128Ty, IsSigned);
8329
4
8330
4
    Value *MulResult, *HigherBits;
8331
4
    if (IsSigned) {
8332
2
      MulResult = Builder.CreateNSWMul(LHS, RHS);
8333
2
      HigherBits = Builder.CreateAShr(MulResult, 64);
8334
4
    } else {
8335
2
      MulResult = Builder.CreateNUWMul(LHS, RHS);
8336
2
      HigherBits = Builder.CreateLShr(MulResult, 64);
8337
2
    }
8338
4
    HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned);
8339
4
8340
4
    if (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI__umulh)
8341
2
      return HigherBits;
8342
2
8343
2
    Address HighBitsAddress = EmitPointerWithAlignment(E->getArg(2));
8344
2
    Builder.CreateStore(HigherBits, HighBitsAddress);
8345
2
    return Builder.CreateIntCast(MulResult, ResType, IsSigned);
8346
2
  }
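  // Net effect: __mulh/__umulh return only the high 64 bits of the 128-bit
  // product, while _mul128/_umul128 return the low 64 bits and store the
  // high 64 bits through the out-pointer in the third argument.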
8347
2
8348
1
  case X86::BI__faststorefence: {
8349
1
    return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
8350
1
                               llvm::SyncScope::System);
8351
2
  }
8352
6
  case X86::BI_ReadWriteBarrier:
8353
6
  case X86::BI_ReadBarrier:
8354
6
  case X86::BI_WriteBarrier: {
8355
6
    return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
8356
6
                               llvm::SyncScope::SingleThread);
8357
6
  }
8358
7
  case X86::BI_BitScanForward:
8359
7
  case X86::BI_BitScanForward64:
8360
7
    return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanForward, E);
8361
7
  case X86::BI_BitScanReverse:
8362
7
  case X86::BI_BitScanReverse64:
8363
7
    return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanReverse, E);
8364
7
8365
1
  case X86::BI_InterlockedAnd64:
8366
1
    return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E);
8367
1
  case X86::BI_InterlockedExchange64:
8368
1
    return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E);
8369
1
  case X86::BI_InterlockedExchangeAdd64:
8370
1
    return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E);
8371
1
  case X86::BI_InterlockedExchangeSub64:
8372
1
    return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E);
8373
1
  case X86::BI_InterlockedOr64:
8374
1
    return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E);
8375
1
  case X86::BI_InterlockedXor64:
8376
1
    return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E);
8377
1
  case X86::BI_InterlockedDecrement64:
8378
1
    return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E);
8379
1
  case X86::BI_InterlockedIncrement64:
8380
1
    return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E);
8381
7
8382
2
  case X86::BI_AddressOfReturnAddress: {
8383
2
    Value *F = CGM.getIntrinsic(Intrinsic::addressofreturnaddress);
8384
2
    return Builder.CreateCall(F);
8385
7
  }
8386
2
  case X86::BI__stosb: {
8387
2
    // We treat __stosb as a volatile memset - it may not generate a "rep stosb"
8388
2
    // instruction, but it will create a memset that won't be optimized away.
8389
2
    return Builder.CreateMemSet(Ops[0], Ops[1], Ops[2], 1, true);
8390
7
  }
8391
2
  case X86::BI__ud2:
8392
2
    // llvm.trap lowers to a ud2a instruction on x86.
8393
2
    return EmitTrapCall(Intrinsic::trap);
8394
2
  case X86::BI__int2c: {
8395
2
    // This syscall signals a driver assertion failure in x86 NT kernels.
8396
2
    llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false);
8397
2
    llvm::InlineAsm *IA =
8398
2
        llvm::InlineAsm::get(FTy, "int $$0x2c", "", /*SideEffects=*/true);
8399
2
    llvm::AttributeList NoReturnAttr = llvm::AttributeList::get(
8400
2
        getLLVMContext(), llvm::AttributeList::FunctionIndex,
8401
2
        llvm::Attribute::NoReturn);
8402
2
    CallSite CS = Builder.CreateCall(IA);
8403
2
    CS.setAttributes(NoReturnAttr);
8404
2
    return CS.getInstruction();
8405
7
  }
8406
4
  case X86::BI__readfsbyte:
8407
4
  case X86::BI__readfsword:
8408
4
  case X86::BI__readfsdword:
8409
4
  case X86::BI__readfsqword: {
8410
4
    llvm::Type *IntTy = ConvertType(E->getType());
8411
4
    Value *Ptr = Builder.CreateIntToPtr(EmitScalarExpr(E->getArg(0)),
8412
4
                                        llvm::PointerType::get(IntTy, 257));
8413
4
    LoadInst *Load = Builder.CreateAlignedLoad(
8414
4
        IntTy, Ptr, getContext().getTypeAlignInChars(E->getType()));
8415
4
    Load->setVolatile(true);
8416
4
    return Load;
8417
4
  }
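  // LLVM's x86 backend models the FS segment as address space 257 (the
  // __readgs* cases below use 256 for GS); marking the load volatile keeps
  // the segment read from being reordered or optimized away.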
8418
4
  case X86::BI__readgsbyte:
8419
4
  case X86::BI__readgsword:
8420
4
  case X86::BI__readgsdword:
8421
4
  case X86::BI__readgsqword: {
8422
4
    llvm::Type *IntTy = ConvertType(E->getType());
8423
4
    Value *Ptr = Builder.CreateIntToPtr(EmitScalarExpr(E->getArg(0)),
8424
4
                                        llvm::PointerType::get(IntTy, 256));
8425
4
    LoadInst *Load = Builder.CreateAlignedLoad(
8426
4
        IntTy, Ptr, getContext().getTypeAlignInChars(E->getType()));
8427
4
    Load->setVolatile(true);
8428
4
    return Load;
8429
0
  }
8430
2.70k
  }
8431
2.70k
}
8432
8433
8434
Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
8435
1.01k
                                           const CallExpr *E) {
8436
1.01k
  SmallVector<Value*, 4> Ops;
8437
1.01k
8438
3.50k
  for (unsigned i = 0, e = E->getNumArgs(); i != e; i++)
8439
2.49k
    Ops.push_back(EmitScalarExpr(E->getArg(i)));
8440
1.01k
8441
1.01k
  Intrinsic::ID ID = Intrinsic::not_intrinsic;
8442
1.01k
8443
1.01k
  switch (BuiltinID) {
8444
0
  default: return nullptr;
8445
1.01k
8446
1.01k
  // __builtin_ppc_get_timebase is GCC 4.8+'s PowerPC-specific name for what we
8447
1.01k
  // call __builtin_readcyclecounter.
8448
1
  case PPC::BI__builtin_ppc_get_timebase:
8449
1
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::readcyclecounter));
8450
1.01k
8451
1.01k
  // vec_ld, vec_xl_be, vec_lvsl, vec_lvsr
8452
374
  case PPC::BI__builtin_altivec_lvx:
8453
374
  case PPC::BI__builtin_altivec_lvxl:
8454
374
  case PPC::BI__builtin_altivec_lvebx:
8455
374
  case PPC::BI__builtin_altivec_lvehx:
8456
374
  case PPC::BI__builtin_altivec_lvewx:
8457
374
  case PPC::BI__builtin_altivec_lvsl:
8458
374
  case PPC::BI__builtin_altivec_lvsr:
8459
374
  case PPC::BI__builtin_vsx_lxvd2x:
8460
374
  case PPC::BI__builtin_vsx_lxvw4x:
8461
374
  case PPC::BI__builtin_vsx_lxvd2x_be:
8462
374
  case PPC::BI__builtin_vsx_lxvw4x_be:
8463
374
  case PPC::BI__builtin_vsx_lxvl:
8464
374
  case PPC::BI__builtin_vsx_lxvll:
8465
374
  {
8466
374
    if(BuiltinID == PPC::BI__builtin_vsx_lxvl ||
8467
374
       BuiltinID == PPC::BI__builtin_vsx_lxvll){
8468
26
      Ops[0] = Builder.CreateBitCast(Ops[0], Int8PtrTy);
8469
374
    }else {
8470
348
      Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy);
8471
348
      Ops[0] = Builder.CreateGEP(Ops[1], Ops[0]);
8472
348
      Ops.pop_back();
8473
348
    }
8474
374
8475
374
    switch (BuiltinID) {
8476
0
    default: llvm_unreachable("Unsupported ld/lvsl/lvsr intrinsic!");
8477
108
    case PPC::BI__builtin_altivec_lvx:
8478
108
      ID = Intrinsic::ppc_altivec_lvx;
8479
108
      break;
8480
108
    case PPC::BI__builtin_altivec_lvxl:
8481
108
      ID = Intrinsic::ppc_altivec_lvxl;
8482
108
      break;
8483
12
    case PPC::BI__builtin_altivec_lvebx:
8484
12
      ID = Intrinsic::ppc_altivec_lvebx;
8485
12
      break;
8486
12
    case PPC::BI__builtin_altivec_lvehx:
8487
12
      ID = Intrinsic::ppc_altivec_lvehx;
8488
12
      break;
8489
18
    case PPC::BI__builtin_altivec_lvewx:
8490
18
      ID = Intrinsic::ppc_altivec_lvewx;
8491
18
      break;
8492
19
    case PPC::BI__builtin_altivec_lvsl:
8493
19
      ID = Intrinsic::ppc_altivec_lvsl;
8494
19
      break;
8495
19
    case PPC::BI__builtin_altivec_lvsr:
8496
19
      ID = Intrinsic::ppc_altivec_lvsr;
8497
19
      break;
8498
8
    case PPC::BI__builtin_vsx_lxvd2x:
8499
8
      ID = Intrinsic::ppc_vsx_lxvd2x;
8500
8
      break;
8501
34
    case PPC::BI__builtin_vsx_lxvw4x:
8502
34
      ID = Intrinsic::ppc_vsx_lxvw4x;
8503
34
      break;
8504
7
    case PPC::BI__builtin_vsx_lxvd2x_be:
8505
7
      ID = Intrinsic::ppc_vsx_lxvd2x_be;
8506
7
      break;
8507
3
    case PPC::BI__builtin_vsx_lxvw4x_be:
8508
3
      ID = Intrinsic::ppc_vsx_lxvw4x_be;
8509
3
      break;
8510
24
    case PPC::BI__builtin_vsx_lxvl:
8511
24
      ID = Intrinsic::ppc_vsx_lxvl;
8512
24
      break;
8513
2
    case PPC::BI__builtin_vsx_lxvll:
8514
2
      ID = Intrinsic::ppc_vsx_lxvll;
8515
2
      break;
8516
374
    }
8517
374
    llvm::Function *F = CGM.getIntrinsic(ID);
8518
374
    return Builder.CreateCall(F, Ops, "");
8519
374
  }
8520
374
8521
374
  // vec_st, vec_xst_be
8522
486
  case PPC::BI__builtin_altivec_stvx:
8523
486
  case PPC::BI__builtin_altivec_stvxl:
8524
486
  case PPC::BI__builtin_altivec_stvebx:
8525
486
  case PPC::BI__builtin_altivec_stvehx:
8526
486
  case PPC::BI__builtin_altivec_stvewx:
8527
486
  case PPC::BI__builtin_vsx_stxvd2x:
8528
486
  case PPC::BI__builtin_vsx_stxvw4x:
8529
486
  case PPC::BI__builtin_vsx_stxvd2x_be:
8530
486
  case PPC::BI__builtin_vsx_stxvw4x_be:
8531
486
  case PPC::BI__builtin_vsx_stxvl:
8532
486
  case PPC::BI__builtin_vsx_stxvll:
8533
486
  {
8534
486
    if(BuiltinID == PPC::BI__builtin_vsx_stxvl ||
8535
486
      BuiltinID == PPC::BI__builtin_vsx_stxvll){
8536
26
      Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy);
8537
486
    }else {
8538
460
      Ops[2] = Builder.CreateBitCast(Ops[2], Int8PtrTy);
8539
460
      Ops[1] = Builder.CreateGEP(Ops[2], Ops[1]);
8540
460
      Ops.pop_back();
8541
460
    }
8542
486
8543
486
    switch (BuiltinID) {
8544
0
    default: llvm_unreachable("Unsupported st intrinsic!");
8545
150
    case PPC::BI__builtin_altivec_stvx:
8546
150
      ID = Intrinsic::ppc_altivec_stvx;
8547
150
      break;
8548
156
    case PPC::BI__builtin_altivec_stvxl:
8549
156
      ID = Intrinsic::ppc_altivec_stvxl;
8550
156
      break;
8551
24
    case PPC::BI__builtin_altivec_stvebx:
8552
24
      ID = Intrinsic::ppc_altivec_stvebx;
8553
24
      break;
8554
36
    case PPC::BI__builtin_altivec_stvehx:
8555
36
      ID = Intrinsic::ppc_altivec_stvehx;
8556
36
      break;
8557
30
    case PPC::BI__builtin_altivec_stvewx:
8558
30
      ID = Intrinsic::ppc_altivec_stvewx;
8559
30
      break;
8560
8
    case PPC::BI__builtin_vsx_stxvd2x:
8561
8
      ID = Intrinsic::ppc_vsx_stxvd2x;
8562
8
      break;
8563
46
    case PPC::BI__builtin_vsx_stxvw4x:
8564
46
      ID = Intrinsic::ppc_vsx_stxvw4x;
8565
46
      break;
8566
7
    case PPC::BI__builtin_vsx_stxvd2x_be:
8567
7
      ID = Intrinsic::ppc_vsx_stxvd2x_be;
8568
7
      break;
8569
3
    case PPC::BI__builtin_vsx_stxvw4x_be:
8570
3
      ID = Intrinsic::ppc_vsx_stxvw4x_be;
8571
3
      break;
8572
24
    case PPC::BI__builtin_vsx_stxvl:
8573
24
      ID = Intrinsic::ppc_vsx_stxvl;
8574
24
      break;
8575
2
    case PPC::BI__builtin_vsx_stxvll:
8576
2
      ID = Intrinsic::ppc_vsx_stxvll;
8577
2
      break;
8578
486
    }
8579
486
    llvm::Function *F = CGM.getIntrinsic(ID);
8580
486
    return Builder.CreateCall(F, Ops, "");
8581
486
  }
8582
486
  // Square root
8583
4
  case PPC::BI__builtin_vsx_xvsqrtsp:
8584
4
  case PPC::BI__builtin_vsx_xvsqrtdp: {
8585
4
    llvm::Type *ResultType = ConvertType(E->getType());
8586
4
    Value *X = EmitScalarExpr(E->getArg(0));
8587
4
    ID = Intrinsic::sqrt;
8588
4
    llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
8589
4
    return Builder.CreateCall(F, X);
8590
4
  }
8591
4
  // Count leading zeros
8592
17
  case PPC::BI__builtin_altivec_vclzb:
8593
17
  case PPC::BI__builtin_altivec_vclzh:
8594
17
  case PPC::BI__builtin_altivec_vclzw:
8595
17
  case PPC::BI__builtin_altivec_vclzd: {
8596
17
    llvm::Type *ResultType = ConvertType(E->getType());
8597
17
    Value *X = EmitScalarExpr(E->getArg(0));
8598
17
    Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
8599
17
    Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType);
8600
17
    return Builder.CreateCall(F, {X, Undef});
8601
17
  }
8602
16
  case PPC::BI__builtin_altivec_vctzb:
8603
16
  case PPC::BI__builtin_altivec_vctzh:
8604
16
  case PPC::BI__builtin_altivec_vctzw:
8605
16
  case PPC::BI__builtin_altivec_vctzd: {
8606
16
    llvm::Type *ResultType = ConvertType(E->getType());
8607
16
    Value *X = EmitScalarExpr(E->getArg(0));
8608
16
    Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
8609
16
    Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType);
8610
16
    return Builder.CreateCall(F, {X, Undef});
8611
16
  }
8612
16
  case PPC::BI__builtin_altivec_vpopcntb:
8613
16
  case PPC::BI__builtin_altivec_vpopcnth:
8614
16
  case PPC::BI__builtin_altivec_vpopcntw:
8615
16
  case PPC::BI__builtin_altivec_vpopcntd: {
8616
16
    llvm::Type *ResultType = ConvertType(E->getType());
8617
16
    Value *X = EmitScalarExpr(E->getArg(0));
8618
16
    llvm::Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
8619
16
    return Builder.CreateCall(F, X);
8620
16
  }
8621
16
  // Copy sign
8622
4
  case PPC::BI__builtin_vsx_xvcpsgnsp:
8623
4
  case PPC::BI__builtin_vsx_xvcpsgndp: {
8624
4
    llvm::Type *ResultType = ConvertType(E->getType());
8625
4
    Value *X = EmitScalarExpr(E->getArg(0));
8626
4
    Value *Y = EmitScalarExpr(E->getArg(1));
8627
4
    ID = Intrinsic::copysign;
8628
4
    llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
8629
4
    return Builder.CreateCall(F, {X, Y});
8630
4
  }
8631
4
  // Rounding/truncation
8632
24
  case PPC::BI__builtin_vsx_xvrspip:
8633
24
  case PPC::BI__builtin_vsx_xvrdpip:
8634
24
  case PPC::BI__builtin_vsx_xvrdpim:
8635
24
  case PPC::BI__builtin_vsx_xvrspim:
8636
24
  case PPC::BI__builtin_vsx_xvrdpi:
8637
24
  case PPC::BI__builtin_vsx_xvrspi:
8638
24
  case PPC::BI__builtin_vsx_xvrdpic:
8639
24
  case PPC::BI__builtin_vsx_xvrspic:
8640
24
  case PPC::BI__builtin_vsx_xvrdpiz:
8641
24
  case PPC::BI__builtin_vsx_xvrspiz: {
8642
24
    llvm::Type *ResultType = ConvertType(E->getType());
8643
24
    Value *X = EmitScalarExpr(E->getArg(0));
8644
24
    if (BuiltinID == PPC::BI__builtin_vsx_xvrdpim ||
8645
22
        BuiltinID == PPC::BI__builtin_vsx_xvrspim)
8646
4
      ID = Intrinsic::floor;
8647
20
    else 
if (20
BuiltinID == PPC::BI__builtin_vsx_xvrdpi ||
8648
16
             BuiltinID == PPC::BI__builtin_vsx_xvrspi)
8649
8
      ID = Intrinsic::round;
8650
12
    else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpic ||
8651
10
             BuiltinID == PPC::BI__builtin_vsx_xvrspic)
8652
4
      ID = Intrinsic::nearbyint;
8653
8
    else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpip ||
8654
6
             BuiltinID == PPC::BI__builtin_vsx_xvrspip)
8655
4
      ID = Intrinsic::ceil;
8656
4
    else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpiz ||
8657
2
             BuiltinID == PPC::BI__builtin_vsx_xvrspiz)
8658
4
      ID = Intrinsic::trunc;
8659
24
    llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
8660
24
    return Builder.CreateCall(F, X);
8661
24
  }
8662
24
8663
24
  // Absolute value
8664
4
  case PPC::BI__builtin_vsx_xvabsdp:
8665
4
  case PPC::BI__builtin_vsx_xvabssp: {
8666
4
    llvm::Type *ResultType = ConvertType(E->getType());
8667
4
    Value *X = EmitScalarExpr(E->getArg(0));
8668
4
    llvm::Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
8669
4
    return Builder.CreateCall(F, X);
8670
4
  }
8671
4
8672
4
  // FMA variations
8673
16
  case PPC::BI__builtin_vsx_xvmaddadp:
8674
16
  case PPC::BI__builtin_vsx_xvmaddasp:
8675
16
  case PPC::BI__builtin_vsx_xvnmaddadp:
8676
16
  case PPC::BI__builtin_vsx_xvnmaddasp:
8677
16
  case PPC::BI__builtin_vsx_xvmsubadp:
8678
16
  case PPC::BI__builtin_vsx_xvmsubasp:
8679
16
  case PPC::BI__builtin_vsx_xvnmsubadp:
8680
16
  case PPC::BI__builtin_vsx_xvnmsubasp: {
8681
16
    llvm::Type *ResultType = ConvertType(E->getType());
8682
16
    Value *X = EmitScalarExpr(E->getArg(0));
8683
16
    Value *Y = EmitScalarExpr(E->getArg(1));
8684
16
    Value *Z = EmitScalarExpr(E->getArg(2));
8685
16
    Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
8686
16
    llvm::Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
8687
16
    switch (BuiltinID) {
8688
4
      case PPC::BI__builtin_vsx_xvmaddadp:
8689
4
      case PPC::BI__builtin_vsx_xvmaddasp:
8690
4
        return Builder.CreateCall(F, {X, Y, Z});
8691
4
      case PPC::BI__builtin_vsx_xvnmaddadp:
8692
4
      case PPC::BI__builtin_vsx_xvnmaddasp:
8693
4
        return Builder.CreateFSub(Zero,
8694
4
                                  Builder.CreateCall(F, {X, Y, Z}), "sub");
8695
4
      case PPC::BI__builtin_vsx_xvmsubadp:
8696
4
      case PPC::BI__builtin_vsx_xvmsubasp:
8697
4
        return Builder.CreateCall(F,
8698
4
                                  {X, Y, Builder.CreateFSub(Zero, Z, "sub")});
8699
4
      case PPC::BI__builtin_vsx_xvnmsubadp:
8700
4
      case PPC::BI__builtin_vsx_xvnmsubasp:
8701
4
        Value *FsubRes =
8702
4
          Builder.CreateCall(F, {X, Y, Builder.CreateFSub(Zero, Z, "sub")});
8703
4
        return Builder.CreateFSub(Zero, FsubRes, "sub");
8704
0
    }
8705
0
    llvm_unreachable("Unknown FMA operation");
8706
0
    return nullptr; // Suppress no-return warning
8707
0
  }
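  // Sign summary: xvmadda* = fma(X, Y, Z), xvnmadda* = -fma(X, Y, Z),
  // xvmsuba* = fma(X, Y, -Z), xvnmsuba* = -fma(X, Y, -Z); each negation is
  // emitted as a subtraction from negative zero so it lowers to an fneg.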
8708
0
8709
4
  case PPC::BI__builtin_vsx_insertword: {
8710
4
    llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxinsertw);
8711
4
8712
4
    // Third argument is a compile-time constant int. It must be clamped
8713
4
    // to the range [0, 12].
8714
4
    ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]);
8715
4
    assert(ArgCI &&
8716
4
           "Third arg to xxinsertw intrinsic must be constant integer");
8717
4
    const int64_t MaxIndex = 12;
8718
4
    int64_t Index = clamp(ArgCI->getSExtValue(), 0, MaxIndex);
8719
4
8720
4
    // The builtin semantics don't exactly match the xxinsertw instruction's
8721
4
    // semantics (which ppc_vsx_xxinsertw follows). The builtin extracts the
8722
4
    // word from the first argument, and inserts it in the second argument. The
8723
4
    // instruction extracts the word from its second input register and inserts
8724
4
    // it into its first input register, so swap the first and second arguments.
8725
4
    std::swap(Ops[0], Ops[1]);
8726
4
8727
4
    // Need to cast the second argument from a vector of unsigned int to a
8728
4
    // vector of long long.
8729
4
    Ops[1] = Builder.CreateBitCast(Ops[1], llvm::VectorType::get(Int64Ty, 2));
8730
4
8731
4
    if (getTarget().isLittleEndian()) {
8732
2
      // Create a shuffle mask of (1, 0)
8733
2
      Constant *ShuffleElts[2] = { ConstantInt::get(Int32Ty, 1),
8734
2
                                   ConstantInt::get(Int32Ty, 0)
8735
2
                                 };
8736
2
      Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts);
8737
2
8738
2
      // Reverse the double words in the vector we will extract from.
8739
2
      Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2));
8740
2
      Ops[0] = Builder.CreateShuffleVector(Ops[0], Ops[0], ShuffleMask);
8741
2
8742
2
      // Reverse the index.
8743
2
      Index = MaxIndex - Index;
8744
2
    }
8745
4
8746
4
    // Intrinsic expects the first arg to be a vector of int.
8747
4
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 4));
8748
4
    Ops[2] = ConstantInt::getSigned(Int32Ty, Index);
8749
4
    return Builder.CreateCall(F, Ops);
8750
0
  }
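  // Worked example (little endian): Index == 0 becomes MaxIndex - 0 == 12,
  // which together with the doubleword swap preserves the builtin's element
  // numbering even though the instruction indexes in big-endian order.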
8751
0
8752
4
  case PPC::BI__builtin_vsx_extractuword: {
8753
4
    llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxextractuw);
8754
4
8755
4
    // Intrinsic expects the first argument to be a vector of doublewords.
8756
4
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2));
8757
4
8758
4
    // The second argument is a compile-time constant int that needs to
8759
4
    // be clamped to the range [0, 12].
8760
4
    ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[1]);
8761
4
    assert(ArgCI &&
8762
4
           "Second Arg to xxextractuw intrinsic must be a constant integer!");
8763
4
    const int64_t MaxIndex = 12;
8764
4
    int64_t Index = clamp(ArgCI->getSExtValue(), 0, MaxIndex);
8765
4
8766
4
    if (getTarget().isLittleEndian()) {
8767
2
      // Reverse the index.
8768
2
      Index = MaxIndex - Index;
8769
2
      Ops[1] = ConstantInt::getSigned(Int32Ty, Index);
8770
2
8771
2
      // Emit the call, then reverse the double words of the results vector.
8772
2
      Value *Call = Builder.CreateCall(F, Ops);
8773
2
8774
2
      // Create a shuffle mask of (1, 0)
8775
2
      Constant *ShuffleElts[2] = { ConstantInt::get(Int32Ty, 1),
8776
2
                                   ConstantInt::get(Int32Ty, 0)
8777
2
                                 };
8778
2
      Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts);
8779
2
8780
2
      Value *ShuffleCall = Builder.CreateShuffleVector(Call, Call, ShuffleMask);
8781
2
      return ShuffleCall;
8782
0
    } else {
8783
2
      Ops[1] = ConstantInt::getSigned(Int32Ty, Index);
8784
2
      return Builder.CreateCall(F, Ops);
8785
2
    }
8786
0
  }
8787
0
8788
22
  case PPC::BI__builtin_vsx_xxpermdi: {
8789
22
    ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]);
8790
22
    assert(ArgCI && "Third arg must be constant integer!");
8791
22
8792
22
    unsigned Index = ArgCI->getZExtValue();
8793
22
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2));
8794
22
    Ops[1] = Builder.CreateBitCast(Ops[1], llvm::VectorType::get(Int64Ty, 2));
8795
22
8796
22
    // Element zero comes from the first input vector and element one comes from
8797
22
    // the second. The element indices within each vector are numbered in big
8798
22
    // endian order so the shuffle mask must be adjusted for this on little
8799
22
    // endian platforms (i.e. index is complemented and source vector reversed).
8800
22
    unsigned ElemIdx0;
8801
22
    unsigned ElemIdx1;
8802
22
    if (getTarget().isLittleEndian()) {
8803
11
      ElemIdx0 = (~Index & 1) + 2;
8804
11
      ElemIdx1 = (~Index & 2) >> 1;
8805
22
    } else { // BigEndian
8806
11
      ElemIdx0 = (Index & 2) >> 1;
8807
11
      ElemIdx1 = 2 + (Index & 1);
8808
11
    }
8809
22
8810
22
    Constant *ShuffleElts[2] = {ConstantInt::get(Int32Ty, ElemIdx0),
8811
22
                                ConstantInt::get(Int32Ty, ElemIdx1)};
8812
22
    Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts);
8813
22
8814
22
    Value *ShuffleCall =
8815
22
        Builder.CreateShuffleVector(Ops[0], Ops[1], ShuffleMask);
8816
22
    QualType BIRetType = E->getType();
8817
22
    auto RetTy = ConvertType(BIRetType);
8818
22
    return Builder.CreateBitCast(ShuffleCall, RetTy);
8819
0
  }
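  // Worked example (big endian): Index == 3 gives ElemIdx0 == 1 and
  // ElemIdx1 == 3, so the shuffle takes the high doubleword of Ops[0]
  // followed by the high doubleword of Ops[1].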
8820
0
8821
22
  case PPC::BI__builtin_vsx_xxsldwi: {
8822
22
    ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]);
8823
22
    assert(ArgCI && "Third argument must be a compile time constant");
8824
22
    unsigned Index = ArgCI->getZExtValue() & 0x3;
8825
22
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 4));
8826
22
    Ops[1] = Builder.CreateBitCast(Ops[1], llvm::VectorType::get(Int32Ty, 4));
8827
22
8828
22
    // Create a shuffle mask
8829
22
    unsigned ElemIdx0;
8830
22
    unsigned ElemIdx1;
8831
22
    unsigned ElemIdx2;
8832
22
    unsigned ElemIdx3;
8833
22
    if (getTarget().isLittleEndian()) {
8834
11
      // Little endian element N comes from element 8+N-Index of the
8835
11
      // concatenated wide vector (of course, using modulo arithmetic on
8836
11
      // the total number of elements).
8837
11
      ElemIdx0 = (8 - Index) % 8;
8838
11
      ElemIdx1 = (9 - Index) % 8;
8839
11
      ElemIdx2 = (10 - Index) % 8;
8840
11
      ElemIdx3 = (11 - Index) % 8;
8841
22
    } else {
8842
11
      // Big endian ElemIdx<N> = Index + N
8843
11
      ElemIdx0 = Index;
8844
11
      ElemIdx1 = Index + 1;
8845
11
      ElemIdx2 = Index + 2;
8846
11
      ElemIdx3 = Index + 3;
8847
11
    }
8848
22
8849
22
    Constant *ShuffleElts[4] = {ConstantInt::get(Int32Ty, ElemIdx0),
8850
22
                                ConstantInt::get(Int32Ty, ElemIdx1),
8851
22
                                ConstantInt::get(Int32Ty, ElemIdx2),
8852
22
                                ConstantInt::get(Int32Ty, ElemIdx3)};
8853
22
8854
22
    Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts);
8855
22
    Value *ShuffleCall =
8856
22
        Builder.CreateShuffleVector(Ops[0], Ops[1], ShuffleMask);
8857
22
    QualType BIRetType = E->getType();
8858
22
    auto RetTy = ConvertType(BIRetType);
8859
22
    return Builder.CreateBitCast(ShuffleCall, RetTy);
8860
0
  }
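  // Worked example: Index == 1 selects words {1, 2, 3, 4} of the
  // concatenated inputs on big-endian targets and words {7, 0, 1, 2} on
  // little-endian targets, per the formulas above.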
8861
1.01k
  }
8862
1.01k
}
8863
8864
Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
8865
105
                                              const CallExpr *E) {
8866
105
  switch (BuiltinID) {
8867
4
  case AMDGPU::BI__builtin_amdgcn_div_scale:
8868
4
  case AMDGPU::BI__builtin_amdgcn_div_scalef: {
8869
4
    // Translate from the intrinsics's struct return to the builtin's out
8870
4
    // argument.
8871
4
8872
4
    Address FlagOutPtr = EmitPointerWithAlignment(E->getArg(3));
8873
4
8874
4
    llvm::Value *X = EmitScalarExpr(E->getArg(0));
8875
4
    llvm::Value *Y = EmitScalarExpr(E->getArg(1));
8876
4
    llvm::Value *Z = EmitScalarExpr(E->getArg(2));
8877
4
8878
4
    llvm::Value *Callee = CGM.getIntrinsic(Intrinsic::amdgcn_div_scale,
8879
4
                                           X->getType());
8880
4
8881
4
    llvm::Value *Tmp = Builder.CreateCall(Callee, {X, Y, Z});
8882
4
8883
4
    llvm::Value *Result = Builder.CreateExtractValue(Tmp, 0);
8884
4
    llvm::Value *Flag = Builder.CreateExtractValue(Tmp, 1);
8885
4
8886
4
    llvm::Type *RealFlagType
8887
4
      = FlagOutPtr.getPointer()->getType()->getPointerElementType();
8888
4
8889
4
    llvm::Value *FlagExt = Builder.CreateZExt(Flag, RealFlagType);
8890
4
    Builder.CreateStore(FlagExt, FlagOutPtr);
8891
4
    return Result;
8892
4
  }
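  // That is, the {result, flag} pair returned by llvm.amdgcn.div.scale is
  // split here: element 0 becomes the builtin's return value and element 1,
  // zero-extended to the out parameter's pointee type, is stored through
  // the fourth argument.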
8893
4
  case AMDGPU::BI__builtin_amdgcn_div_fmas:
8894
4
  case AMDGPU::BI__builtin_amdgcn_div_fmasf: {
8895
4
    llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
8896
4
    llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
8897
4
    llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
8898
4
    llvm::Value *Src3 = EmitScalarExpr(E->getArg(3));
8899
4
8900
4
    llvm::Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_div_fmas,
8901
4
                                      Src0->getType());
8902
4
    llvm::Value *Src3ToBool = Builder.CreateIsNotNull(Src3);
8903
4
    return Builder.CreateCall(F, {Src0, Src1, Src2, Src3ToBool});
8904
4
  }
8905
4
8906
0
  case AMDGPU::BI__builtin_amdgcn_ds_swizzle:
8907
0
    return emitBinaryBuiltin(*this, E, Intrinsic::amdgcn_ds_swizzle);
8908
2
  case AMDGPU::BI__builtin_amdgcn_mov_dpp: {
8909
2
    llvm::SmallVector<llvm::Value *, 5> Args;
8910
12
    for (unsigned I = 0; I != 5; ++I)
8911
10
      Args.push_back(EmitScalarExpr(E->getArg(I)));
8912
2
    Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_mov_dpp,
8913
2
                                    Args[0]->getType());
8914
2
    return Builder.CreateCall(F, Args);
8915
4
  }
8916
6
  case AMDGPU::BI__builtin_amdgcn_div_fixup:
8917
6
  case AMDGPU::BI__builtin_amdgcn_div_fixupf:
8918
6
  case AMDGPU::BI__builtin_amdgcn_div_fixuph:
8919
6
    return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_div_fixup);
8920
4
  case AMDGPU::BI__builtin_amdgcn_trig_preop:
8921
4
  case AMDGPU::BI__builtin_amdgcn_trig_preopf:
8922
4
    return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_trig_preop);
8923
6
  case AMDGPU::BI__builtin_amdgcn_rcp:
8924
6
  case AMDGPU::BI__builtin_amdgcn_rcpf:
8925
6
  case AMDGPU::BI__builtin_amdgcn_rcph:
8926
6
    return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rcp);
8927
6
  case AMDGPU::BI__builtin_amdgcn_rsq:
8928
6
  case AMDGPU::BI__builtin_amdgcn_rsqf:
8929
6
  case AMDGPU::BI__builtin_amdgcn_rsqh:
8930
6
    return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq);
8931
4
  case AMDGPU::BI__builtin_amdgcn_rsq_clamp:
8932
4
  case AMDGPU::BI__builtin_amdgcn_rsq_clampf:
8933
4
    return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq_clamp);
8934
4
  case AMDGPU::BI__builtin_amdgcn_sinf:
8935
4
  case AMDGPU::BI__builtin_amdgcn_sinh:
8936
4
    return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_sin);
8937
4
  case AMDGPU::BI__builtin_amdgcn_cosf:
8938
4
  case AMDGPU::BI__builtin_amdgcn_cosh:
8939
4
    return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_cos);
8940
2
  case AMDGPU::BI__builtin_amdgcn_log_clampf:
8941
2
    return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_log_clamp);
8942
6
  case AMDGPU::BI__builtin_amdgcn_ldexp:
8943
6
  case AMDGPU::BI__builtin_amdgcn_ldexpf:
8944
6
  case AMDGPU::BI__builtin_amdgcn_ldexph:
8945
6
    return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_ldexp);
8946
6
  case AMDGPU::BI__builtin_amdgcn_frexp_mant:
8947
6
  case AMDGPU::BI__builtin_amdgcn_frexp_mantf:
8948
6
  case AMDGPU::BI__builtin_amdgcn_frexp_manth:
8949
6
    return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_frexp_mant);
8950
4
  case AMDGPU::BI__builtin_amdgcn_frexp_exp:
8951
4
  case AMDGPU::BI__builtin_amdgcn_frexp_expf: {
8952
4
    Value *Src0 = EmitScalarExpr(E->getArg(0));
8953
4
    Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
8954
4
                                { Builder.getInt32Ty(), Src0->getType() });
8955
4
    return Builder.CreateCall(F, Src0);
8956
4
  }
8957
2
  case AMDGPU::BI__builtin_amdgcn_frexp_exph: {
8958
2
    Value *Src0 = EmitScalarExpr(E->getArg(0));
8959
2
    Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
8960
2
                                { Builder.getInt16Ty(), Src0->getType() });
8961
2
    return Builder.CreateCall(F, Src0);
8962
4
  }
8963
6
  case AMDGPU::BI__builtin_amdgcn_fract:
8964
6
  case AMDGPU::BI__builtin_amdgcn_fractf:
8965
6
  case AMDGPU::BI__builtin_amdgcn_fracth:
8966
6
    return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_fract);
8967
0
  case AMDGPU::BI__builtin_amdgcn_lerp:
8968
0
    return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_lerp);
8969
8
  case AMDGPU::BI__builtin_amdgcn_uicmp:
8970
8
  case AMDGPU::BI__builtin_amdgcn_uicmpl:
8971
8
  case AMDGPU::BI__builtin_amdgcn_sicmp:
8972
8
  case AMDGPU::BI__builtin_amdgcn_sicmpl:
8973
8
    return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_icmp);
8974
4
  case AMDGPU::BI__builtin_amdgcn_fcmp:
8975
4
  case AMDGPU::BI__builtin_amdgcn_fcmpf:
8976
4
    return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_fcmp);
8977
6
  case AMDGPU::BI__builtin_amdgcn_class:
8978
6
  case AMDGPU::BI__builtin_amdgcn_classf:
8979
6
  case AMDGPU::BI__builtin_amdgcn_classh:
8980
6
    return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_class);
8981
5
  case AMDGPU::BI__builtin_amdgcn_fmed3f:
8982
5
  case AMDGPU::BI__builtin_amdgcn_fmed3h:
8983
5
    return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_fmed3);
8984
2
  case AMDGPU::BI__builtin_amdgcn_read_exec: {
8985
2
    CallInst *CI = cast<CallInst>(
8986
2
      EmitSpecialRegisterBuiltin(*this, E, Int64Ty, Int64Ty, true, "exec"));
8987
2
    CI->setConvergent();
8988
2
    return CI;
8989
5
  }
8990
5
8991
5
  // amdgcn workitem
8992
2
  case AMDGPU::BI__builtin_amdgcn_workitem_id_x:
8993
2
    return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_x, 0, 1024);
8994
2
  case AMDGPU::BI__builtin_amdgcn_workitem_id_y:
8995
2
    return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_y, 0, 1024);
8996
2
  case AMDGPU::BI__builtin_amdgcn_workitem_id_z:
8997
2
    return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_z, 0, 1024);
8998
5
8999
5
  // r600 intrinsics
9000
1
  case AMDGPU::BI__builtin_r600_recipsqrt_ieee:
9001
1
  case AMDGPU::BI__builtin_r600_recipsqrt_ieeef:
9002
1
    return emitUnaryBuiltin(*this, E, Intrinsic::r600_recipsqrt_ieee);
9003
1
  case AMDGPU::BI__builtin_r600_read_tidig_x:
9004
1
    return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_x, 0, 1024);
9005
1
  case AMDGPU::BI__builtin_r600_read_tidig_y:
9006
1
    return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_y, 0, 1024);
9007
1
  case AMDGPU::BI__builtin_r600_read_tidig_z:
9008
1
    return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_z, 0, 1024);
9009
0
  default:
9010
0
    return nullptr;
9011
0
  }
9012
0
}
9013
9014
/// Handle a SystemZ function in which the final argument is a pointer
9015
/// to an int that receives the post-instruction CC value.  At the LLVM level
9016
/// this is represented as a function that returns a {result, cc} pair.
9017
static Value *EmitSystemZIntrinsicWithCC(CodeGenFunction &CGF,
9018
                                         unsigned IntrinsicID,
9019
626
                                         const CallExpr *E) {
9020
626
  unsigned NumArgs = E->getNumArgs() - 1;
9021
626
  SmallVector<Value *, 8> Args(NumArgs);
9022
1.99k
  for (unsigned I = 0; I < NumArgs; ++I)
9023
1.36k
    Args[I] = CGF.EmitScalarExpr(E->getArg(I));
9024
626
  Address CCPtr = CGF.EmitPointerWithAlignment(E->getArg(NumArgs));
9025
626
  Value *F = CGF.CGM.getIntrinsic(IntrinsicID);
9026
626
  Value *Call = CGF.Builder.CreateCall(F, Args);
9027
626
  Value *CC = CGF.Builder.CreateExtractValue(Call, 1);
9028
626
  CGF.Builder.CreateStore(CC, CCPtr);
9029
626
  return CGF.Builder.CreateExtractValue(Call, 0);
9030
626
}
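// Usage sketch (with a hypothetical builtin name __builtin_s390_vfoo):
//   int cc;
//   result = __builtin_s390_vfoo(a, b, &cc);
// The intrinsic's {result, cc} aggregate is split above: element 1 is
// stored through the trailing pointer argument and element 0 is returned.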
9031
9032
Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID,
9033
815
                                               const CallExpr *E) {
9034
815
  switch (BuiltinID) {
9035
13
  case SystemZ::BI__builtin_tbegin: {
9036
13
    Value *TDB = EmitScalarExpr(E->getArg(0));
9037
13
    Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c);
9038
13
    Value *F = CGM.getIntrinsic(Intrinsic::s390_tbegin);
9039
13
    return Builder.CreateCall(F, {TDB, Control});
9040
815
  }
9041
16
  case SystemZ::BI__builtin_tbegin_nofloat: {
9042
16
    Value *TDB = EmitScalarExpr(E->getArg(0));
9043
16
    Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c);
9044
16
    Value *F = CGM.getIntrinsic(Intrinsic::s390_tbegin_nofloat);
9045
16
    return Builder.CreateCall(F, {TDB, Control});
9046
815
  }
9047
2
  case SystemZ::BI__builtin_tbeginc: {
9048
2
    Value *TDB = llvm::ConstantPointerNull::get(Int8PtrTy);
9049
2
    Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff08);
9050
2
    Value *F = CGM.getIntrinsic(Intrinsic::s390_tbeginc);
9051
2
    return Builder.CreateCall(F, {TDB, Control});
9052
815
  }
9053
10
  case SystemZ::BI__builtin_tabort: {
9054
10
    Value *Data = EmitScalarExpr(E->getArg(0));
9055
10
    Value *F = CGM.getIntrinsic(Intrinsic::s390_tabort);
9056
10
    return Builder.CreateCall(F, Builder.CreateSExt(Data, Int64Ty, "tabort"));
9057
815
  }
9058
22
  case SystemZ::BI__builtin_non_tx_store: {
9059
22
    Value *Address = EmitScalarExpr(E->getArg(0));
9060
22
    Value *Data = EmitScalarExpr(E->getArg(1));
9061
22
    Value *F = CGM.getIntrinsic(Intrinsic::s390_ntstg);
9062
22
    return Builder.CreateCall(F, {Data, Address});
9063
815
  }
9064
815
9065
815
  // Vector builtins.  Note that most vector builtins are mapped automatically
9066
815
  // to target-specific LLVM intrinsics.  The ones handled specially here can
9067
815
  // be represented via standard LLVM IR, which is preferable to enable common
9068
815
  // LLVM optimizations.
9069
815
9070
12
  case SystemZ::BI__builtin_s390_vpopctb:
9071
12
  case SystemZ::BI__builtin_s390_vpopcth:
9072
12
  case SystemZ::BI__builtin_s390_vpopctf:
9073
12
  case SystemZ::BI__builtin_s390_vpopctg: {
9074
12
    llvm::Type *ResultType = ConvertType(E->getType());
9075
12
    Value *X = EmitScalarExpr(E->getArg(0));
9076
12
    Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
9077
12
    return Builder.CreateCall(F, X);
9078
12
  }
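// Editor's note: as the comment above explains, these popcount builtins map
// onto the generic intrinsic; e.g. __builtin_s390_vpopctb on a <16 x i8>
// operand becomes roughly
//
//   %r = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %x)
//
// which ordinary LLVM optimizations can reason about, unlike an opaque
// target-specific call.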
9079
12
9080
12
  case SystemZ::BI__builtin_s390_vclzb:
9081
12
  case SystemZ::BI__builtin_s390_vclzh:
9082
12
  case SystemZ::BI__builtin_s390_vclzf:
9083
12
  case SystemZ::BI__builtin_s390_vclzg: {
9084
12
    llvm::Type *ResultType = ConvertType(E->getType());
9085
12
    Value *X = EmitScalarExpr(E->getArg(0));
9086
12
    Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
9087
12
    Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType);
9088
12
    return Builder.CreateCall(F, {X, Undef});
9089
12
  }
9090
12
9091
12
  case SystemZ::BI__builtin_s390_vctzb:
9092
12
  case SystemZ::BI__builtin_s390_vctzh:
9093
12
  case SystemZ::BI__builtin_s390_vctzf:
9094
12
  case SystemZ::BI__builtin_s390_vctzg: {
9095
12
    llvm::Type *ResultType = ConvertType(E->getType());
9096
12
    Value *X = EmitScalarExpr(E->getArg(0));
9097
12
    Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
9098
12
    Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType);
9099
12
    return Builder.CreateCall(F, {X, Undef});
9100
12
  }
9101
12
9102
5
  case SystemZ::BI__builtin_s390_vfsqsb:
9103
5
  case SystemZ::BI__builtin_s390_vfsqdb: {
9104
5
    llvm::Type *ResultType = ConvertType(E->getType());
9105
5
    Value *X = EmitScalarExpr(E->getArg(0));
9106
5
    Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType);
9107
5
    return Builder.CreateCall(F, X);
9108
5
  }
9109
5
  case SystemZ::BI__builtin_s390_vfmasb:
9110
5
  case SystemZ::BI__builtin_s390_vfmadb: {
9111
5
    llvm::Type *ResultType = ConvertType(E->getType());
9112
5
    Value *X = EmitScalarExpr(E->getArg(0));
9113
5
    Value *Y = EmitScalarExpr(E->getArg(1));
9114
5
    Value *Z = EmitScalarExpr(E->getArg(2));
9115
5
    Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
9116
5
    return Builder.CreateCall(F, {X, Y, Z});
9117
5
  }
9118
5
  case SystemZ::BI__builtin_s390_vfmssb:
9119
5
  case SystemZ::BI__builtin_s390_vfmsdb: {
9120
5
    llvm::Type *ResultType = ConvertType(E->getType());
9121
5
    Value *X = EmitScalarExpr(E->getArg(0));
9122
5
    Value *Y = EmitScalarExpr(E->getArg(1));
9123
5
    Value *Z = EmitScalarExpr(E->getArg(2));
9124
5
    Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
9125
5
    Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
9126
5
    return Builder.CreateCall(F, {X, Y, Builder.CreateFSub(Zero, Z, "sub")});
9127
5
  }
9128
4
  case SystemZ::BI__builtin_s390_vfnmasb:
9129
4
  case SystemZ::BI__builtin_s390_vfnmadb: {
9130
4
    llvm::Type *ResultType = ConvertType(E->getType());
9131
4
    Value *X = EmitScalarExpr(E->getArg(0));
9132
4
    Value *Y = EmitScalarExpr(E->getArg(1));
9133
4
    Value *Z = EmitScalarExpr(E->getArg(2));
9134
4
    Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
9135
4
    Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
9136
4
    return Builder.CreateFSub(Zero, Builder.CreateCall(F, {X, Y, Z}), "sub");
9137
4
  }
9138
4
  case SystemZ::BI__builtin_s390_vfnmssb:
9139
4
  case SystemZ::BI__builtin_s390_vfnmsdb: {
9140
4
    llvm::Type *ResultType = ConvertType(E->getType());
9141
4
    Value *X = EmitScalarExpr(E->getArg(0));
9142
4
    Value *Y = EmitScalarExpr(E->getArg(1));
9143
4
    Value *Z = EmitScalarExpr(E->getArg(2));
9144
4
    Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
9145
4
    Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
9146
4
    Value *NegZ = Builder.CreateFSub(Zero, Z, "sub");
9147
4
    return Builder.CreateFSub(Zero, Builder.CreateCall(F, {X, Y, NegZ}));
9148
4
  }
9149
5
  case SystemZ::BI__builtin_s390_vflpsb:
9150
5
  case SystemZ::BI__builtin_s390_vflpdb: {
9151
5
    llvm::Type *ResultType = ConvertType(E->getType());
9152
5
    Value *X = EmitScalarExpr(E->getArg(0));
9153
5
    Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
9154
5
    return Builder.CreateCall(F, X);
9155
5
  }
9156
5
  case SystemZ::BI__builtin_s390_vflnsb:
9157
5
  case SystemZ::BI__builtin_s390_vflndb: {
9158
5
    llvm::Type *ResultType = ConvertType(E->getType());
9159
5
    Value *X = EmitScalarExpr(E->getArg(0));
9160
5
    Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
9161
5
    Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
9162
5
    return Builder.CreateFSub(Zero, Builder.CreateCall(F, X), "sub");
9163
5
  }
9164
41
  case SystemZ::BI__builtin_s390_vfisb:
9165
41
  case SystemZ::BI__builtin_s390_vfidb: {
9166
41
    llvm::Type *ResultType = ConvertType(E->getType());
9167
41
    Value *X = EmitScalarExpr(E->getArg(0));
9168
41
    // Constant-fold the M4 and M5 mask arguments.
9169
41
    llvm::APSInt M4, M5;
9170
41
    bool IsConstM4 = E->getArg(1)->isIntegerConstantExpr(M4, getContext());
9171
41
    bool IsConstM5 = E->getArg(2)->isIntegerConstantExpr(M5, getContext());
9172
41
    assert(IsConstM4 && IsConstM5 && "Constant arg isn't actually constant?");
9173
41
    (void)IsConstM4; (void)IsConstM5;
9174
41
    // Check whether this instance can be represented via an LLVM standard
9175
41
    // intrinsic.  We only support some combinations of M4 and M5.
9176
41
    Intrinsic::ID ID = Intrinsic::not_intrinsic;
9177
41
    switch (M4.getZExtValue()) {
9178
0
    default: break;
9179
5
    case 0:  // IEEE-inexact exception allowed
9180
5
      switch (M5.getZExtValue()) {
9181
0
      default: break;
9182
5
      case 0: ID = Intrinsic::rint; break;
9183
5
      }
9184
5
      break;
9185
36
    case 4:  // IEEE-inexact exception suppressed
9186
36
      switch (M5.getZExtValue()) {
9187
5
      default: break;
9188
5
      case 0: ID = Intrinsic::nearbyint; break;
9189
2
      case 1: ID = Intrinsic::round; break;
9190
8
      case 5: ID = Intrinsic::trunc; break;
9191
8
      case 6: ID = Intrinsic::ceil; break;
9192
8
      case 7: ID = Intrinsic::floor; break;
9193
36
      }
9194
36
      break;
9195
41
    }
9196
41
    if (ID != Intrinsic::not_intrinsic) {
9197
36
      Function *F = CGM.getIntrinsic(ID, ResultType);
9198
36
      return Builder.CreateCall(F, X);
9199
36
    }
9200
5
    switch (BuiltinID) {
9201
2
      case SystemZ::BI__builtin_s390_vfisb: ID = Intrinsic::s390_vfisb; break;
9202
3
      case SystemZ::BI__builtin_s390_vfidb: ID = Intrinsic::s390_vfidb; break;
9203
0
      default: llvm_unreachable("Unknown BuiltinID");
9204
5
    }
9205
5
    Function *F = CGM.getIntrinsic(ID);
9206
5
    Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
9207
5
    Value *M5Value = llvm::ConstantInt::get(getLLVMContext(), M5);
9208
5
    return Builder.CreateCall(F, {X, M4Value, M5Value});
9209
5
  }
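// Editor's note: a worked instance of the M4/M5 table above. A call such as
// __builtin_s390_vfidb(x, 4, 7) (inexact suppressed, round toward -inf)
// selects Intrinsic::floor and lowers to roughly
//
//   %r = call <2 x double> @llvm.floor.v2f64(<2 x double> %x)
//
// while unsupported mask combinations fall through to the target intrinsic
// with M4 and M5 passed as explicit constant operands.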
9210
8
  case SystemZ::BI__builtin_s390_vfmaxsb:
9211
8
  case SystemZ::BI__builtin_s390_vfmaxdb: {
9212
8
    llvm::Type *ResultType = ConvertType(E->getType());
9213
8
    Value *X = EmitScalarExpr(E->getArg(0));
9214
8
    Value *Y = EmitScalarExpr(E->getArg(1));
9215
8
    // Constant-fold the M4 mask argument.
9216
8
    llvm::APSInt M4;
9217
8
    bool IsConstM4 = E->getArg(2)->isIntegerConstantExpr(M4, getContext());
9218
8
    assert(IsConstM4 && "Constant arg isn't actually constant?");
9219
8
    (void)IsConstM4;
9220
8
    // Check whether this instance can be represented via an LLVM standard
9221
8
    // intrinsic.  We only support some values of M4.
9222
8
    Intrinsic::ID ID = Intrinsic::not_intrinsic;
9223
8
    switch (M4.getZExtValue()) {
9224
6
    default: break;
9225
2
    case 4: ID = Intrinsic::maxnum; break;
9226
8
    }
9227
8
    if (ID != Intrinsic::not_intrinsic) {
9228
2
      Function *F = CGM.getIntrinsic(ID, ResultType);
9229
2
      return Builder.CreateCall(F, {X, Y});
9230
2
    }
9231
6
    switch (BuiltinID) {
9232
3
      case SystemZ::BI__builtin_s390_vfmaxsb: ID = Intrinsic::s390_vfmaxsb; break;
9233
3
      case SystemZ::BI__builtin_s390_vfmaxdb: ID = Intrinsic::s390_vfmaxdb; break;
9234
0
      default: llvm_unreachable("Unknown BuiltinID");
9235
6
    }
9236
6
    Function *F = CGM.getIntrinsic(ID);
9237
6
    Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
9238
6
    return Builder.CreateCall(F, {X, Y, M4Value});
9239
6
  }
9240
8
  case SystemZ::BI__builtin_s390_vfminsb:
9241
8
  case SystemZ::BI__builtin_s390_vfmindb: {
9242
8
    llvm::Type *ResultType = ConvertType(E->getType());
9243
8
    Value *X = EmitScalarExpr(E->getArg(0));
9244
8
    Value *Y = EmitScalarExpr(E->getArg(1));
9245
8
    // Constant-fold the M4 mask argument.
9246
8
    llvm::APSInt M4;
9247
8
    bool IsConstM4 = E->getArg(2)->isIntegerConstantExpr(M4, getContext());
9248
8
    assert(IsConstM4 && "Constant arg isn't actually constant?");
9249
8
    (void)IsConstM4;
9250
8
    // Check whether this instance can be represented via a LLVM standard
9251
8
    // intrinsic.  We only support some values of M4.
9252
8
    Intrinsic::ID ID = Intrinsic::not_intrinsic;
9253
8
    switch (M4.getZExtValue()) {
9254
6
    default: break;
9255
2
    case 4: ID = Intrinsic::minnum; break;
9256
8
    }
9257
8
    if (ID != Intrinsic::not_intrinsic) {
9258
2
      Function *F = CGM.getIntrinsic(ID, ResultType);
9259
2
      return Builder.CreateCall(F, {X, Y});
9260
2
    }
9261
6
    switch (BuiltinID) {
9262
3
      case SystemZ::BI__builtin_s390_vfminsb: ID = Intrinsic::s390_vfminsb; break;
9263
3
      case SystemZ::BI__builtin_s390_vfmindb: ID = Intrinsic::s390_vfmindb; break;
9264
0
      default: llvm_unreachable("Unknown BuiltinID");
9265
6
    }
9266
6
    Function *F = CGM.getIntrinsic(ID);
9267
6
    Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
9268
6
    return Builder.CreateCall(F, {X, Y, M4Value});
9269
6
  }
9270
6
9271
6
  // Vector intrinsics that output the post-instruction CC value.
9272
6
9273
6
#define INTRINSIC_WITH_CC(NAME) \
9274
626
    case SystemZ::BI__builtin_##NAME: \
9275
626
      return EmitSystemZIntrinsicWithCC(*this, Intrinsic::NAME, E)
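// Editor's note: each INTRINSIC_WITH_CC(NAME) line below expands to, e.g.,
//
//   case SystemZ::BI__builtin_s390_vpkshs:
//     return EmitSystemZIntrinsicWithCC(*this, Intrinsic::s390_vpkshs, E);
//
// so the list that follows is simply a dispatch table routing these builtins
// through the CC-returning helper defined earlier.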
9276
6
9277
2
  INTRINSIC_WITH_CC(s390_vpkshs);
9278
2
  INTRINSIC_WITH_CC(s390_vpksfs);
9279
2
  INTRINSIC_WITH_CC(s390_vpksgs);
9280
6
9281
3
  INTRINSIC_WITH_CC(s390_vpklshs);
9282
3
  INTRINSIC_WITH_CC(s390_vpklsfs);
9283
3
  INTRINSIC_WITH_CC(s390_vpklsgs);
9284
6
9285
29
  INTRINSIC_WITH_CC(s390_vceqbs);
9286
29
  INTRINSIC_WITH_CC(s390_vceqhs);
9287
29
  INTRINSIC_WITH_CC(s390_vceqfs);
9288
29
  INTRINSIC_WITH_CC(s390_vceqgs);
9289
6
9290
25
  INTRINSIC_WITH_CC(s390_vchbs);
9291
25
  INTRINSIC_WITH_CC(s390_vchhs);
9292
25
  INTRINSIC_WITH_CC(s390_vchfs);
9293
25
  INTRINSIC_WITH_CC(s390_vchgs);
9294
6
9295
33
  INTRINSIC_WITH_CC(s390_vchlbs);
9296
33
  INTRINSIC_WITH_CC(s390_vchlhs);
9297
33
  INTRINSIC_WITH_CC(s390_vchlfs);
9298
33
  INTRINSIC_WITH_CC(s390_vchlgs);
9299
6
9300
14
  INTRINSIC_WITH_CC(s390_vfaebs);
9301
14
  INTRINSIC_WITH_CC(s390_vfaehs);
9302
14
  INTRINSIC_WITH_CC(s390_vfaefs);
9303
6
9304
8
  INTRINSIC_WITH_CC(s390_vfaezbs);
9305
8
  INTRINSIC_WITH_CC(s390_vfaezhs);
9306
8
  INTRINSIC_WITH_CC(s390_vfaezfs);
9307
6
9308
4
  INTRINSIC_WITH_CC(s390_vfeebs);
9309
4
  INTRINSIC_WITH_CC(s390_vfeehs);
9310
4
  INTRINSIC_WITH_CC(s390_vfeefs);
9311
6
9312
4
  INTRINSIC_WITH_CC(s390_vfeezbs);
9313
4
  INTRINSIC_WITH_CC(s390_vfeezhs);
9314
4
  INTRINSIC_WITH_CC(s390_vfeezfs);
9315
6
9316
4
  INTRINSIC_WITH_CC(s390_vfenebs);
9317
4
  INTRINSIC_WITH_CC(s390_vfenehs);
9318
4
  INTRINSIC_WITH_CC(s390_vfenefs);
9319
6
9320
4
  INTRINSIC_WITH_CC(s390_vfenezbs);
9321
4
  INTRINSIC_WITH_CC(s390_vfenezhs);
9322
4
  INTRINSIC_WITH_CC(s390_vfenezfs);
9323
6
9324
4
  INTRINSIC_WITH_CC(s390_vistrbs);
9325
4
  INTRINSIC_WITH_CC(s390_vistrhs);
9326
4
  INTRINSIC_WITH_CC(s390_vistrfs);
9327
6
9328
6
  INTRINSIC_WITH_CC(s390_vstrcbs);
9329
6
  INTRINSIC_WITH_CC(s390_vstrchs);
9330
6
  INTRINSIC_WITH_CC(s390_vstrcfs);
9331
6
9332
4
  INTRINSIC_WITH_CC(s390_vstrczbs);
9333
4
  INTRINSIC_WITH_CC(s390_vstrczhs);
9334
4
  INTRINSIC_WITH_CC(s390_vstrczfs);
9335
6
9336
5
  INTRINSIC_WITH_CC(s390_vfcesbs);
9337
9
  INTRINSIC_WITH_CC(s390_vfcedbs);
9338
9
  INTRINSIC_WITH_CC(s390_vfchsbs);
9339
17
  INTRINSIC_WITH_CC(s390_vfchdbs);
9340
8
  INTRINSIC_WITH_CC(s390_vfchesbs);
9341
17
  INTRINSIC_WITH_CC(s390_vfchedbs);
9342
6
9343
8
  INTRINSIC_WITH_CC(s390_vftcisb);
9344
34
  INTRINSIC_WITH_CC(s390_vftcidb);
9345
6
9346
6
#undef INTRINSIC_WITH_CC
9347
6
9348
0
  default:
9349
0
    return nullptr;
9350
0
  }
9351
0
}
9352
9353
Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID,
9354
370
                                             const CallExpr *E) {
9355
54
  auto MakeLdg = [&](unsigned IntrinsicID) {
9356
54
    Value *Ptr = EmitScalarExpr(E->getArg(0));
9357
54
    clang::CharUnits Align =
9358
54
        getNaturalPointeeTypeAlignment(E->getArg(0)->getType());
9359
54
    return Builder.CreateCall(
9360
54
        CGM.getIntrinsic(IntrinsicID, {Ptr->getType()->getPointerElementType(),
9361
54
                                       Ptr->getType()}),
9362
54
        {Ptr, ConstantInt::get(Builder.getInt32Ty(), Align.getQuantity())});
9363
54
  };
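// Editor's note: a sketch of what MakeLdg emits for, say, __nvvm_ldg_i on an
// `int *` in the typed-pointer IR of this era (the `i32 4` operand is the
// natural alignment of the pointee, computed above):
//
//   %v = call i32 @llvm.nvvm.ldg.global.i.i32.p0i32(i32* %ptr, i32 4)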
9364
186
  auto MakeScopedAtomic = [&](unsigned IntrinsicID) {
9365
186
    Value *Ptr = EmitScalarExpr(E->getArg(0));
9366
186
    return Builder.CreateCall(
9367
186
        CGM.getIntrinsic(IntrinsicID, {Ptr->getType()->getPointerElementType(),
9368
186
                                       Ptr->getType()}),
9369
186
        {Ptr, EmitScalarExpr(E->getArg(1))});
9370
186
  };
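// Editor's note: MakeScopedAtomic follows the same pattern -- the scoped NVVM
// atomics are overloaded on {pointee type, pointer type} and take (ptr, val),
// so e.g. __nvvm_atom_cta_add_gen_i(p, v) becomes roughly
//
//   %old = call i32 @llvm.nvvm.atomic.add.gen.i.cta.i32.p0i32(i32* %p, i32 %v)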
9371
370
  switch (BuiltinID) {
9372
9
  case NVPTX::BI__nvvm_atom_add_gen_i:
9373
9
  case NVPTX::BI__nvvm_atom_add_gen_l:
9374
9
  case NVPTX::BI__nvvm_atom_add_gen_ll:
9375
9
    return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Add, E);
9376
9
9377
9
  case NVPTX::BI__nvvm_atom_sub_gen_i:
9378
9
  case NVPTX::BI__nvvm_atom_sub_gen_l:
9379
9
  case NVPTX::BI__nvvm_atom_sub_gen_ll:
9380
9
    return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Sub, E);
9381
9
9382
9
  case NVPTX::BI__nvvm_atom_and_gen_i:
9383
9
  case NVPTX::BI__nvvm_atom_and_gen_l:
9384
9
  case NVPTX::BI__nvvm_atom_and_gen_ll:
9385
9
    return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::And, E);
9386
9
9387
9
  case NVPTX::BI__nvvm_atom_or_gen_i:
9388
9
  case NVPTX::BI__nvvm_atom_or_gen_l:
9389
9
  case NVPTX::BI__nvvm_atom_or_gen_ll:
9390
9
    return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Or, E);
9391
9
9392
9
  case NVPTX::BI__nvvm_atom_xor_gen_i:
9393
9
  case NVPTX::BI__nvvm_atom_xor_gen_l:
9394
9
  case NVPTX::BI__nvvm_atom_xor_gen_ll:
9395
9
    return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xor, E);
9396
9
9397
9
  case NVPTX::BI__nvvm_atom_xchg_gen_i:
9398
9
  case NVPTX::BI__nvvm_atom_xchg_gen_l:
9399
9
  case NVPTX::BI__nvvm_atom_xchg_gen_ll:
9400
9
    return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xchg, E);
9401
9
9402
9
  case NVPTX::BI__nvvm_atom_max_gen_i:
9403
9
  case NVPTX::BI__nvvm_atom_max_gen_l:
9404
9
  case NVPTX::BI__nvvm_atom_max_gen_ll:
9405
9
    return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Max, E);
9406
9
9407
9
  case NVPTX::BI__nvvm_atom_max_gen_ui:
9408
9
  case NVPTX::BI__nvvm_atom_max_gen_ul:
9409
9
  case NVPTX::BI__nvvm_atom_max_gen_ull:
9410
9
    return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMax, E);
9411
9
9412
9
  case NVPTX::BI__nvvm_atom_min_gen_i:
9413
9
  case NVPTX::BI__nvvm_atom_min_gen_l:
9414
9
  case NVPTX::BI__nvvm_atom_min_gen_ll:
9415
9
    return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Min, E);
9416
9
9417
9
  case NVPTX::BI__nvvm_atom_min_gen_ui:
9418
9
  case NVPTX::BI__nvvm_atom_min_gen_ul:
9419
9
  case NVPTX::BI__nvvm_atom_min_gen_ull:
9420
9
    return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMin, E);
9421
9
9422
9
  case NVPTX::BI__nvvm_atom_cas_gen_i:
9423
9
  case NVPTX::BI__nvvm_atom_cas_gen_l:
9424
9
  case NVPTX::BI__nvvm_atom_cas_gen_ll:
9425
9
    // __nvvm_atom_cas_gen_* should return the old value rather than the
9426
9
    // success flag.
9427
9
    return MakeAtomicCmpXchgValue(*this, E, /*ReturnBool=*/false);
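// Editor's note: i.e. with ReturnBool=false the helper extracts element 0
// (the prior memory contents) of the { value, i1 } pair that cmpxchg
// produces, matching CUDA's convention that atomicCAS returns the old value.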
9428
9
9429
3
  case NVPTX::BI__nvvm_atom_add_gen_f: {
9430
3
    Value *Ptr = EmitScalarExpr(E->getArg(0));
9431
3
    Value *Val = EmitScalarExpr(E->getArg(1));
9432
3
    // atomicrmw only deals with integer arguments so we need to use
9433
3
    // LLVM's nvvm_atomic_load_add_f32 intrinsic for that.
9434
3
    Value *FnALAF32 =
9435
3
        CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_add_f32, Ptr->getType());
9436
3
    return Builder.CreateCall(FnALAF32, {Ptr, Val});
9437
9
  }
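// Editor's note: illustrating the comment above -- atomicrmw has no
// floating-point add operation here, so __nvvm_atom_add_gen_f(p, v) lowers
// to a target intrinsic call of roughly this shape instead:
//
//   %old = call float @llvm.nvvm.atomic.load.add.f32.p0f32(float* %p, float %v)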
9438
9
9439
3
  case NVPTX::BI__nvvm_atom_inc_gen_ui: {
9440
3
    Value *Ptr = EmitScalarExpr(E->getArg(0));
9441
3
    Value *Val = EmitScalarExpr(E->getArg(1));
9442
3
    Value *FnALI32 =
9443
3
        CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_inc_32, Ptr->getType());
9444
3
    return Builder.CreateCall(FnALI32, {Ptr, Val});
9445
9
  }
9446
9
9447
3
  case NVPTX::BI__nvvm_atom_dec_gen_ui: {
9448
3
    Value *Ptr = EmitScalarExpr(E->getArg(0));
9449
3
    Value *Val = EmitScalarExpr(E->getArg(1));
9450
3
    Value *FnALD32 =
9451
3
        CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_dec_32, Ptr->getType());
9452
3
    return Builder.CreateCall(FnALD32, {Ptr, Val});
9453
9
  }
9454
9
9455
44
  case NVPTX::BI__nvvm_ldg_c:
9456
44
  case NVPTX::BI__nvvm_ldg_c2:
9457
44
  case NVPTX::BI__nvvm_ldg_c4:
9458
44
  case NVPTX::BI__nvvm_ldg_s:
9459
44
  case NVPTX::BI__nvvm_ldg_s2:
9460
44
  case NVPTX::BI__nvvm_ldg_s4:
9461
44
  case NVPTX::BI__nvvm_ldg_i:
9462
44
  case NVPTX::BI__nvvm_ldg_i2:
9463
44
  case NVPTX::BI__nvvm_ldg_i4:
9464
44
  case NVPTX::BI__nvvm_ldg_l:
9465
44
  case NVPTX::BI__nvvm_ldg_ll:
9466
44
  case NVPTX::BI__nvvm_ldg_ll2:
9467
44
  case NVPTX::BI__nvvm_ldg_uc:
9468
44
  case NVPTX::BI__nvvm_ldg_uc2:
9469
44
  case NVPTX::BI__nvvm_ldg_uc4:
9470
44
  case NVPTX::BI__nvvm_ldg_us:
9471
44
  case NVPTX::BI__nvvm_ldg_us2:
9472
44
  case NVPTX::BI__nvvm_ldg_us4:
9473
44
  case NVPTX::BI__nvvm_ldg_ui:
9474
44
  case NVPTX::BI__nvvm_ldg_ui2:
9475
44
  case NVPTX::BI__nvvm_ldg_ui4:
9476
44
  case NVPTX::BI__nvvm_ldg_ul:
9477
44
  case NVPTX::BI__nvvm_ldg_ull:
9478
44
  case NVPTX::BI__nvvm_ldg_ull2:
9479
44
    // PTX Interoperability section 2.2: "For a vector with an even number of
9480
44
    // elements, its alignment is set to number of elements times the alignment
9481
44
    // of its member: n*alignof(t)."
9482
44
    return MakeLdg(Intrinsic::nvvm_ldg_global_i);
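// Editor's note: a concrete instance of the rule quoted above: for
// __nvvm_ldg_i2 (a vector of two ints), the natural pointee alignment is
// 2 * alignof(int) = 8, so the emitted ldg call carries `i32 8` as its
// alignment operand.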
9483
10
  case NVPTX::BI__nvvm_ldg_f:
9484
10
  case NVPTX::BI__nvvm_ldg_f2:
9485
10
  case NVPTX::BI__nvvm_ldg_f4:
9486
10
  case NVPTX::BI__nvvm_ldg_d:
9487
10
  case NVPTX::BI__nvvm_ldg_d2:
9488
10
    return MakeLdg(Intrinsic::nvvm_ldg_global_f);
9489
10
9490
9
  case NVPTX::BI__nvvm_atom_cta_add_gen_i:
9491
9
  case NVPTX::BI__nvvm_atom_cta_add_gen_l:
9492
9
  case NVPTX::BI__nvvm_atom_cta_add_gen_ll:
9493
9
    return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_cta);
9494
9
  case NVPTX::BI__nvvm_atom_sys_add_gen_i:
9495
9
  case NVPTX::BI__nvvm_atom_sys_add_gen_l:
9496
9
  case NVPTX::BI__nvvm_atom_sys_add_gen_ll:
9497
9
    return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_sys);
9498
6
  case NVPTX::BI__nvvm_atom_cta_add_gen_f:
9499
6
  case NVPTX::BI__nvvm_atom_cta_add_gen_d:
9500
6
    return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_cta);
9501
6
  case NVPTX::BI__nvvm_atom_sys_add_gen_f:
9502
6
  case NVPTX::BI__nvvm_atom_sys_add_gen_d:
9503
6
    return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_sys);
9504
9
  case NVPTX::BI__nvvm_atom_cta_xchg_gen_i:
9505
9
  case NVPTX::BI__nvvm_atom_cta_xchg_gen_l:
9506
9
  case NVPTX::BI__nvvm_atom_cta_xchg_gen_ll:
9507
9
    return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_cta);
9508
9
  case NVPTX::BI__nvvm_atom_sys_xchg_gen_i:
9509
9
  case NVPTX::BI__nvvm_atom_sys_xchg_gen_l:
9510
9
  case NVPTX::BI__nvvm_atom_sys_xchg_gen_ll:
9511
9
    return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_sys);
9512
18
  case NVPTX::BI__nvvm_atom_cta_max_gen_i:
9513
18
  case NVPTX::BI__nvvm_atom_cta_max_gen_ui:
9514
18
  case NVPTX::BI__nvvm_atom_cta_max_gen_l:
9515
18
  case NVPTX::BI__nvvm_atom_cta_max_gen_ul:
9516
18
  case NVPTX::BI__nvvm_atom_cta_max_gen_ll:
9517
18
  case NVPTX::BI__nvvm_atom_cta_max_gen_ull:
9518
18
    return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_cta);
9519
18
  case NVPTX::BI__nvvm_atom_sys_max_gen_i:
9520
18
  case NVPTX::BI__nvvm_atom_sys_max_gen_ui:
9521
18
  case NVPTX::BI__nvvm_atom_sys_max_gen_l:
9522
18
  case NVPTX::BI__nvvm_atom_sys_max_gen_ul:
9523
18
  case NVPTX::BI__nvvm_atom_sys_max_gen_ll:
9524
18
  case NVPTX::BI__nvvm_atom_sys_max_gen_ull:
9525
18
    return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_sys);
9526
18
  case NVPTX::BI__nvvm_atom_cta_min_gen_i:
9527
18
  case NVPTX::BI__nvvm_atom_cta_min_gen_ui:
9528
18
  case NVPTX::BI__nvvm_atom_cta_min_gen_l:
9529
18
  case NVPTX::BI__nvvm_atom_cta_min_gen_ul:
9530
18
  case NVPTX::BI__nvvm_atom_cta_min_gen_ll:
9531
18
  case NVPTX::BI__nvvm_atom_cta_min_gen_ull:
9532
18
    return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_cta);
9533
18
  case NVPTX::BI__nvvm_atom_sys_min_gen_i:
9534
18
  case NVPTX::BI__nvvm_atom_sys_min_gen_ui:
9535
18
  case NVPTX::BI__nvvm_atom_sys_min_gen_l:
9536
18
  case NVPTX::BI__nvvm_atom_sys_min_gen_ul:
9537
18
  case NVPTX::BI__nvvm_atom_sys_min_gen_ll:
9538
18
  case NVPTX::BI__nvvm_atom_sys_min_gen_ull:
9539
18
    return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_sys);
9540
3
  case NVPTX::BI__nvvm_atom_cta_inc_gen_ui:
9541
3
    return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_cta);
9542
3
  case NVPTX::BI__nvvm_atom_cta_dec_gen_ui:
9543
3
    return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_cta);
9544
3
  case NVPTX::BI__nvvm_atom_sys_inc_gen_ui:
9545
3
    return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_sys);
9546
3
  case NVPTX::BI__nvvm_atom_sys_dec_gen_ui:
9547
3
    return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_sys);
9548
9
  case NVPTX::BI__nvvm_atom_cta_and_gen_i:
9549
9
  case NVPTX::BI__nvvm_atom_cta_and_gen_l:
9550
9
  case NVPTX::BI__nvvm_atom_cta_and_gen_ll:
9551
9
    return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_cta);
9552
9
  case NVPTX::BI__nvvm_atom_sys_and_gen_i:
9553
9
  case NVPTX::BI__nvvm_atom_sys_and_gen_l:
9554
9
  case NVPTX::BI__nvvm_atom_sys_and_gen_ll:
9555
9
    return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_sys);
9556
9
  case NVPTX::BI__nvvm_atom_cta_or_gen_i:
9557
9
  case NVPTX::BI__nvvm_atom_cta_or_gen_l:
9558
9
  case NVPTX::BI__nvvm_atom_cta_or_gen_ll:
9559
9
    return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_cta);
9560
9
  case NVPTX::BI__nvvm_atom_sys_or_gen_i:
9561
9
  case NVPTX::BI__nvvm_atom_sys_or_gen_l:
9562
9
  case NVPTX::BI__nvvm_atom_sys_or_gen_ll:
9563
9
    return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_sys);
9564
9
  case NVPTX::BI__nvvm_atom_cta_xor_gen_i:
9565
9
  case NVPTX::BI__nvvm_atom_cta_xor_gen_l:
9566
9
  case NVPTX::BI__nvvm_atom_cta_xor_gen_ll:
9567
9
    return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_cta);
9568
9
  case NVPTX::BI__nvvm_atom_sys_xor_gen_i:
9569
9
  case NVPTX::BI__nvvm_atom_sys_xor_gen_l:
9570
9
  case NVPTX::BI__nvvm_atom_sys_xor_gen_ll:
9571
9
    return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_sys);
9572
9
  case NVPTX::BI__nvvm_atom_cta_cas_gen_i:
9573
9
  case NVPTX::BI__nvvm_atom_cta_cas_gen_l:
9574
9
  case NVPTX::BI__nvvm_atom_cta_cas_gen_ll: {
9575
9
    Value *Ptr = EmitScalarExpr(E->getArg(0));
9576
9
    return Builder.CreateCall(
9577
9
        CGM.getIntrinsic(
9578
9
            Intrinsic::nvvm_atomic_cas_gen_i_cta,
9579
9
            {Ptr->getType()->getPointerElementType(), Ptr->getType()}),
9580
9
        {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))});
9581
9
  }
9582
9
  case NVPTX::BI__nvvm_atom_sys_cas_gen_i:
9583
9
  case NVPTX::BI__nvvm_atom_sys_cas_gen_l:
9584
9
  case NVPTX::BI__nvvm_atom_sys_cas_gen_ll: {
9585
9
    Value *Ptr = EmitScalarExpr(E->getArg(0));
9586
9
    return Builder.CreateCall(
9587
9
        CGM.getIntrinsic(
9588
9
            Intrinsic::nvvm_atomic_cas_gen_i_sys,
9589
9
            {Ptr->getType()->getPointerElementType(), Ptr->getType()}),
9590
9
        {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))});
9591
9
  }
9592
4
  case NVPTX::BI__nvvm_match_all_sync_i32p:
9593
4
  case NVPTX::BI__nvvm_match_all_sync_i64p: {
9594
4
    Value *Mask = EmitScalarExpr(E->getArg(0));
9595
4
    Value *Val = EmitScalarExpr(E->getArg(1));
9596
4
    Address PredOutPtr = EmitPointerWithAlignment(E->getArg(2));
9597
4
    Value *ResultPair = Builder.CreateCall(
9598
4
        CGM.getIntrinsic(BuiltinID == NVPTX::BI__nvvm_match_all_sync_i32p
9599
2
                             ? Intrinsic::nvvm_match_all_sync_i32p
9600
2
                             : Intrinsic::nvvm_match_all_sync_i64p),
9601
4
        {Mask, Val});
9602
4
    Value *Pred = Builder.CreateZExt(Builder.CreateExtractValue(ResultPair, 1),
9603
4
                                     PredOutPtr.getElementType());
9604
4
    Builder.CreateStore(Pred, PredOutPtr);
9605
4
    return Builder.CreateExtractValue(ResultPair, 0);
9606
4
  }
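// Editor's note: this mirrors the SystemZ CC-pointer pattern; a sketch for
// the i32 variant, `unsigned r = __nvvm_match_all_sync_i32p(mask, val, &pred);`:
//
//   %pair = call { i32, i1 } @llvm.nvvm.match.all.sync.i32p(i32 %mask, i32 %val)
//   %p1   = extractvalue { i32, i1 } %pair, 1
//   %p32  = zext i1 %p1 to i32
//   store i32 %p32, i32* %predptr                    ; predicate out-param
//   %r    = extractvalue { i32, i1 } %pair, 0        ; returned match mask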
9607
0
  default:
9608
0
    return nullptr;
9609
0
  }
9610
0
}
9611
9612
Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
9613
8
                                                   const CallExpr *E) {
9614
8
  switch (BuiltinID) {
9615
2
  case WebAssembly::BI__builtin_wasm_current_memory: {
9616
2
    llvm::Type *ResultType = ConvertType(E->getType());
9617
2
    Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_current_memory, ResultType);
9618
2
    return Builder.CreateCall(Callee);
9619
8
  }
9620
2
  case WebAssembly::BI__builtin_wasm_grow_memory: {
9621
2
    Value *X = EmitScalarExpr(E->getArg(0));
9622
2
    Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_grow_memory, X->getType());
9623
2
    return Builder.CreateCall(Callee, X);
9624
8
  }
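// Editor's note: both memory builtins above are direct intrinsic calls
// overloaded on the operand/result type; e.g. __builtin_wasm_grow_memory(n)
// with a 32-bit size becomes roughly
//
//   %r = call i32 @llvm.wasm.grow.memory.i32(i32 %n)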
9625
2
  case WebAssembly::BI__builtin_wasm_throw: {
9626
2
    Value *Tag = EmitScalarExpr(E->getArg(0));
9627
2
    Value *Obj = EmitScalarExpr(E->getArg(1));
9628
2
    Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_throw);
9629
2
    return Builder.CreateCall(Callee, {Tag, Obj});
9630
8
  }
9631
2
  case WebAssembly::BI__builtin_wasm_rethrow: {
9632
2
    Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_rethrow);
9633
2
    return Builder.CreateCall(Callee);
9634
8
  }
9635
8
9636
0
  default:
9637
0
    return nullptr;
9638
0
  }
9639
0
}