Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
Line | Count | Source
1
//===-- AArch64ISelLowering.cpp - AArch64 DAG Lowering Implementation  ----===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This file implements the AArch64TargetLowering class.
10
//
11
//===----------------------------------------------------------------------===//
12
13
#include "AArch64ExpandImm.h"
14
#include "AArch64ISelLowering.h"
15
#include "AArch64CallingConvention.h"
16
#include "AArch64MachineFunctionInfo.h"
17
#include "AArch64PerfectShuffle.h"
18
#include "AArch64RegisterInfo.h"
19
#include "AArch64Subtarget.h"
20
#include "MCTargetDesc/AArch64AddressingModes.h"
21
#include "Utils/AArch64BaseInfo.h"
22
#include "llvm/ADT/APFloat.h"
23
#include "llvm/ADT/APInt.h"
24
#include "llvm/ADT/ArrayRef.h"
25
#include "llvm/ADT/STLExtras.h"
26
#include "llvm/ADT/SmallVector.h"
27
#include "llvm/ADT/Statistic.h"
28
#include "llvm/ADT/StringRef.h"
29
#include "llvm/ADT/StringSwitch.h"
30
#include "llvm/ADT/Triple.h"
31
#include "llvm/ADT/Twine.h"
32
#include "llvm/Analysis/VectorUtils.h"
33
#include "llvm/CodeGen/CallingConvLower.h"
34
#include "llvm/CodeGen/MachineBasicBlock.h"
35
#include "llvm/CodeGen/MachineFrameInfo.h"
36
#include "llvm/CodeGen/MachineFunction.h"
37
#include "llvm/CodeGen/MachineInstr.h"
38
#include "llvm/CodeGen/MachineInstrBuilder.h"
39
#include "llvm/CodeGen/MachineMemOperand.h"
40
#include "llvm/CodeGen/MachineRegisterInfo.h"
41
#include "llvm/CodeGen/RuntimeLibcalls.h"
42
#include "llvm/CodeGen/SelectionDAG.h"
43
#include "llvm/CodeGen/SelectionDAGNodes.h"
44
#include "llvm/CodeGen/TargetCallingConv.h"
45
#include "llvm/CodeGen/TargetInstrInfo.h"
46
#include "llvm/CodeGen/ValueTypes.h"
47
#include "llvm/IR/Attributes.h"
48
#include "llvm/IR/Constants.h"
49
#include "llvm/IR/DataLayout.h"
50
#include "llvm/IR/DebugLoc.h"
51
#include "llvm/IR/DerivedTypes.h"
52
#include "llvm/IR/Function.h"
53
#include "llvm/IR/GetElementPtrTypeIterator.h"
54
#include "llvm/IR/GlobalValue.h"
55
#include "llvm/IR/IRBuilder.h"
56
#include "llvm/IR/Instruction.h"
57
#include "llvm/IR/Instructions.h"
58
#include "llvm/IR/IntrinsicInst.h"
59
#include "llvm/IR/Intrinsics.h"
60
#include "llvm/IR/Module.h"
61
#include "llvm/IR/OperandTraits.h"
62
#include "llvm/IR/PatternMatch.h"
63
#include "llvm/IR/Type.h"
64
#include "llvm/IR/Use.h"
65
#include "llvm/IR/Value.h"
66
#include "llvm/MC/MCRegisterInfo.h"
67
#include "llvm/Support/Casting.h"
68
#include "llvm/Support/CodeGen.h"
69
#include "llvm/Support/CommandLine.h"
70
#include "llvm/Support/Compiler.h"
71
#include "llvm/Support/Debug.h"
72
#include "llvm/Support/ErrorHandling.h"
73
#include "llvm/Support/KnownBits.h"
74
#include "llvm/Support/MachineValueType.h"
75
#include "llvm/Support/MathExtras.h"
76
#include "llvm/Support/raw_ostream.h"
77
#include "llvm/Target/TargetMachine.h"
78
#include "llvm/Target/TargetOptions.h"
79
#include <algorithm>
80
#include <bitset>
81
#include <cassert>
82
#include <cctype>
83
#include <cstdint>
84
#include <cstdlib>
85
#include <iterator>
86
#include <limits>
87
#include <tuple>
88
#include <utility>
89
#include <vector>
90
91
using namespace llvm;
92
using namespace llvm::PatternMatch;
93
94
#define DEBUG_TYPE "aarch64-lower"
95
96
STATISTIC(NumTailCalls, "Number of tail calls");
97
STATISTIC(NumShiftInserts, "Number of vector shift inserts");
98
STATISTIC(NumOptimizedImms, "Number of times immediates were optimized");
99
100
static cl::opt<bool>
101
EnableAArch64SlrGeneration("aarch64-shift-insert-generation", cl::Hidden,
102
                           cl::desc("Allow AArch64 SLI/SRI formation"),
103
                           cl::init(false));
104
105
// FIXME: The necessary dtprel relocations don't seem to be supported
106
// well in the GNU bfd and gold linkers at the moment. Therefore, by
107
// default, for now, fall back to GeneralDynamic code generation.
108
cl::opt<bool> EnableAArch64ELFLocalDynamicTLSGeneration(
109
    "aarch64-elf-ldtls-generation", cl::Hidden,
110
    cl::desc("Allow AArch64 Local Dynamic TLS code generation"),
111
    cl::init(false));
112
113
static cl::opt<bool>
114
EnableOptimizeLogicalImm("aarch64-enable-logical-imm", cl::Hidden,
115
                         cl::desc("Enable AArch64 logical imm instruction "
116
                                  "optimization"),
117
                         cl::init(true));
118
119
/// Value type used for condition codes.
120
static const MVT MVT_CC = MVT::i32;
121
122
AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
123
                                             const AArch64Subtarget &STI)
124
9.10k
    : TargetLowering(TM), Subtarget(&STI) {
125
9.10k
  // AArch64 doesn't have comparisons which set GPRs or setcc instructions, so
126
9.10k
  // we have to make something up. Arbitrarily, choose ZeroOrOne.
127
9.10k
  setBooleanContents(ZeroOrOneBooleanContent);
128
9.10k
  // When comparing vectors the result sets the different elements in the
129
9.10k
  // vector to all-one or all-zero.
130
9.10k
  setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
131
9.10k
132
9.10k
  // Set up the register classes.
133
9.10k
  addRegisterClass(MVT::i32, &AArch64::GPR32allRegClass);
134
9.10k
  addRegisterClass(MVT::i64, &AArch64::GPR64allRegClass);
135
9.10k
136
9.10k
  if (Subtarget->hasFPARMv8()) {
137
9.08k
    addRegisterClass(MVT::f16, &AArch64::FPR16RegClass);
138
9.08k
    addRegisterClass(MVT::f32, &AArch64::FPR32RegClass);
139
9.08k
    addRegisterClass(MVT::f64, &AArch64::FPR64RegClass);
140
9.08k
    addRegisterClass(MVT::f128, &AArch64::FPR128RegClass);
141
9.08k
  }
142
9.10k
143
9.10k
  if (Subtarget->hasNEON()) {
144
9.08k
    addRegisterClass(MVT::v16i8, &AArch64::FPR8RegClass);
145
9.08k
    addRegisterClass(MVT::v8i16, &AArch64::FPR16RegClass);
146
9.08k
    // Someone set us up the NEON.
147
9.08k
    addDRTypeForNEON(MVT::v2f32);
148
9.08k
    addDRTypeForNEON(MVT::v8i8);
149
9.08k
    addDRTypeForNEON(MVT::v4i16);
150
9.08k
    addDRTypeForNEON(MVT::v2i32);
151
9.08k
    addDRTypeForNEON(MVT::v1i64);
152
9.08k
    addDRTypeForNEON(MVT::v1f64);
153
9.08k
    addDRTypeForNEON(MVT::v4f16);
154
9.08k
155
9.08k
    addQRTypeForNEON(MVT::v4f32);
156
9.08k
    addQRTypeForNEON(MVT::v2f64);
157
9.08k
    addQRTypeForNEON(MVT::v16i8);
158
9.08k
    addQRTypeForNEON(MVT::v8i16);
159
9.08k
    addQRTypeForNEON(MVT::v4i32);
160
9.08k
    addQRTypeForNEON(MVT::v2i64);
161
9.08k
    addQRTypeForNEON(MVT::v8f16);
162
9.08k
  }
163
9.10k
164
9.10k
  // Compute derived properties from the register classes
165
9.10k
  computeRegisterProperties(Subtarget->getRegisterInfo());
166
9.10k
167
9.10k
  // Provide all sorts of operation actions
168
9.10k
  setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
169
9.10k
  setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
170
9.10k
  setOperationAction(ISD::SETCC, MVT::i32, Custom);
171
9.10k
  setOperationAction(ISD::SETCC, MVT::i64, Custom);
172
9.10k
  setOperationAction(ISD::SETCC, MVT::f16, Custom);
173
9.10k
  setOperationAction(ISD::SETCC, MVT::f32, Custom);
174
9.10k
  setOperationAction(ISD::SETCC, MVT::f64, Custom);
175
9.10k
  setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
176
9.10k
  setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);
177
9.10k
  setOperationAction(ISD::BRCOND, MVT::Other, Expand);
178
9.10k
  setOperationAction(ISD::BR_CC, MVT::i32, Custom);
179
9.10k
  setOperationAction(ISD::BR_CC, MVT::i64, Custom);
180
9.10k
  setOperationAction(ISD::BR_CC, MVT::f16, Custom);
181
9.10k
  setOperationAction(ISD::BR_CC, MVT::f32, Custom);
182
9.10k
  setOperationAction(ISD::BR_CC, MVT::f64, Custom);
183
9.10k
  setOperationAction(ISD::SELECT, MVT::i32, Custom);
184
9.10k
  setOperationAction(ISD::SELECT, MVT::i64, Custom);
185
9.10k
  setOperationAction(ISD::SELECT, MVT::f16, Custom);
186
9.10k
  setOperationAction(ISD::SELECT, MVT::f32, Custom);
187
9.10k
  setOperationAction(ISD::SELECT, MVT::f64, Custom);
188
9.10k
  setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
189
9.10k
  setOperationAction(ISD::SELECT_CC, MVT::i64, Custom);
190
9.10k
  setOperationAction(ISD::SELECT_CC, MVT::f16, Custom);
191
9.10k
  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
192
9.10k
  setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
193
9.10k
  setOperationAction(ISD::BR_JT, MVT::Other, Custom);
194
9.10k
  setOperationAction(ISD::JumpTable, MVT::i64, Custom);
195
9.10k
196
9.10k
  setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
197
9.10k
  setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
198
9.10k
  setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
199
9.10k
200
9.10k
  setOperationAction(ISD::FREM, MVT::f32, Expand);
201
9.10k
  setOperationAction(ISD::FREM, MVT::f64, Expand);
202
9.10k
  setOperationAction(ISD::FREM, MVT::f80, Expand);
203
9.10k
204
9.10k
  setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
205
9.10k
206
9.10k
  // Custom lowering hooks are needed for XOR
207
9.10k
  // to fold it into CSINC/CSINV.
208
9.10k
  setOperationAction(ISD::XOR, MVT::i32, Custom);
209
9.10k
  setOperationAction(ISD::XOR, MVT::i64, Custom);
210
9.10k
211
9.10k
  // Virtually no operation on f128 is legal, but LLVM can't expand them when
212
9.10k
  // there's a valid register class, so we need custom operations in most cases.
213
9.10k
  setOperationAction(ISD::FABS, MVT::f128, Expand);
214
9.10k
  setOperationAction(ISD::FADD, MVT::f128, Custom);
215
9.10k
  setOperationAction(ISD::FCOPYSIGN, MVT::f128, Expand);
216
9.10k
  setOperationAction(ISD::FCOS, MVT::f128, Expand);
217
9.10k
  setOperationAction(ISD::FDIV, MVT::f128, Custom);
218
9.10k
  setOperationAction(ISD::FMA, MVT::f128, Expand);
219
9.10k
  setOperationAction(ISD::FMUL, MVT::f128, Custom);
220
9.10k
  setOperationAction(ISD::FNEG, MVT::f128, Expand);
221
9.10k
  setOperationAction(ISD::FPOW, MVT::f128, Expand);
222
9.10k
  setOperationAction(ISD::FREM, MVT::f128, Expand);
223
9.10k
  setOperationAction(ISD::FRINT, MVT::f128, Expand);
224
9.10k
  setOperationAction(ISD::FSIN, MVT::f128, Expand);
225
9.10k
  setOperationAction(ISD::FSINCOS, MVT::f128, Expand);
226
9.10k
  setOperationAction(ISD::FSQRT, MVT::f128, Expand);
227
9.10k
  setOperationAction(ISD::FSUB, MVT::f128, Custom);
228
9.10k
  setOperationAction(ISD::FTRUNC, MVT::f128, Expand);
229
9.10k
  setOperationAction(ISD::SETCC, MVT::f128, Custom);
230
9.10k
  setOperationAction(ISD::BR_CC, MVT::f128, Custom);
231
9.10k
  setOperationAction(ISD::SELECT, MVT::f128, Custom);
232
9.10k
  setOperationAction(ISD::SELECT_CC, MVT::f128, Custom);
233
9.10k
  setOperationAction(ISD::FP_EXTEND, MVT::f128, Custom);
234
9.10k
235
9.10k
  // Lowering for many of the conversions is actually specified by the non-f128
236
9.10k
  // type. The LowerXXX function will be trivial when f128 isn't involved.
237
9.10k
  setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
238
9.10k
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
239
9.10k
  setOperationAction(ISD::FP_TO_SINT, MVT::i128, Custom);
240
9.10k
  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
241
9.10k
  setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
242
9.10k
  setOperationAction(ISD::FP_TO_UINT, MVT::i128, Custom);
243
9.10k
  setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
244
9.10k
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
245
9.10k
  setOperationAction(ISD::SINT_TO_FP, MVT::i128, Custom);
246
9.10k
  setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
247
9.10k
  setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
248
9.10k
  setOperationAction(ISD::UINT_TO_FP, MVT::i128, Custom);
249
9.10k
  setOperationAction(ISD::FP_ROUND, MVT::f32, Custom);
250
9.10k
  setOperationAction(ISD::FP_ROUND, MVT::f64, Custom);
251
9.10k
252
9.10k
  // Variable arguments.
253
9.10k
  setOperationAction(ISD::VASTART, MVT::Other, Custom);
254
9.10k
  setOperationAction(ISD::VAARG, MVT::Other, Custom);
255
9.10k
  setOperationAction(ISD::VACOPY, MVT::Other, Custom);
256
9.10k
  setOperationAction(ISD::VAEND, MVT::Other, Expand);
257
9.10k
258
9.10k
  // Variable-sized objects.
259
9.10k
  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
260
9.10k
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
261
9.10k
262
9.10k
  if (Subtarget->isTargetWindows())
263
84
    setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);
264
9.02k
  else
265
9.02k
    setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
266
9.10k
267
9.10k
  // Constant pool entries
268
9.10k
  setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
269
9.10k
270
9.10k
  // BlockAddress
271
9.10k
  setOperationAction(ISD::BlockAddress, MVT::i64, Custom);
272
9.10k
273
9.10k
  // Add/Sub overflow ops with MVT::Glues are lowered to NZCV dependences.
274
9.10k
  setOperationAction(ISD::ADDC, MVT::i32, Custom);
275
9.10k
  setOperationAction(ISD::ADDE, MVT::i32, Custom);
276
9.10k
  setOperationAction(ISD::SUBC, MVT::i32, Custom);
277
9.10k
  setOperationAction(ISD::SUBE, MVT::i32, Custom);
278
9.10k
  setOperationAction(ISD::ADDC, MVT::i64, Custom);
279
9.10k
  setOperationAction(ISD::ADDE, MVT::i64, Custom);
280
9.10k
  setOperationAction(ISD::SUBC, MVT::i64, Custom);
281
9.10k
  setOperationAction(ISD::SUBE, MVT::i64, Custom);
282
9.10k
283
9.10k
  // AArch64 lacks both left-rotate and popcount instructions.
284
9.10k
  setOperationAction(ISD::ROTL, MVT::i32, Expand);
285
9.10k
  setOperationAction(ISD::ROTL, MVT::i64, Expand);
286
1.01M
  for (MVT VT : MVT::vector_valuetypes()) {
287
1.01M
    setOperationAction(ISD::ROTL, VT, Expand);
288
1.01M
    setOperationAction(ISD::ROTR, VT, Expand);
289
1.01M
  }
290
9.10k
291
9.10k
  // AArch64 doesn't have {U|S}MUL_LOHI.
292
9.10k
  setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
293
9.10k
  setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
294
9.10k
295
9.10k
  setOperationAction(ISD::CTPOP, MVT::i32, Custom);
296
9.10k
  setOperationAction(ISD::CTPOP, MVT::i64, Custom);
297
9.10k
298
9.10k
  setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
299
9.10k
  setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
300
1.01M
  for (MVT VT : MVT::vector_valuetypes()) {
301
1.01M
    setOperationAction(ISD::SDIVREM, VT, Expand);
302
1.01M
    setOperationAction(ISD::UDIVREM, VT, Expand);
303
1.01M
  }
304
9.10k
  setOperationAction(ISD::SREM, MVT::i32, Expand);
305
9.10k
  setOperationAction(ISD::SREM, MVT::i64, Expand);
306
9.10k
  setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
307
9.10k
  setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
308
9.10k
  setOperationAction(ISD::UREM, MVT::i32, Expand);
309
9.10k
  setOperationAction(ISD::UREM, MVT::i64, Expand);
310
9.10k
311
9.10k
  // Custom lower Add/Sub/Mul with overflow.
312
9.10k
  setOperationAction(ISD::SADDO, MVT::i32, Custom);
313
9.10k
  setOperationAction(ISD::SADDO, MVT::i64, Custom);
314
9.10k
  setOperationAction(ISD::UADDO, MVT::i32, Custom);
315
9.10k
  setOperationAction(ISD::UADDO, MVT::i64, Custom);
316
9.10k
  setOperationAction(ISD::SSUBO, MVT::i32, Custom);
317
9.10k
  setOperationAction(ISD::SSUBO, MVT::i64, Custom);
318
9.10k
  setOperationAction(ISD::USUBO, MVT::i32, Custom);
319
9.10k
  setOperationAction(ISD::USUBO, MVT::i64, Custom);
320
9.10k
  setOperationAction(ISD::SMULO, MVT::i32, Custom);
321
9.10k
  setOperationAction(ISD::SMULO, MVT::i64, Custom);
322
9.10k
  setOperationAction(ISD::UMULO, MVT::i32, Custom);
323
9.10k
  setOperationAction(ISD::UMULO, MVT::i64, Custom);
324
9.10k
325
9.10k
  setOperationAction(ISD::FSIN, MVT::f32, Expand);
326
9.10k
  setOperationAction(ISD::FSIN, MVT::f64, Expand);
327
9.10k
  setOperationAction(ISD::FCOS, MVT::f32, Expand);
328
9.10k
  setOperationAction(ISD::FCOS, MVT::f64, Expand);
329
9.10k
  setOperationAction(ISD::FPOW, MVT::f32, Expand);
330
9.10k
  setOperationAction(ISD::FPOW, MVT::f64, Expand);
331
9.10k
  setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
332
9.10k
  setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
333
9.10k
  if (Subtarget->hasFullFP16())
334
69
    setOperationAction(ISD::FCOPYSIGN, MVT::f16, Custom);
335
9.03k
  else
336
9.03k
    setOperationAction(ISD::FCOPYSIGN, MVT::f16, Promote);
337
9.10k
338
9.10k
  setOperationAction(ISD::FREM,    MVT::f16,   Promote);
339
9.10k
  setOperationAction(ISD::FREM,    MVT::v4f16, Expand);
340
9.10k
  setOperationAction(ISD::FREM,    MVT::v8f16, Expand);
341
9.10k
  setOperationAction(ISD::FPOW,    MVT::f16,   Promote);
342
9.10k
  setOperationAction(ISD::FPOW,    MVT::v4f16, Expand);
343
9.10k
  setOperationAction(ISD::FPOW,    MVT::v8f16, Expand);
344
9.10k
  setOperationAction(ISD::FPOWI,   MVT::f16,   Promote);
345
9.10k
  setOperationAction(ISD::FPOWI,   MVT::v4f16, Expand);
346
9.10k
  setOperationAction(ISD::FPOWI,   MVT::v8f16, Expand);
347
9.10k
  setOperationAction(ISD::FCOS,    MVT::f16,   Promote);
348
9.10k
  setOperationAction(ISD::FCOS,    MVT::v4f16, Expand);
349
9.10k
  setOperationAction(ISD::FCOS,    MVT::v8f16, Expand);
350
9.10k
  setOperationAction(ISD::FSIN,    MVT::f16,   Promote);
351
9.10k
  setOperationAction(ISD::FSIN,    MVT::v4f16, Expand);
352
9.10k
  setOperationAction(ISD::FSIN,    MVT::v8f16, Expand);
353
9.10k
  setOperationAction(ISD::FSINCOS, MVT::f16,   Promote);
354
9.10k
  setOperationAction(ISD::FSINCOS, MVT::v4f16, Expand);
355
9.10k
  setOperationAction(ISD::FSINCOS, MVT::v8f16, Expand);
356
9.10k
  setOperationAction(ISD::FEXP,    MVT::f16,   Promote);
357
9.10k
  setOperationAction(ISD::FEXP,    MVT::v4f16, Expand);
358
9.10k
  setOperationAction(ISD::FEXP,    MVT::v8f16, Expand);
359
9.10k
  setOperationAction(ISD::FEXP2,   MVT::f16,   Promote);
360
9.10k
  setOperationAction(ISD::FEXP2,   MVT::v4f16, Expand);
361
9.10k
  setOperationAction(ISD::FEXP2,   MVT::v8f16, Expand);
362
9.10k
  setOperationAction(ISD::FLOG,    MVT::f16,   Promote);
363
9.10k
  setOperationAction(ISD::FLOG,    MVT::v4f16, Expand);
364
9.10k
  setOperationAction(ISD::FLOG,    MVT::v8f16, Expand);
365
9.10k
  setOperationAction(ISD::FLOG2,   MVT::f16,   Promote);
366
9.10k
  setOperationAction(ISD::FLOG2,   MVT::v4f16, Expand);
367
9.10k
  setOperationAction(ISD::FLOG2,   MVT::v8f16, Expand);
368
9.10k
  setOperationAction(ISD::FLOG10,  MVT::f16,   Promote);
369
9.10k
  setOperationAction(ISD::FLOG10,  MVT::v4f16, Expand);
370
9.10k
  setOperationAction(ISD::FLOG10,  MVT::v8f16, Expand);
371
9.10k
372
9.10k
  if (!Subtarget->hasFullFP16()) {
373
9.03k
    setOperationAction(ISD::SELECT,      MVT::f16,  Promote);
374
9.03k
    setOperationAction(ISD::SELECT_CC,   MVT::f16,  Promote);
375
9.03k
    setOperationAction(ISD::SETCC,       MVT::f16,  Promote);
376
9.03k
    setOperationAction(ISD::BR_CC,       MVT::f16,  Promote);
377
9.03k
    setOperationAction(ISD::FADD,        MVT::f16,  Promote);
378
9.03k
    setOperationAction(ISD::FSUB,        MVT::f16,  Promote);
379
9.03k
    setOperationAction(ISD::FMUL,        MVT::f16,  Promote);
380
9.03k
    setOperationAction(ISD::FDIV,        MVT::f16,  Promote);
381
9.03k
    setOperationAction(ISD::FMA,         MVT::f16,  Promote);
382
9.03k
    setOperationAction(ISD::FNEG,        MVT::f16,  Promote);
383
9.03k
    setOperationAction(ISD::FABS,        MVT::f16,  Promote);
384
9.03k
    setOperationAction(ISD::FCEIL,       MVT::f16,  Promote);
385
9.03k
    setOperationAction(ISD::FSQRT,       MVT::f16,  Promote);
386
9.03k
    setOperationAction(ISD::FFLOOR,      MVT::f16,  Promote);
387
9.03k
    setOperationAction(ISD::FNEARBYINT,  MVT::f16,  Promote);
388
9.03k
    setOperationAction(ISD::FRINT,       MVT::f16,  Promote);
389
9.03k
    setOperationAction(ISD::FROUND,      MVT::f16,  Promote);
390
9.03k
    setOperationAction(ISD::FTRUNC,      MVT::f16,  Promote);
391
9.03k
    setOperationAction(ISD::FMINNUM,     MVT::f16,  Promote);
392
9.03k
    setOperationAction(ISD::FMAXNUM,     MVT::f16,  Promote);
393
9.03k
    setOperationAction(ISD::FMINIMUM,    MVT::f16,  Promote);
394
9.03k
    setOperationAction(ISD::FMAXIMUM,    MVT::f16,  Promote);
395
9.03k
396
9.03k
    // promote v4f16 to v4f32 when that is known to be safe.
397
9.03k
    setOperationAction(ISD::FADD,        MVT::v4f16, Promote);
398
9.03k
    setOperationAction(ISD::FSUB,        MVT::v4f16, Promote);
399
9.03k
    setOperationAction(ISD::FMUL,        MVT::v4f16, Promote);
400
9.03k
    setOperationAction(ISD::FDIV,        MVT::v4f16, Promote);
401
9.03k
    setOperationAction(ISD::FP_EXTEND,   MVT::v4f16, Promote);
402
9.03k
    setOperationAction(ISD::FP_ROUND,    MVT::v4f16, Promote);
403
9.03k
    AddPromotedToType(ISD::FADD,         MVT::v4f16, MVT::v4f32);
404
9.03k
    AddPromotedToType(ISD::FSUB,         MVT::v4f16, MVT::v4f32);
405
9.03k
    AddPromotedToType(ISD::FMUL,         MVT::v4f16, MVT::v4f32);
406
9.03k
    AddPromotedToType(ISD::FDIV,         MVT::v4f16, MVT::v4f32);
407
9.03k
    AddPromotedToType(ISD::FP_EXTEND,    MVT::v4f16, MVT::v4f32);
408
9.03k
    AddPromotedToType(ISD::FP_ROUND,     MVT::v4f16, MVT::v4f32);
409
9.03k
410
9.03k
    setOperationAction(ISD::FABS,        MVT::v4f16, Expand);
411
9.03k
    setOperationAction(ISD::FNEG,        MVT::v4f16, Expand);
412
9.03k
    setOperationAction(ISD::FROUND,      MVT::v4f16, Expand);
413
9.03k
    setOperationAction(ISD::FMA,         MVT::v4f16, Expand);
414
9.03k
    setOperationAction(ISD::SETCC,       MVT::v4f16, Expand);
415
9.03k
    setOperationAction(ISD::BR_CC,       MVT::v4f16, Expand);
416
9.03k
    setOperationAction(ISD::SELECT,      MVT::v4f16, Expand);
417
9.03k
    setOperationAction(ISD::SELECT_CC,   MVT::v4f16, Expand);
418
9.03k
    setOperationAction(ISD::FTRUNC,      MVT::v4f16, Expand);
419
9.03k
    setOperationAction(ISD::FCOPYSIGN,   MVT::v4f16, Expand);
420
9.03k
    setOperationAction(ISD::FFLOOR,      MVT::v4f16, Expand);
421
9.03k
    setOperationAction(ISD::FCEIL,       MVT::v4f16, Expand);
422
9.03k
    setOperationAction(ISD::FRINT,       MVT::v4f16, Expand);
423
9.03k
    setOperationAction(ISD::FNEARBYINT,  MVT::v4f16, Expand);
424
9.03k
    setOperationAction(ISD::FSQRT,       MVT::v4f16, Expand);
425
9.03k
426
9.03k
    setOperationAction(ISD::FABS,        MVT::v8f16, Expand);
427
9.03k
    setOperationAction(ISD::FADD,        MVT::v8f16, Expand);
428
9.03k
    setOperationAction(ISD::FCEIL,       MVT::v8f16, Expand);
429
9.03k
    setOperationAction(ISD::FCOPYSIGN,   MVT::v8f16, Expand);
430
9.03k
    setOperationAction(ISD::FDIV,        MVT::v8f16, Expand);
431
9.03k
    setOperationAction(ISD::FFLOOR,      MVT::v8f16, Expand);
432
9.03k
    setOperationAction(ISD::FMA,         MVT::v8f16, Expand);
433
9.03k
    setOperationAction(ISD::FMUL,        MVT::v8f16, Expand);
434
9.03k
    setOperationAction(ISD::FNEARBYINT,  MVT::v8f16, Expand);
435
9.03k
    setOperationAction(ISD::FNEG,        MVT::v8f16, Expand);
436
9.03k
    setOperationAction(ISD::FROUND,      MVT::v8f16, Expand);
437
9.03k
    setOperationAction(ISD::FRINT,       MVT::v8f16, Expand);
438
9.03k
    setOperationAction(ISD::FSQRT,       MVT::v8f16, Expand);
439
9.03k
    setOperationAction(ISD::FSUB,        MVT::v8f16, Expand);
440
9.03k
    setOperationAction(ISD::FTRUNC,      MVT::v8f16, Expand);
441
9.03k
    setOperationAction(ISD::SETCC,       MVT::v8f16, Expand);
442
9.03k
    setOperationAction(ISD::BR_CC,       MVT::v8f16, Expand);
443
9.03k
    setOperationAction(ISD::SELECT,      MVT::v8f16, Expand);
444
9.03k
    setOperationAction(ISD::SELECT_CC,   MVT::v8f16, Expand);
445
9.03k
    setOperationAction(ISD::FP_EXTEND,   MVT::v8f16, Expand);
446
9.03k
  }
447
9.10k
448
9.10k
  // AArch64 has implementations of a lot of rounding-like FP operations.
449
18.2k
  for (MVT Ty : {MVT::f32, MVT::f64}) {
450
18.2k
    setOperationAction(ISD::FFLOOR, Ty, Legal);
451
18.2k
    setOperationAction(ISD::FNEARBYINT, Ty, Legal);
452
18.2k
    setOperationAction(ISD::FCEIL, Ty, Legal);
453
18.2k
    setOperationAction(ISD::FRINT, Ty, Legal);
454
18.2k
    setOperationAction(ISD::FTRUNC, Ty, Legal);
455
18.2k
    setOperationAction(ISD::FROUND, Ty, Legal);
456
18.2k
    setOperationAction(ISD::FMINNUM, Ty, Legal);
457
18.2k
    setOperationAction(ISD::FMAXNUM, Ty, Legal);
458
18.2k
    setOperationAction(ISD::FMINIMUM, Ty, Legal);
459
18.2k
    setOperationAction(ISD::FMAXIMUM, Ty, Legal);
460
18.2k
    setOperationAction(ISD::LROUND, Ty, Legal);
461
18.2k
    setOperationAction(ISD::LLROUND, Ty, Legal);
462
18.2k
    setOperationAction(ISD::LRINT, Ty, Legal);
463
18.2k
    setOperationAction(ISD::LLRINT, Ty, Legal);
464
18.2k
  }
465
9.10k
466
9.10k
  if (Subtarget->hasFullFP16()) {
467
69
    setOperationAction(ISD::FNEARBYINT, MVT::f16, Legal);
468
69
    setOperationAction(ISD::FFLOOR,  MVT::f16, Legal);
469
69
    setOperationAction(ISD::FCEIL,   MVT::f16, Legal);
470
69
    setOperationAction(ISD::FRINT,   MVT::f16, Legal);
471
69
    setOperationAction(ISD::FTRUNC,  MVT::f16, Legal);
472
69
    setOperationAction(ISD::FROUND,  MVT::f16, Legal);
473
69
    setOperationAction(ISD::FMINNUM, MVT::f16, Legal);
474
69
    setOperationAction(ISD::FMAXNUM, MVT::f16, Legal);
475
69
    setOperationAction(ISD::FMINIMUM, MVT::f16, Legal);
476
69
    setOperationAction(ISD::FMAXIMUM, MVT::f16, Legal);
477
69
  }
478
9.10k
479
9.10k
  setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
480
9.10k
481
9.10k
  setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
482
9.10k
483
9.10k
  setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i128, Custom);
484
9.10k
  setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Custom);
485
9.10k
  setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Custom);
486
9.10k
  setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Custom);
487
9.10k
  setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i64, Custom);
488
9.10k
489
9.10k
  // Lower READCYCLECOUNTER using an mrs from PMCCNTR_EL0.
490
9.10k
  // This requires the Performance Monitors extension.
491
9.10k
  if (Subtarget->hasPerfMon())
492
9.09k
    setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Legal);
493
9.10k
494
9.10k
  if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr &&
495
9.10k
      getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) {
496
6.97k
    // Issue __sincos_stret if available.
497
6.97k
    setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
498
6.97k
    setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
499
6.97k
  } else {
500
2.13k
    setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
501
2.13k
    setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
502
2.13k
  }
503
9.10k
504
9.10k
  // Make floating-point constants legal for the large code model, so they don't
505
9.10k
  // become loads from the constant pool.
506
9.10k
  if (Subtarget->isTargetMachO() && TM.getCodeModel() == CodeModel::Large) {
507
4
    setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
508
4
    setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
509
4
  }
510
9.10k
511
9.10k
  // AArch64 does not have floating-point extending loads, i1 sign-extending
512
9.10k
  // load, floating-point truncating stores, or v2i32->v2i16 truncating store.
513
54.6k
  for (MVT VT : MVT::fp_valuetypes()) {
514
54.6k
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand);
515
54.6k
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);
516
54.6k
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::f64, Expand);
517
54.6k
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::f80, Expand);
518
54.6k
  }
519
9.10k
  for (MVT VT : MVT::integer_valuetypes())
520
54.6k
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Expand);
521
9.10k
522
9.10k
  setTruncStoreAction(MVT::f32, MVT::f16, Expand);
523
9.10k
  setTruncStoreAction(MVT::f64, MVT::f32, Expand);
524
9.10k
  setTruncStoreAction(MVT::f64, MVT::f16, Expand);
525
9.10k
  setTruncStoreAction(MVT::f128, MVT::f80, Expand);
526
9.10k
  setTruncStoreAction(MVT::f128, MVT::f64, Expand);
527
9.10k
  setTruncStoreAction(MVT::f128, MVT::f32, Expand);
528
9.10k
  setTruncStoreAction(MVT::f128, MVT::f16, Expand);
529
9.10k
530
9.10k
  setOperationAction(ISD::BITCAST, MVT::i16, Custom);
531
9.10k
  setOperationAction(ISD::BITCAST, MVT::f16, Custom);
532
9.10k
533
9.10k
  // Indexed loads and stores are supported.
534
9.10k
  for (unsigned im = (unsigned)ISD::PRE_INC;
535
45.5k
       im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
536
36.4k
    setIndexedLoadAction(im, MVT::i8, Legal);
537
36.4k
    setIndexedLoadAction(im, MVT::i16, Legal);
538
36.4k
    setIndexedLoadAction(im, MVT::i32, Legal);
539
36.4k
    setIndexedLoadAction(im, MVT::i64, Legal);
540
36.4k
    setIndexedLoadAction(im, MVT::f64, Legal);
541
36.4k
    setIndexedLoadAction(im, MVT::f32, Legal);
542
36.4k
    setIndexedLoadAction(im, MVT::f16, Legal);
543
36.4k
    setIndexedStoreAction(im, MVT::i8, Legal);
544
36.4k
    setIndexedStoreAction(im, MVT::i16, Legal);
545
36.4k
    setIndexedStoreAction(im, MVT::i32, Legal);
546
36.4k
    setIndexedStoreAction(im, MVT::i64, Legal);
547
36.4k
    setIndexedStoreAction(im, MVT::f64, Legal);
548
36.4k
    setIndexedStoreAction(im, MVT::f32, Legal);
549
36.4k
    setIndexedStoreAction(im, MVT::f16, Legal);
550
36.4k
  }
551
9.10k
552
9.10k
  // Trap.
553
9.10k
  setOperationAction(ISD::TRAP, MVT::Other, Legal);
554
9.10k
  if (Subtarget->isTargetWindows())
555
84
    setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
556
9.10k
557
9.10k
  // We combine OR nodes for bitfield operations.
558
9.10k
  setTargetDAGCombine(ISD::OR);
559
9.10k
  // Try to create BICs for vector ANDs.
560
9.10k
  setTargetDAGCombine(ISD::AND);
561
9.10k
562
9.10k
  // Vector add and sub nodes may conceal a high-half opportunity.
563
9.10k
  // Also, try to fold ADD into CSINC/CSINV..
564
9.10k
  setTargetDAGCombine(ISD::ADD);
565
9.10k
  setTargetDAGCombine(ISD::SUB);
566
9.10k
  setTargetDAGCombine(ISD::SRL);
567
9.10k
  setTargetDAGCombine(ISD::XOR);
568
9.10k
  setTargetDAGCombine(ISD::SINT_TO_FP);
569
9.10k
  setTargetDAGCombine(ISD::UINT_TO_FP);
570
9.10k
571
9.10k
  setTargetDAGCombine(ISD::FP_TO_SINT);
572
9.10k
  setTargetDAGCombine(ISD::FP_TO_UINT);
573
9.10k
  setTargetDAGCombine(ISD::FDIV);
574
9.10k
575
9.10k
  setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
576
9.10k
577
9.10k
  setTargetDAGCombine(ISD::ANY_EXTEND);
578
9.10k
  setTargetDAGCombine(ISD::ZERO_EXTEND);
579
9.10k
  setTargetDAGCombine(ISD::SIGN_EXTEND);
580
9.10k
  setTargetDAGCombine(ISD::BITCAST);
581
9.10k
  setTargetDAGCombine(ISD::CONCAT_VECTORS);
582
9.10k
  setTargetDAGCombine(ISD::STORE);
583
9.10k
  if (Subtarget->supportsAddressTopByteIgnored())
584
1
    setTargetDAGCombine(ISD::LOAD);
585
9.10k
586
9.10k
  setTargetDAGCombine(ISD::MUL);
587
9.10k
588
9.10k
  setTargetDAGCombine(ISD::SELECT);
589
9.10k
  setTargetDAGCombine(ISD::VSELECT);
590
9.10k
591
9.10k
  setTargetDAGCombine(ISD::INTRINSIC_VOID);
592
9.10k
  setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
593
9.10k
  setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
594
9.10k
595
9.10k
  setTargetDAGCombine(ISD::GlobalAddress);
596
9.10k
597
9.10k
  // In case of strict alignment, avoid an excessive number of byte wide stores.
598
9.10k
  MaxStoresPerMemsetOptSize = 8;
599
9.10k
  MaxStoresPerMemset = Subtarget->requiresStrictAlign()
600
9.10k
                       ? MaxStoresPerMemsetOptSize : 32;
601
9.10k
602
9.10k
  MaxGluedStoresPerMemcpy = 4;
603
9.10k
  MaxStoresPerMemcpyOptSize = 4;
604
9.10k
  MaxStoresPerMemcpy = Subtarget->requiresStrictAlign()
605
9.10k
                       ? MaxStoresPerMemcpyOptSize : 16;
606
9.10k
607
9.10k
  MaxStoresPerMemmoveOptSize = MaxStoresPerMemmove = 4;
608
9.10k
609
9.10k
  setStackPointerRegisterToSaveRestore(AArch64::SP);
610
9.10k
611
9.10k
  setSchedulingPreference(Sched::Hybrid);
612
9.10k
613
9.10k
  EnableExtLdPromotion = true;
614
9.10k
615
9.10k
  // Set required alignment.
616
9.10k
  setMinFunctionAlignment(2);
617
9.10k
  // Set preferred alignments.
618
9.10k
  setPrefFunctionAlignment(STI.getPrefFunctionAlignment());
619
9.10k
  setPrefLoopAlignment(STI.getPrefLoopAlignment());
620
9.10k
621
9.10k
  // Only change the limit for entries in a jump table if specified by
622
9.10k
  // the sub target, but not at the command line.
623
9.10k
  unsigned MaxJT = STI.getMaximumJumpTableSize();
624
9.10k
  if (MaxJT && getMaximumJumpTableSize() == UINT_MAX)
625
9.10k
    setMaximumJumpTableSize(MaxJT);
626
9.10k
627
9.10k
  setHasExtractBitsInsn(true);
628
9.10k
629
9.10k
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
630
9.10k
631
9.10k
  if (Subtarget->hasNEON()) {
632
9.08k
    // FIXME: v1f64 shouldn't be legal if we can avoid it, because it leads to
633
9.08k
    // silliness like this:
634
9.08k
    setOperationAction(ISD::FABS, MVT::v1f64, Expand);
635
9.08k
    setOperationAction(ISD::FADD, MVT::v1f64, Expand);
636
9.08k
    setOperationAction(ISD::FCEIL, MVT::v1f64, Expand);
637
9.08k
    setOperationAction(ISD::FCOPYSIGN, MVT::v1f64, Expand);
638
9.08k
    setOperationAction(ISD::FCOS, MVT::v1f64, Expand);
639
9.08k
    setOperationAction(ISD::FDIV, MVT::v1f64, Expand);
640
9.08k
    setOperationAction(ISD::FFLOOR, MVT::v1f64, Expand);
641
9.08k
    setOperationAction(ISD::FMA, MVT::v1f64, Expand);
642
9.08k
    setOperationAction(ISD::FMUL, MVT::v1f64, Expand);
643
9.08k
    setOperationAction(ISD::FNEARBYINT, MVT::v1f64, Expand);
644
9.08k
    setOperationAction(ISD::FNEG, MVT::v1f64, Expand);
645
9.08k
    setOperationAction(ISD::FPOW, MVT::v1f64, Expand);
646
9.08k
    setOperationAction(ISD::FREM, MVT::v1f64, Expand);
647
9.08k
    setOperationAction(ISD::FROUND, MVT::v1f64, Expand);
648
9.08k
    setOperationAction(ISD::FRINT, MVT::v1f64, Expand);
649
9.08k
    setOperationAction(ISD::FSIN, MVT::v1f64, Expand);
650
9.08k
    setOperationAction(ISD::FSINCOS, MVT::v1f64, Expand);
651
9.08k
    setOperationAction(ISD::FSQRT, MVT::v1f64, Expand);
652
9.08k
    setOperationAction(ISD::FSUB, MVT::v1f64, Expand);
653
9.08k
    setOperationAction(ISD::FTRUNC, MVT::v1f64, Expand);
654
9.08k
    setOperationAction(ISD::SETCC, MVT::v1f64, Expand);
655
9.08k
    setOperationAction(ISD::BR_CC, MVT::v1f64, Expand);
656
9.08k
    setOperationAction(ISD::SELECT, MVT::v1f64, Expand);
657
9.08k
    setOperationAction(ISD::SELECT_CC, MVT::v1f64, Expand);
658
9.08k
    setOperationAction(ISD::FP_EXTEND, MVT::v1f64, Expand);
659
9.08k
660
9.08k
    setOperationAction(ISD::FP_TO_SINT, MVT::v1i64, Expand);
661
9.08k
    setOperationAction(ISD::FP_TO_UINT, MVT::v1i64, Expand);
662
9.08k
    setOperationAction(ISD::SINT_TO_FP, MVT::v1i64, Expand);
663
9.08k
    setOperationAction(ISD::UINT_TO_FP, MVT::v1i64, Expand);
664
9.08k
    setOperationAction(ISD::FP_ROUND, MVT::v1f64, Expand);
665
9.08k
666
9.08k
    setOperationAction(ISD::MUL, MVT::v1i64, Expand);
667
9.08k
668
9.08k
    // AArch64 doesn't have a direct vector ->f32 conversion instructions for
669
9.08k
    // elements smaller than i32, so promote the input to i32 first.
670
9.08k
    setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v4i8, MVT::v4i32);
671
9.08k
    setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v4i8, MVT::v4i32);
672
9.08k
    // i8 vector elements also need promotion to i32 for v8i8
673
9.08k
    setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v8i8, MVT::v8i32);
674
9.08k
    setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v8i8, MVT::v8i32);
675
9.08k
    // Similarly, there is no direct i32 -> f64 vector conversion instruction.
676
9.08k
    setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom);
677
9.08k
    setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Custom);
678
9.08k
    setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Custom);
679
9.08k
    setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Custom);
680
9.08k
    // Or, direct i32 -> f16 vector conversion.  Set it so custom, so the
681
9.08k
    // conversion happens in two steps: v4i32 -> v4f32 -> v4f16
682
9.08k
    setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Custom);
683
9.08k
    setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Custom);
684
9.08k
685
9.08k
    if (Subtarget->hasFullFP16()) {
686
69
      setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);
687
69
      setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
688
69
      setOperationAction(ISD::SINT_TO_FP, MVT::v8i16, Custom);
689
69
      setOperationAction(ISD::UINT_TO_FP, MVT::v8i16, Custom);
690
9.01k
    } else {
691
9.01k
      // when AArch64 doesn't have fullfp16 support, promote the input
692
9.01k
      // to i32 first.
693
9.01k
      setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v4i16, MVT::v4i32);
694
9.01k
      setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v4i16, MVT::v4i32);
695
9.01k
      setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v8i16, MVT::v8i32);
696
9.01k
      setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v8i16, MVT::v8i32);
697
9.01k
    }
698
9.08k
699
9.08k
    setOperationAction(ISD::CTLZ,       MVT::v1i64, Expand);
700
9.08k
    setOperationAction(ISD::CTLZ,       MVT::v2i64, Expand);
701
9.08k
702
9.08k
    // AArch64 doesn't have MUL.2d:
703
9.08k
    setOperationAction(ISD::MUL, MVT::v2i64, Expand);
704
9.08k
    // Custom handling for some quad-vector types to detect MULL.
705
9.08k
    setOperationAction(ISD::MUL, MVT::v8i16, Custom);
706
9.08k
    setOperationAction(ISD::MUL, MVT::v4i32, Custom);
707
9.08k
    setOperationAction(ISD::MUL, MVT::v2i64, Custom);
708
9.08k
709
9.08k
    // Vector reductions
710
9.08k
    for (MVT VT : { MVT::v8i8, MVT::v4i16, MVT::v2i32,
711
63.5k
                    MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
712
63.5k
      setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
713
63.5k
      setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
714
63.5k
      setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
715
63.5k
      setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
716
63.5k
      setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
717
63.5k
    }
718
9.08k
    for (MVT VT : { MVT::v4f16, MVT::v2f32,
719
45.4k
                    MVT::v8f16, MVT::v4f32, MVT::v2f64 }) {
720
45.4k
      setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
721
45.4k
      setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
722
45.4k
    }
723
9.08k
724
9.08k
    setOperationAction(ISD::ANY_EXTEND, MVT::v4i32, Legal);
725
9.08k
    setTruncStoreAction(MVT::v2i32, MVT::v2i16, Expand);
726
9.08k
    // Likewise, narrowing and extending vector loads/stores aren't handled
727
9.08k
    // directly.
728
1.00M
    for (MVT VT : MVT::vector_valuetypes()) {
729
1.00M
      setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
730
1.00M
731
1.00M
      if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32) {
732
27.2k
        setOperationAction(ISD::MULHS, VT, Legal);
733
27.2k
        setOperationAction(ISD::MULHU, VT, Legal);
734
980k
      } else {
735
980k
        setOperationAction(ISD::MULHS, VT, Expand);
736
980k
        setOperationAction(ISD::MULHU, VT, Expand);
737
980k
      }
738
1.00M
      setOperationAction(ISD::SMUL_LOHI, VT, Expand);
739
1.00M
      setOperationAction(ISD::UMUL_LOHI, VT, Expand);
740
1.00M
741
1.00M
      setOperationAction(ISD::BSWAP, VT, Expand);
742
1.00M
      setOperationAction(ISD::CTTZ, VT, Expand);
743
1.00M
744
111M
      for (MVT InnerVT : MVT::vector_valuetypes()) {
745
111M
        setTruncStoreAction(VT, InnerVT, Expand);
746
111M
        setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
747
111M
        setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
748
111M
        setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
749
111M
      }
750
1.00M
    }
751
9.08k
752
9.08k
    // AArch64 has implementations of a lot of rounding-like FP operations.
753
27.2k
    for (MVT Ty : {MVT::v2f32, MVT::v4f32, MVT::v2f64}) {
754
27.2k
      setOperationAction(ISD::FFLOOR, Ty, Legal);
755
27.2k
      setOperationAction(ISD::FNEARBYINT, Ty, Legal);
756
27.2k
      setOperationAction(ISD::FCEIL, Ty, Legal);
757
27.2k
      setOperationAction(ISD::FRINT, Ty, Legal);
758
27.2k
      setOperationAction(ISD::FTRUNC, Ty, Legal);
759
27.2k
      setOperationAction(ISD::FROUND, Ty, Legal);
760
27.2k
    }
761
9.08k
762
9.08k
    if (Subtarget->hasFullFP16()) {
763
138
      for (MVT Ty : {MVT::v4f16, MVT::v8f16}) {
764
138
        setOperationAction(ISD::FFLOOR, Ty, Legal);
765
138
        setOperationAction(ISD::FNEARBYINT, Ty, Legal);
766
138
        setOperationAction(ISD::FCEIL, Ty, Legal);
767
138
        setOperationAction(ISD::FRINT, Ty, Legal);
768
138
        setOperationAction(ISD::FTRUNC, Ty, Legal);
769
138
        setOperationAction(ISD::FROUND, Ty, Legal);
770
138
      }
771
69
    }
772
9.08k
773
9.08k
    setTruncStoreAction(MVT::v4i16, MVT::v4i8, Custom);
774
9.08k
  }
775
9.10k
776
9.10k
  PredictableSelectIsExpensive = Subtarget->predictableSelectIsExpensive();
777
9.10k
}
778
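The constructor above is, in essence, a large table-building exercise: each setOperationAction call records for an (opcode, value type) pair whether the operation is Legal, Custom, Expand, or Promote, and the SelectionDAG legalizer later consults that table. The toy sketch below (not LLVM's real classes; ToyLegalizer, Action, and the opcode/type constants are made-up stand-ins) shows the lookup pattern those calls feed.

// Toy analogue of the operation-action table built by the constructor above.
// ToyLegalizer, Action, and the opcode/type constants are illustrative only.
#include <cstdio>
#include <map>
#include <utility>

enum class Action { Legal, Custom, Expand, Promote };

struct ToyLegalizer {
  // Keyed on (opcode, value type), mirroring setOperationAction(Op, VT, Action).
  std::map<std::pair<int, int>, Action> Table;

  void setOperationAction(int Op, int VT, Action A) { Table[{Op, VT}] = A; }

  Action getOperationAction(int Op, int VT) const {
    auto It = Table.find({Op, VT});
    return It == Table.end() ? Action::Legal : It->second; // unset entries default to Legal
  }
};

int main() {
  enum { ISD_FREM = 1, ISD_SETCC = 2 };
  enum { MVT_f32 = 10, MVT_i32 = 11 };

  ToyLegalizer TL;
  TL.setOperationAction(ISD_FREM, MVT_f32, Action::Expand);  // no hardware frem: expand
  TL.setOperationAction(ISD_SETCC, MVT_i32, Action::Custom); // lowered by a target hook

  std::printf("FREM f32 expanded: %d\n",
              TL.getOperationAction(ISD_FREM, MVT_f32) == Action::Expand);
  std::printf("SETCC i32 custom:  %d\n",
              TL.getOperationAction(ISD_SETCC, MVT_i32) == Action::Custom);
  return 0;
}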
779
127k
void AArch64TargetLowering::addTypeForNEON(MVT VT, MVT PromotedBitwiseVT) {
780
127k
  assert(VT.isVector() && "VT should be a vector type");
781
127k
782
127k
  if (VT.isFloatingPoint()) {
783
54.4k
    MVT PromoteTo = EVT(VT).changeVectorElementTypeToInteger().getSimpleVT();
784
54.4k
    setOperationPromotedToType(ISD::LOAD, VT, PromoteTo);
785
54.4k
    setOperationPromotedToType(ISD::STORE, VT, PromoteTo);
786
54.4k
  }
787
127k
788
127k
  // Mark vector float intrinsics as expand.
789
127k
  if (VT == MVT::v2f32 || VT == MVT::v4f32 || VT == MVT::v2f64) {
790
27.2k
    setOperationAction(ISD::FSIN, VT, Expand);
791
27.2k
    setOperationAction(ISD::FCOS, VT, Expand);
792
27.2k
    setOperationAction(ISD::FPOW, VT, Expand);
793
27.2k
    setOperationAction(ISD::FLOG, VT, Expand);
794
27.2k
    setOperationAction(ISD::FLOG2, VT, Expand);
795
27.2k
    setOperationAction(ISD::FLOG10, VT, Expand);
796
27.2k
    setOperationAction(ISD::FEXP, VT, Expand);
797
27.2k
    setOperationAction(ISD::FEXP2, VT, Expand);
798
27.2k
799
27.2k
    // But we do support custom-lowering for FCOPYSIGN.
800
27.2k
    setOperationAction(ISD::FCOPYSIGN, VT, Custom);
801
27.2k
  }
802
127k
803
127k
  setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
804
127k
  setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
805
127k
  setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
806
127k
  setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
807
127k
  setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
808
127k
  setOperationAction(ISD::SRA, VT, Custom);
809
127k
  setOperationAction(ISD::SRL, VT, Custom);
810
127k
  setOperationAction(ISD::SHL, VT, Custom);
811
127k
  setOperationAction(ISD::OR, VT, Custom);
812
127k
  setOperationAction(ISD::SETCC, VT, Custom);
813
127k
  setOperationAction(ISD::CONCAT_VECTORS, VT, Legal);
814
127k
815
127k
  setOperationAction(ISD::SELECT, VT, Expand);
816
127k
  setOperationAction(ISD::SELECT_CC, VT, Expand);
817
127k
  setOperationAction(ISD::VSELECT, VT, Expand);
818
127k
  for (MVT InnerVT : MVT::all_valuetypes())
819
16.4M
    setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
820
127k
821
127k
  // CNT supports only B element sizes, then use UADDLP to widen.
822
127k
  if (VT != MVT::v8i8 && VT != MVT::v16i8)
823
108k
    setOperationAction(ISD::CTPOP, VT, Custom);
824
127k
825
127k
  setOperationAction(ISD::UDIV, VT, Expand);
826
127k
  setOperationAction(ISD::SDIV, VT, Expand);
827
127k
  setOperationAction(ISD::UREM, VT, Expand);
828
127k
  setOperationAction(ISD::SREM, VT, Expand);
829
127k
  setOperationAction(ISD::FREM, VT, Expand);
830
127k
831
127k
  setOperationAction(ISD::FP_TO_SINT, VT, Custom);
832
127k
  setOperationAction(ISD::FP_TO_UINT, VT, Custom);
833
127k
834
127k
  if (!VT.isFloatingPoint())
835
72.6k
    setOperationAction(ISD::ABS, VT, Legal);
836
127k
837
127k
  // [SU][MIN|MAX] are available for all NEON types apart from i64.
838
127k
  if (!VT.isFloatingPoint() && VT != MVT::v2i64 && VT != MVT::v1i64)
839
54.4k
    for (unsigned Opcode : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
840
217k
      setOperationAction(Opcode, VT, Legal);
841
127k
842
127k
  // F[MIN|MAX][NUM|NAN] are available for all FP NEON types.
843
127k
  if (VT.isFloatingPoint() &&
844
127k
      (VT.getVectorElementType() != MVT::f16 || Subtarget->hasFullFP16()))
845
36.4k
    for (unsigned Opcode :
846
36.4k
         {ISD::FMINIMUM, ISD::FMAXIMUM, ISD::FMINNUM, ISD::FMAXNUM})
847
145k
      setOperationAction(Opcode, VT, Legal);
848
127k
849
127k
  if (Subtarget->isLittleEndian()) {
850
126k
    for (unsigned im = (unsigned)ISD::PRE_INC;
851
633k
         im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
852
506k
      setIndexedLoadAction(im, VT, Legal);
853
506k
      setIndexedStoreAction(im, VT, Legal);
854
506k
    }
855
126k
  }
856
127k
}
857
858
63.5k
void AArch64TargetLowering::addDRTypeForNEON(MVT VT) {
859
63.5k
  addRegisterClass(VT, &AArch64::FPR64RegClass);
860
63.5k
  addTypeForNEON(VT, MVT::v2i32);
861
63.5k
}
862
863
63.5k
void AArch64TargetLowering::addQRTypeForNEON(MVT VT) {
864
63.5k
  addRegisterClass(VT, &AArch64::FPR128RegClass);
865
63.5k
  addTypeForNEON(VT, MVT::v4i32);
866
63.5k
}
867
868
EVT AArch64TargetLowering::getSetCCResultType(const DataLayout &, LLVMContext &,
869
725k
                                              EVT VT) const {
870
725k
  if (!VT.isVector())
871
720k
    return MVT::i32;
872
4.73k
  return VT.changeVectorElementTypeToInteger();
873
4.73k
}
874
875
static bool optimizeLogicalImm(SDValue Op, unsigned Size, uint64_t Imm,
876
                               const APInt &Demanded,
877
                               TargetLowering::TargetLoweringOpt &TLO,
878
41.0k
                               unsigned NewOpc) {
879
41.0k
  uint64_t OldImm = Imm, NewImm, Enc;
880
41.0k
  uint64_t Mask = ((uint64_t)(-1LL) >> (64 - Size)), OrigMask = Mask;
881
41.0k
882
41.0k
  // Return if the immediate is already all zeros, all ones, a bimm32 or a
883
41.0k
  // bimm64.
884
41.0k
  if (Imm == 0 || Imm == Mask ||
885
41.0k
      AArch64_AM::isLogicalImmediate(Imm & Mask, Size))
886
40.3k
    return false;
887
670
888
670
  unsigned EltSize = Size;
889
670
  uint64_t DemandedBits = Demanded.getZExtValue();
890
670
891
670
  // Clear bits that are not demanded.
892
670
  Imm &= DemandedBits;
893
670
894
1.03k
  while (true) {
895
1.03k
    // The goal here is to set the non-demanded bits in a way that minimizes
896
1.03k
    // the number of switching between 0 and 1. In order to achieve this goal,
897
1.03k
    // we set the non-demanded bits to the value of the preceding demanded bits.
898
1.03k
    // For example, if we have an immediate 0bx10xx0x1 ('x' indicates a
899
1.03k
    // non-demanded bit), we copy bit0 (1) to the least significant 'x',
900
1.03k
    // bit2 (0) to 'xx', and bit6 (1) to the most significant 'x'.
901
1.03k
    // The final result is 0b11000011.
902
1.03k
    uint64_t NonDemandedBits = ~DemandedBits;
903
1.03k
    uint64_t InvertedImm = ~Imm & DemandedBits;
904
1.03k
    uint64_t RotatedImm =
905
1.03k
        ((InvertedImm << 1) | (InvertedImm >> (EltSize - 1) & 1)) &
906
1.03k
        NonDemandedBits;
907
1.03k
    uint64_t Sum = RotatedImm + NonDemandedBits;
908
1.03k
    bool Carry = NonDemandedBits & ~Sum & (1ULL << (EltSize - 1));
909
1.03k
    uint64_t Ones = (Sum + Carry) & NonDemandedBits;
910
1.03k
    NewImm = (Imm | Ones) & Mask;
911
1.03k
912
1.03k
    // If NewImm or its bitwise NOT is a shifted mask, it is a bitmask immediate
913
1.03k
    // or all-ones or all-zeros, in which case we can stop searching. Otherwise,
914
1.03k
    // we halve the element size and continue the search.
915
1.03k
    if (isShiftedMask_64(NewImm) || isShiftedMask_64(~(NewImm | ~Mask)))
916
166
      break;
917
867
918
867
    // We cannot shrink the element size any further if it is 2-bits.
919
867
    if (EltSize == 2)
920
0
      return false;
921
867
922
867
    EltSize /= 2;
923
867
    Mask >>= EltSize;
924
867
    uint64_t Hi = Imm >> EltSize, DemandedBitsHi = DemandedBits >> EltSize;
925
867
926
867
    // Return if there is mismatch in any of the demanded bits of Imm and Hi.
927
867
    if (((Imm ^ Hi) & (DemandedBits & DemandedBitsHi) & Mask) != 0)
928
504
      return false;
929
363
930
363
    // Merge the upper and lower halves of Imm and DemandedBits.
931
363
    Imm |= Hi;
932
363
    DemandedBits |= DemandedBitsHi;
933
363
  }
934
670
935
670
  ++NumOptimizedImms;
936
166
937
166
  // Replicate the element across the register width.
938
221
  while (EltSize < Size) {
939
55
    NewImm |= NewImm << EltSize;
940
55
    EltSize *= 2;
941
55
  }
942
166
943
166
  (void)OldImm;
944
166
  assert(((OldImm ^ NewImm) & Demanded.getZExtValue()) == 0 &&
945
166
         "demanded bits should never be altered");
946
166
  assert(OldImm != NewImm && "the new imm shouldn't be equal to the old imm");
947
166
948
166
  // Create the new constant immediate node.
949
166
  EVT VT = Op.getValueType();
950
166
  SDLoc DL(Op);
951
166
  SDValue New;
952
166
953
166
  // If the new constant immediate is all-zeros or all-ones, let the target
954
166
  // independent DAG combine optimize this node.
955
166
  if (NewImm == 0 || NewImm == OrigMask) {
956
1
    New = TLO.DAG.getNode(Op.getOpcode(), DL, VT, Op.getOperand(0),
957
1
                          TLO.DAG.getConstant(NewImm, DL, VT));
958
1
  // Otherwise, create a machine node so that target independent DAG combine
959
1
  // doesn't undo this optimization.
960
165
  } else {
961
165
    Enc = AArch64_AM::encodeLogicalImmediate(NewImm, Size);
962
165
    SDValue EncConst = TLO.DAG.getTargetConstant(Enc, DL, VT);
963
165
    New = SDValue(
964
165
        TLO.DAG.getMachineNode(NewOpc, DL, VT, Op.getOperand(0), EncConst), 0);
965
165
  }
966
166
967
166
  return TLO.CombineTo(Op, New);
968
670
}
969
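The loop's strategy, described in the comment at source line 895, is to fill the non-demanded bits with copies of the preceding demanded bits so the immediate becomes a shifted mask (or its complement) and therefore encodable. The standalone sketch below reproduces one iteration of that bit manipulation for the comment's own example, 0bx10xx0x1, and checks that it yields 0b11000011; fillNonDemandedBits is a name invented here for illustration, not a function in this file.

// Worked example of the non-demanded-bit filling used by optimizeLogicalImm.
// fillNonDemandedBits is an illustrative name; it mirrors one loop iteration
// of the function above with Size == EltSize == 8.
#include <cassert>
#include <cstdint>
#include <cstdio>

static uint64_t fillNonDemandedBits(uint64_t Imm, uint64_t DemandedBits,
                                    unsigned EltSize) {
  uint64_t Mask = (~0ULL) >> (64 - EltSize);
  Imm &= DemandedBits;
  uint64_t NonDemandedBits = ~DemandedBits & Mask;
  // Copy each demanded bit into the run of non-demanded bits that follows it.
  uint64_t InvertedImm = ~Imm & DemandedBits;
  uint64_t RotatedImm =
      ((InvertedImm << 1) | ((InvertedImm >> (EltSize - 1)) & 1)) &
      NonDemandedBits;
  uint64_t Sum = RotatedImm + NonDemandedBits;
  bool Carry = NonDemandedBits & ~Sum & (1ULL << (EltSize - 1));
  uint64_t Ones = (Sum + Carry) & NonDemandedBits;
  return (Imm | Ones) & Mask;
}

int main() {
  // The comment's example 0bx10xx0x1: bits 6,5,2,0 are demanded (value 0b01000001).
  uint64_t Demanded = 0b01100101, Imm = 0b01000001;
  uint64_t NewImm = fillNonDemandedBits(Imm, Demanded, 8);
  std::printf("NewImm = 0x%llx\n", (unsigned long long)NewImm); // prints 0xc3 == 0b11000011
  assert(NewImm == 0b11000011);
  return 0;
}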
970
bool AArch64TargetLowering::targetShrinkDemandedConstant(
971
402k
    SDValue Op, const APInt &Demanded, TargetLoweringOpt &TLO) const {
972
402k
  // Delay this optimization to as late as possible.
973
402k
  if (!TLO.LegalOps)
974
237k
    return false;
975
164k
976
164k
  if (!EnableOptimizeLogicalImm)
977
0
    return false;
978
164k
979
164k
  EVT VT = Op.getValueType();
980
164k
  if (VT.isVector())
981
43.2k
    return false;
982
121k
983
121k
  unsigned Size = VT.getSizeInBits();
984
121k
  assert((Size == 32 || Size == 64) &&
985
121k
         "i32 or i64 is expected after legalization.");
986
121k
987
121k
  // Exit early if we demand all bits.
988
121k
  if (Demanded.countPopulation() == Size)
989
76.8k
    return false;
990
44.8k
991
44.8k
  unsigned NewOpc;
992
44.8k
  switch (Op.getOpcode()) {
993
44.8k
  default:
994
273
    return false;
995
44.8k
  case ISD::AND:
996
42.7k
    NewOpc = Size == 32 ? AArch64::ANDWri : AArch64::ANDXri;
997
42.7k
    break;
998
44.8k
  case ISD::OR:
999
1.47k
    NewOpc = Size == 32 ? AArch64::ORRWri : AArch64::ORRXri;
1000
1.47k
    break;
1001
44.8k
  case ISD::XOR:
1002
354
    NewOpc = Size == 32 ? AArch64::EORWri : AArch64::EORXri;
1003
354
    break;
1004
44.5k
  }
1005
44.5k
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
1006
44.5k
  if (!C)
1007
3.51k
    return false;
1008
41.0k
  uint64_t Imm = C->getZExtValue();
1009
41.0k
  return optimizeLogicalImm(Op, Size, Imm, Demanded, TLO, NewOpc);
1010
41.0k
}
1011
1012
/// computeKnownBitsForTargetNode - Determine which of the bits specified in
1013
/// Mask are known to be either zero or one and return them Known.
1014
void AArch64TargetLowering::computeKnownBitsForTargetNode(
1015
    const SDValue Op, KnownBits &Known,
1016
703k
    const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const {
1017
703k
  switch (Op.getOpcode()) {
1018
703k
  default:
1019
314k
    break;
1020
703k
  case AArch64ISD::CSEL: {
1021
142k
    KnownBits Known2;
1022
142k
    Known = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
1023
142k
    Known2 = DAG.computeKnownBits(Op->getOperand(1), Depth + 1);
1024
142k
    Known.Zero &= Known2.Zero;
1025
142k
    Known.One &= Known2.One;
1026
142k
    break;
1027
703k
  }
1028
703k
  case ISD::INTRINSIC_W_CHAIN: {
1029
190k
    ConstantSDNode *CN = cast<ConstantSDNode>(Op->getOperand(1));
1030
190k
    Intrinsic::ID IntID = static_cast<Intrinsic::ID>(CN->getZExtValue());
1031
190k
    switch (IntID) {
1032
190k
    default: return;
1033
190k
    case Intrinsic::aarch64_ldaxr:
1034
139k
    case Intrinsic::aarch64_ldxr: {
1035
139k
      unsigned BitWidth = Known.getBitWidth();
1036
139k
      EVT VT = cast<MemIntrinsicSDNode>(Op)->getMemoryVT();
1037
139k
      unsigned MemBits = VT.getScalarSizeInBits();
1038
139k
      Known.Zero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits);
1039
139k
      return;
1040
0
    }
1041
0
    }
1042
0
    break;
1043
0
  }
1044
55.7k
  case ISD::INTRINSIC_WO_CHAIN:
1045
55.7k
  case ISD::INTRINSIC_VOID: {
1046
55.7k
    unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
1047
55.7k
    switch (IntNo) {
1048
55.7k
    default:
1049
55.7k
      break;
1050
55.7k
    case Intrinsic::aarch64_neon_umaxv:
1051
25
    case Intrinsic::aarch64_neon_uminv: {
1052
25
      // Figure out the datatype of the vector operand. The UMINV instruction
1053
25
      // will zero extend the result, so we can mark as known zero all the
1054
25
      // bits larger than the element datatype. 32-bit or larget doesn't need
1055
25
      // this as those are legal types and will be handled by isel directly.
1056
25
      MVT VT = Op.getOperand(1).getValueType().getSimpleVT();
1057
25
      unsigned BitWidth = Known.getBitWidth();
1058
25
      if (VT == MVT::v8i8 || VT == MVT::v16i8) {
1059
16
        assert(BitWidth >= 8 && "Unexpected width!");
1060
16
        APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth - 8);
1061
16
        Known.Zero |= Mask;
1062
16
      } else if (VT == MVT::v4i16 || VT == MVT::v8i16) {
1063
9
        assert(BitWidth >= 16 && "Unexpected width!");
1064
9
        APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth - 16);
1065
9
        Known.Zero |= Mask;
1066
9
      }
1067
25
      break;
1068
25
    } break;
1069
55.7k
    }
1070
55.7k
  }
1071
703k
  }
1072
703k
}
1073
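For the aarch64_ldxr/ldaxr case above, the reasoning is that an exclusive load of a narrow memory type zero-extends into the full result width, so every bit above the loaded width is known to be zero. Below is a minimal stand-alone illustration with plain integers in place of APInt/KnownBits; highBitsSet is a hypothetical helper mirroring APInt::getHighBitsSet for a 64-bit width.

// Stand-alone illustration of the ldxr/ldaxr known-bits rule above: an exclusive
// load of an i8 zero-extends into a 64-bit result, so the top 56 bits are known
// zero. highBitsSet is a hypothetical helper, not an LLVM API.
#include <cassert>
#include <cstdint>

static uint64_t highBitsSet(unsigned BitWidth, unsigned N) {
  // Top N bits of a BitWidth-bit value, assuming BitWidth == 64 here.
  return N == 0 ? 0 : ~0ULL << (BitWidth - N);
}

int main() {
  unsigned BitWidth = 64, MemBits = 8;  // e.g. a byte-sized exclusive load
  uint64_t KnownZero = highBitsSet(BitWidth, BitWidth - MemBits);
  assert(KnownZero == 0xFFFFFFFFFFFFFF00ULL); // top 56 bits known zero
  return 0;
}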
1074
MVT AArch64TargetLowering::getScalarShiftAmountTy(const DataLayout &DL,
1075
226k
                                                  EVT) const {
1076
226k
  return MVT::i64;
1077
226k
}
1078
1079
bool AArch64TargetLowering::allowsMisalignedMemoryAccesses(
1080
    EVT VT, unsigned AddrSpace, unsigned Align, MachineMemOperand::Flags Flags,
1081
263k
    bool *Fast) const {
1082
263k
  if (Subtarget->requiresStrictAlign())
1083
322
    return false;
1084
263k
1085
263k
  if (Fast) {
1086
9.88k
    // Some CPUs are fine with unaligned stores except for 128-bit ones.
1087
9.88k
    *Fast = !Subtarget->isMisaligned128StoreSlow() || VT.getStoreSize() != 16 ||
1088
9.88k
            // See comments in performSTORECombine() for more details about
1089
9.88k
            // these conditions.
1090
9.88k
1091
9.88k
            // Code that uses clang vector extensions can mark that it
1092
9.88k
            // wants unaligned accesses to be treated as fast by
1093
9.88k
            // underspecifying alignment to be 1 or 2.
1094
9.88k
            Align <= 2 ||
1095
9.88k
1096
9.88k
            // Disregard v2i64. Memcpy lowering produces those and splitting
1097
9.88k
            // them regresses performance on micro-benchmarks and olden/bh.
1098
9.88k
            VT == MVT::v2i64;
1099
9.88k
  }
1100
263k
  return true;
1101
263k
}
1102
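The *Fast computation above reports an unaligned access as fast unless the subtarget penalizes misaligned 128-bit stores and the access is a 16-byte store, aligned to more than 2 bytes, that is not v2i64. A small self-contained rendering of that predicate follows, with plain bools standing in for the subtarget and type queries; isFastUnaligned and its sample inputs are hypothetical.

// Plain-bool rendering of the *Fast heuristic above; isFastUnaligned and the
// sample inputs are hypothetical, mirroring the expression in the function.
#include <cstdio>

static bool isFastUnaligned(bool Misaligned128StoreSlow, unsigned StoreSize,
                            unsigned Align, bool IsV2i64) {
  return !Misaligned128StoreSlow || StoreSize != 16 || Align <= 2 || IsV2i64;
}

int main() {
  // 16-byte store, 4-byte aligned, not v2i64, on a core where misaligned
  // 128-bit stores are slow: reported as not fast.
  std::printf("%d\n", isFastUnaligned(true, 16, 4, false));   // 0
  // The same access on a core without that penalty: fast.
  std::printf("%d\n", isFastUnaligned(false, 16, 4, false));  // 1
  return 0;
}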
1103
FastISel *
1104
AArch64TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
1105
1.26k
                                      const TargetLibraryInfo *libInfo) const {
1106
1.26k
  return AArch64::createFastISel(funcInfo, libInfo);
1107
1.26k
}
1108
1109
0
const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
1110
0
  switch ((AArch64ISD::NodeType)Opcode) {
1111
0
  case AArch64ISD::FIRST_NUMBER:      break;
1112
0
  case AArch64ISD::CALL:              return "AArch64ISD::CALL";
1113
0
  case AArch64ISD::ADRP:              return "AArch64ISD::ADRP";
1114
0
  case AArch64ISD::ADR:               return "AArch64ISD::ADR";
1115
0
  case AArch64ISD::ADDlow:            return "AArch64ISD::ADDlow";
1116
0
  case AArch64ISD::LOADgot:           return "AArch64ISD::LOADgot";
1117
0
  case AArch64ISD::RET_FLAG:          return "AArch64ISD::RET_FLAG";
1118
0
  case AArch64ISD::BRCOND:            return "AArch64ISD::BRCOND";
1119
0
  case AArch64ISD::CSEL:              return "AArch64ISD::CSEL";
1120
0
  case AArch64ISD::FCSEL:             return "AArch64ISD::FCSEL";
1121
0
  case AArch64ISD::CSINV:             return "AArch64ISD::CSINV";
1122
0
  case AArch64ISD::CSNEG:             return "AArch64ISD::CSNEG";
1123
0
  case AArch64ISD::CSINC:             return "AArch64ISD::CSINC";
1124
0
  case AArch64ISD::THREAD_POINTER:    return "AArch64ISD::THREAD_POINTER";
1125
0
  case AArch64ISD::TLSDESC_CALLSEQ:   return "AArch64ISD::TLSDESC_CALLSEQ";
1126
0
  case AArch64ISD::ADC:               return "AArch64ISD::ADC";
1127
0
  case AArch64ISD::SBC:               return "AArch64ISD::SBC";
1128
0
  case AArch64ISD::ADDS:              return "AArch64ISD::ADDS";
1129
0
  case AArch64ISD::SUBS:              return "AArch64ISD::SUBS";
1130
0
  case AArch64ISD::ADCS:              return "AArch64ISD::ADCS";
1131
0
  case AArch64ISD::SBCS:              return "AArch64ISD::SBCS";
1132
0
  case AArch64ISD::ANDS:              return "AArch64ISD::ANDS";
1133
0
  case AArch64ISD::CCMP:              return "AArch64ISD::CCMP";
1134
0
  case AArch64ISD::CCMN:              return "AArch64ISD::CCMN";
1135
0
  case AArch64ISD::FCCMP:             return "AArch64ISD::FCCMP";
1136
0
  case AArch64ISD::FCMP:              return "AArch64ISD::FCMP";
1137
0
  case AArch64ISD::DUP:               return "AArch64ISD::DUP";
1138
0
  case AArch64ISD::DUPLANE8:          return "AArch64ISD::DUPLANE8";
1139
0
  case AArch64ISD::DUPLANE16:         return "AArch64ISD::DUPLANE16";
1140
0
  case AArch64ISD::DUPLANE32:         return "AArch64ISD::DUPLANE32";
1141
0
  case AArch64ISD::DUPLANE64:         return "AArch64ISD::DUPLANE64";
1142
0
  case AArch64ISD::MOVI:              return "AArch64ISD::MOVI";
1143
0
  case AArch64ISD::MOVIshift:         return "AArch64ISD::MOVIshift";
1144
0
  case AArch64ISD::MOVIedit:          return "AArch64ISD::MOVIedit";
1145
0
  case AArch64ISD::MOVImsl:           return "AArch64ISD::MOVImsl";
1146
0
  case AArch64ISD::FMOV:              return "AArch64ISD::FMOV";
1147
0
  case AArch64ISD::MVNIshift:         return "AArch64ISD::MVNIshift";
1148
0
  case AArch64ISD::MVNImsl:           return "AArch64ISD::MVNImsl";
1149
0
  case AArch64ISD::BICi:              return "AArch64ISD::BICi";
1150
0
  case AArch64ISD::ORRi:              return "AArch64ISD::ORRi";
1151
0
  case AArch64ISD::BSL:               return "AArch64ISD::BSL";
1152
0
  case AArch64ISD::NEG:               return "AArch64ISD::NEG";
1153
0
  case AArch64ISD::EXTR:              return "AArch64ISD::EXTR";
1154
0
  case AArch64ISD::ZIP1:              return "AArch64ISD::ZIP1";
1155
0
  case AArch64ISD::ZIP2:              return "AArch64ISD::ZIP2";
1156
0
  case AArch64ISD::UZP1:              return "AArch64ISD::UZP1";
1157
0
  case AArch64ISD::UZP2:              return "AArch64ISD::UZP2";
1158
0
  case AArch64ISD::TRN1:              return "AArch64ISD::TRN1";
1159
0
  case AArch64ISD::TRN2:              return "AArch64ISD::TRN2";
1160
0
  case AArch64ISD::REV16:             return "AArch64ISD::REV16";
1161
0
  case AArch64ISD::REV32:             return "AArch64ISD::REV32";
1162
0
  case AArch64ISD::REV64:             return "AArch64ISD::REV64";
1163
0
  case AArch64ISD::EXT:               return "AArch64ISD::EXT";
1164
0
  case AArch64ISD::VSHL:              return "AArch64ISD::VSHL";
1165
0
  case AArch64ISD::VLSHR:             return "AArch64ISD::VLSHR";
1166
0
  case AArch64ISD::VASHR:             return "AArch64ISD::VASHR";
1167
0
  case AArch64ISD::CMEQ:              return "AArch64ISD::CMEQ";
1168
0
  case AArch64ISD::CMGE:              return "AArch64ISD::CMGE";
1169
0
  case AArch64ISD::CMGT:              return "AArch64ISD::CMGT";
1170
0
  case AArch64ISD::CMHI:              return "AArch64ISD::CMHI";
1171
0
  case AArch64ISD::CMHS:              return "AArch64ISD::CMHS";
1172
0
  case AArch64ISD::FCMEQ:             return "AArch64ISD::FCMEQ";
1173
0
  case AArch64ISD::FCMGE:             return "AArch64ISD::FCMGE";
1174
0
  case AArch64ISD::FCMGT:             return "AArch64ISD::FCMGT";
1175
0
  case AArch64ISD::CMEQz:             return "AArch64ISD::CMEQz";
1176
0
  case AArch64ISD::CMGEz:             return "AArch64ISD::CMGEz";
1177
0
  case AArch64ISD::CMGTz:             return "AArch64ISD::CMGTz";
1178
0
  case AArch64ISD::CMLEz:             return "AArch64ISD::CMLEz";
1179
0
  case AArch64ISD::CMLTz:             return "AArch64ISD::CMLTz";
1180
0
  case AArch64ISD::FCMEQz:            return "AArch64ISD::FCMEQz";
1181
0
  case AArch64ISD::FCMGEz:            return "AArch64ISD::FCMGEz";
1182
0
  case AArch64ISD::FCMGTz:            return "AArch64ISD::FCMGTz";
1183
0
  case AArch64ISD::FCMLEz:            return "AArch64ISD::FCMLEz";
1184
0
  case AArch64ISD::FCMLTz:            return "AArch64ISD::FCMLTz";
1185
0
  case AArch64ISD::SADDV:             return "AArch64ISD::SADDV";
1186
0
  case AArch64ISD::UADDV:             return "AArch64ISD::UADDV";
1187
0
  case AArch64ISD::SMINV:             return "AArch64ISD::SMINV";
1188
0
  case AArch64ISD::UMINV:             return "AArch64ISD::UMINV";
1189
0
  case AArch64ISD::SMAXV:             return "AArch64ISD::SMAXV";
1190
0
  case AArch64ISD::UMAXV:             return "AArch64ISD::UMAXV";
1191
0
  case AArch64ISD::NOT:               return "AArch64ISD::NOT";
1192
0
  case AArch64ISD::BIT:               return "AArch64ISD::BIT";
1193
0
  case AArch64ISD::CBZ:               return "AArch64ISD::CBZ";
1194
0
  case AArch64ISD::CBNZ:              return "AArch64ISD::CBNZ";
1195
0
  case AArch64ISD::TBZ:               return "AArch64ISD::TBZ";
1196
0
  case AArch64ISD::TBNZ:              return "AArch64ISD::TBNZ";
1197
0
  case AArch64ISD::TC_RETURN:         return "AArch64ISD::TC_RETURN";
1198
0
  case AArch64ISD::PREFETCH:          return "AArch64ISD::PREFETCH";
1199
0
  case AArch64ISD::SITOF:             return "AArch64ISD::SITOF";
1200
0
  case AArch64ISD::UITOF:             return "AArch64ISD::UITOF";
1201
0
  case AArch64ISD::NVCAST:            return "AArch64ISD::NVCAST";
1202
0
  case AArch64ISD::SQSHL_I:           return "AArch64ISD::SQSHL_I";
1203
0
  case AArch64ISD::UQSHL_I:           return "AArch64ISD::UQSHL_I";
1204
0
  case AArch64ISD::SRSHR_I:           return "AArch64ISD::SRSHR_I";
1205
0
  case AArch64ISD::URSHR_I:           return "AArch64ISD::URSHR_I";
1206
0
  case AArch64ISD::SQSHLU_I:          return "AArch64ISD::SQSHLU_I";
1207
0
  case AArch64ISD::WrapperLarge:      return "AArch64ISD::WrapperLarge";
1208
0
  case AArch64ISD::LD2post:           return "AArch64ISD::LD2post";
1209
0
  case AArch64ISD::LD3post:           return "AArch64ISD::LD3post";
1210
0
  case AArch64ISD::LD4post:           return "AArch64ISD::LD4post";
1211
0
  case AArch64ISD::ST2post:           return "AArch64ISD::ST2post";
1212
0
  case AArch64ISD::ST3post:           return "AArch64ISD::ST3post";
1213
0
  case AArch64ISD::ST4post:           return "AArch64ISD::ST4post";
1214
0
  case AArch64ISD::LD1x2post:         return "AArch64ISD::LD1x2post";
1215
0
  case AArch64ISD::LD1x3post:         return "AArch64ISD::LD1x3post";
1216
0
  case AArch64ISD::LD1x4post:         return "AArch64ISD::LD1x4post";
1217
0
  case AArch64ISD::ST1x2post:         return "AArch64ISD::ST1x2post";
1218
0
  case AArch64ISD::ST1x3post:         return "AArch64ISD::ST1x3post";
1219
0
  case AArch64ISD::ST1x4post:         return "AArch64ISD::ST1x4post";
1220
0
  case AArch64ISD::LD1DUPpost:        return "AArch64ISD::LD1DUPpost";
1221
0
  case AArch64ISD::LD2DUPpost:        return "AArch64ISD::LD2DUPpost";
1222
0
  case AArch64ISD::LD3DUPpost:        return "AArch64ISD::LD3DUPpost";
1223
0
  case AArch64ISD::LD4DUPpost:        return "AArch64ISD::LD4DUPpost";
1224
0
  case AArch64ISD::LD1LANEpost:       return "AArch64ISD::LD1LANEpost";
1225
0
  case AArch64ISD::LD2LANEpost:       return "AArch64ISD::LD2LANEpost";
1226
0
  case AArch64ISD::LD3LANEpost:       return "AArch64ISD::LD3LANEpost";
1227
0
  case AArch64ISD::LD4LANEpost:       return "AArch64ISD::LD4LANEpost";
1228
0
  case AArch64ISD::ST2LANEpost:       return "AArch64ISD::ST2LANEpost";
1229
0
  case AArch64ISD::ST3LANEpost:       return "AArch64ISD::ST3LANEpost";
1230
0
  case AArch64ISD::ST4LANEpost:       return "AArch64ISD::ST4LANEpost";
1231
0
  case AArch64ISD::SMULL:             return "AArch64ISD::SMULL";
1232
0
  case AArch64ISD::UMULL:             return "AArch64ISD::UMULL";
1233
0
  case AArch64ISD::FRECPE:            return "AArch64ISD::FRECPE";
1234
0
  case AArch64ISD::FRECPS:            return "AArch64ISD::FRECPS";
1235
0
  case AArch64ISD::FRSQRTE:           return "AArch64ISD::FRSQRTE";
1236
0
  case AArch64ISD::FRSQRTS:           return "AArch64ISD::FRSQRTS";
1237
0
  case AArch64ISD::STG:               return "AArch64ISD::STG";
1238
0
  case AArch64ISD::STZG:              return "AArch64ISD::STZG";
1239
0
  case AArch64ISD::ST2G:              return "AArch64ISD::ST2G";
1240
0
  case AArch64ISD::STZ2G:             return "AArch64ISD::STZ2G";
1241
0
  }
1242
0
  return nullptr;
1243
0
}
1244
1245
MachineBasicBlock *
1246
AArch64TargetLowering::EmitF128CSEL(MachineInstr &MI,
1247
3
                                    MachineBasicBlock *MBB) const {
1248
3
  // We materialise the F128CSEL pseudo-instruction as some control flow and a
1249
3
  // phi node:
1250
3
1251
3
  // OrigBB:
1252
3
  //     [... previous instrs leading to comparison ...]
1253
3
  //     b.ne TrueBB
1254
3
  //     b EndBB
1255
3
  // TrueBB:
1256
3
  //     ; Fallthrough
1257
3
  // EndBB:
1258
3
  //     Dest = PHI [IfTrue, TrueBB], [IfFalse, OrigBB]
1259
3
1260
3
  MachineFunction *MF = MBB->getParent();
1261
3
  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
1262
3
  const BasicBlock *LLVM_BB = MBB->getBasicBlock();
1263
3
  DebugLoc DL = MI.getDebugLoc();
1264
3
  MachineFunction::iterator It = ++MBB->getIterator();
1265
3
1266
3
  unsigned DestReg = MI.getOperand(0).getReg();
1267
3
  unsigned IfTrueReg = MI.getOperand(1).getReg();
1268
3
  unsigned IfFalseReg = MI.getOperand(2).getReg();
1269
3
  unsigned CondCode = MI.getOperand(3).getImm();
1270
3
  bool NZCVKilled = MI.getOperand(4).isKill();
1271
3
1272
3
  MachineBasicBlock *TrueBB = MF->CreateMachineBasicBlock(LLVM_BB);
1273
3
  MachineBasicBlock *EndBB = MF->CreateMachineBasicBlock(LLVM_BB);
1274
3
  MF->insert(It, TrueBB);
1275
3
  MF->insert(It, EndBB);
1276
3
1277
3
  // Transfer rest of current basic-block to EndBB
1278
3
  EndBB->splice(EndBB->begin(), MBB, std::next(MachineBasicBlock::iterator(MI)),
1279
3
                MBB->end());
1280
3
  EndBB->transferSuccessorsAndUpdatePHIs(MBB);
1281
3
1282
3
  BuildMI(MBB, DL, TII->get(AArch64::Bcc)).addImm(CondCode).addMBB(TrueBB);
1283
3
  BuildMI(MBB, DL, TII->get(AArch64::B)).addMBB(EndBB);
1284
3
  MBB->addSuccessor(TrueBB);
1285
3
  MBB->addSuccessor(EndBB);
1286
3
1287
3
  // TrueBB falls through to the end.
1288
3
  TrueBB->addSuccessor(EndBB);
1289
3
1290
3
  if (!NZCVKilled) {
1291
3
    TrueBB->addLiveIn(AArch64::NZCV);
1292
3
    EndBB->addLiveIn(AArch64::NZCV);
1293
3
  }
1294
3
1295
3
  BuildMI(*EndBB, EndBB->begin(), DL, TII->get(AArch64::PHI), DestReg)
1296
3
      .addReg(IfTrueReg)
1297
3
      .addMBB(TrueBB)
1298
3
      .addReg(IfFalseReg)
1299
3
      .addMBB(MBB);
1300
3
1301
3
  MI.eraseFromParent();
1302
3
  return EndBB;
1303
3
}
1304
1305
MachineBasicBlock *AArch64TargetLowering::EmitLoweredCatchRet(
1306
7
       MachineInstr &MI, MachineBasicBlock *BB) const {
1307
7
  assert(!isAsynchronousEHPersonality(classifyEHPersonality(
1308
7
             BB->getParent()->getFunction().getPersonalityFn())) &&
1309
7
         "SEH does not use catchret!");
1310
7
  return BB;
1311
7
}
1312
1313
MachineBasicBlock *AArch64TargetLowering::EmitLoweredCatchPad(
1314
11
     MachineInstr &MI, MachineBasicBlock *BB) const {
1315
11
  MI.eraseFromParent();
1316
11
  return BB;
1317
11
}
1318
1319
MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter(
1320
85
    MachineInstr &MI, MachineBasicBlock *BB) const {
1321
85
  switch (MI.getOpcode()) {
1322
85
  default:
1323
#ifndef NDEBUG
1324
    MI.dump();
1325
#endif
1326
0
    llvm_unreachable("Unexpected instruction for custom inserter!");
1327
85
1328
85
  case AArch64::F128CSEL:
1329
3
    return EmitF128CSEL(MI, BB);
1330
85
1331
85
  case TargetOpcode::STACKMAP:
1332
64
  case TargetOpcode::PATCHPOINT:
1333
64
    return emitPatchPoint(MI, BB);
1334
64
1335
64
  case AArch64::CATCHRET:
1336
7
    return EmitLoweredCatchRet(MI, BB);
1337
64
  case AArch64::CATCHPAD:
1338
11
    return EmitLoweredCatchPad(MI, BB);
1339
85
  }
1340
85
}
1341
1342
//===----------------------------------------------------------------------===//
1343
// AArch64 Lowering private implementation.
1344
//===----------------------------------------------------------------------===//
1345
1346
//===----------------------------------------------------------------------===//
1347
// Lowering Code
1348
//===----------------------------------------------------------------------===//
1349
1350
/// changeIntCCToAArch64CC - Convert a DAG integer condition code to an AArch64
1351
/// CC
1352
176k
static AArch64CC::CondCode changeIntCCToAArch64CC(ISD::CondCode CC) {
1353
176k
  switch (CC) {
1354
176k
  default:
1355
0
    llvm_unreachable("Unknown condition code!");
1356
176k
  case ISD::SETNE:
1357
36.9k
    return AArch64CC::NE;
1358
176k
  case ISD::SETEQ:
1359
37.2k
    return AArch64CC::EQ;
1360
176k
  case ISD::SETGT:
1361
14.6k
    return AArch64CC::GT;
1362
176k
  case ISD::SETGE:
1363
1.58k
    return AArch64CC::GE;
1364
176k
  case ISD::SETLT:
1365
34.4k
    return AArch64CC::LT;
1366
176k
  case ISD::SETLE:
1367
5.17k
    return AArch64CC::LE;
1368
176k
  case ISD::SETUGT:
1369
14.9k
    return AArch64CC::HI;
1370
176k
  case ISD::SETUGE:
1371
6.98k
    return AArch64CC::HS;
1372
176k
  case ISD::SETULT:
1373
19.0k
    return AArch64CC::LO;
1374
176k
  case ISD::SETULE:
1375
5.92k
    return AArch64CC::LS;
1376
176k
  }
1377
176k
}
1378
1379
/// changeFPCCToAArch64CC - Convert a DAG fp condition code to an AArch64 CC.
1380
static void changeFPCCToAArch64CC(ISD::CondCode CC,
1381
                                  AArch64CC::CondCode &CondCode,
1382
4.18k
                                  AArch64CC::CondCode &CondCode2) {
1383
4.18k
  CondCode2 = AArch64CC::AL;
1384
4.18k
  switch (CC) {
1385
4.18k
  default:
1386
0
    llvm_unreachable("Unknown FP condition!");
1387
4.18k
  case ISD::SETEQ:
1388
303
  case ISD::SETOEQ:
1389
303
    CondCode = AArch64CC::EQ;
1390
303
    break;
1391
454
  case ISD::SETGT:
1392
454
  case ISD::SETOGT:
1393
454
    CondCode = AArch64CC::GT;
1394
454
    break;
1395
454
  case ISD::SETGE:
1396
122
  case ISD::SETOGE:
1397
122
    CondCode = AArch64CC::GE;
1398
122
    break;
1399
1.26k
  case ISD::SETOLT:
1400
1.26k
    CondCode = AArch64CC::MI;
1401
1.26k
    break;
1402
122
  case ISD::SETOLE:
1403
56
    CondCode = AArch64CC::LS;
1404
56
    break;
1405
122
  case ISD::SETONE:
1406
39
    CondCode = AArch64CC::MI;
1407
39
    CondCode2 = AArch64CC::GT;
1408
39
    break;
1409
122
  case ISD::SETO:
1410
71
    CondCode = AArch64CC::VC;
1411
71
    break;
1412
122
  case ISD::SETUO:
1413
96
    CondCode = AArch64CC::VS;
1414
96
    break;
1415
122
  case ISD::SETUEQ:
1416
54
    CondCode = AArch64CC::EQ;
1417
54
    CondCode2 = AArch64CC::VS;
1418
54
    break;
1419
122
  case ISD::SETUGT:
1420
46
    CondCode = AArch64CC::HI;
1421
46
    break;
1422
259
  case ISD::SETUGE:
1423
259
    CondCode = AArch64CC::PL;
1424
259
    break;
1425
138
  case ISD::SETLT:
1426
138
  case ISD::SETULT:
1427
138
    CondCode = AArch64CC::LT;
1428
138
    break;
1429
1.12k
  case ISD::SETLE:
1430
1.12k
  case ISD::SETULE:
1431
1.12k
    CondCode = AArch64CC::LE;
1432
1.12k
    break;
1433
1.12k
  case ISD::SETNE:
1434
164
  case ISD::SETUNE:
1435
164
    CondCode = AArch64CC::NE;
1436
164
    break;
1437
4.18k
  }
1438
4.18k
}
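A brief illustration (not from the source), assuming standard AArch64 condition codes, of why some ordered FP predicates need the second code returned above:

  // Hypothetical example: for "a one b" (ordered, not equal) the mapping above
  // returns CondCode = MI and CondCode2 = GT, so a caller lowers
  //   setcc one a, b
  // as two tests on one fcmp:  "a olt b" (MI)  OR  "a ogt b" (GT).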
1439
1440
/// Convert a DAG fp condition code to an AArch64 CC.
1441
/// This differs from changeFPCCToAArch64CC in that it returns cond codes that
1442
/// should be AND'ed instead of OR'ed.
1443
static void changeFPCCToANDAArch64CC(ISD::CondCode CC,
1444
                                     AArch64CC::CondCode &CondCode,
1445
94
                                     AArch64CC::CondCode &CondCode2) {
1446
94
  CondCode2 = AArch64CC::AL;
1447
94
  switch (CC) {
1448
94
  default:
1449
83
    changeFPCCToAArch64CC(CC, CondCode, CondCode2);
1450
83
    assert(CondCode2 == AArch64CC::AL);
1451
83
    break;
1452
94
  case ISD::SETONE:
1453
7
    // (a one b)
1454
7
    // == ((a olt b) || (a ogt b))
1455
7
    // == ((a ord b) && (a une b))
1456
7
    CondCode = AArch64CC::VC;
1457
7
    CondCode2 = AArch64CC::NE;
1458
7
    break;
1459
94
  case ISD::SETUEQ:
1460
4
    // (a ueq b)
1461
4
    // == ((a uno b) || (a oeq b))
1462
4
    // == ((a ule b) && (a uge b))
1463
4
    CondCode = AArch64CC::PL;
1464
4
    CondCode2 = AArch64CC::LE;
1465
4
    break;
1466
94
  }
1467
94
}
1468
1469
/// changeVectorFPCCToAArch64CC - Convert a DAG fp condition code to an AArch64
1470
/// CC usable with the vector instructions. Fewer operations are available
1471
/// without a real NZCV register, so we have to use less efficient combinations
1472
/// to get the same effect.
1473
static void changeVectorFPCCToAArch64CC(ISD::CondCode CC,
1474
                                        AArch64CC::CondCode &CondCode,
1475
                                        AArch64CC::CondCode &CondCode2,
1476
434
                                        bool &Invert) {
1477
434
  Invert = false;
1478
434
  switch (CC) {
1479
434
  default:
1480
362
    // Mostly the scalar mappings work fine.
1481
362
    changeFPCCToAArch64CC(CC, CondCode, CondCode2);
1482
362
    break;
1483
434
  case ISD::SETUO:
1484
10
    Invert = true;
1485
10
    LLVM_FALLTHROUGH;
1486
20
  case ISD::SETO:
1487
20
    CondCode = AArch64CC::MI;
1488
20
    CondCode2 = AArch64CC::GE;
1489
20
    break;
1490
52
  case ISD::SETUEQ:
1491
52
  case ISD::SETULT:
1492
52
  case ISD::SETULE:
1493
52
  case ISD::SETUGT:
1494
52
  case ISD::SETUGE:
1495
52
    // All of the compare-mask comparisons are ordered, but we can switch
1496
52
    // between the two by a double inversion. E.g. ULE == !OGT.
1497
52
    Invert = true;
1498
52
    changeFPCCToAArch64CC(getSetCCInverse(CC, false), CondCode, CondCode2);
1499
52
    break;
1500
434
  }
1501
434
}
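A short illustration (not from the source) of the double inversion used above: there is no unordered compare-mask instruction, so an unordered predicate is rewritten as the negation of its ordered complement.

  // Hypothetical example: "a ule b" has no direct NEON compare, but
  //   a ule b  ==  !(a ogt b)
  // so the code above sets Invert = true, maps the inverted predicate
  // (SETOGT) to the ordered GT compare, and negates the mask afterwards.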
1502
1503
204k
static bool isLegalArithImmed(uint64_t C) {
1504
204k
  // Matches AArch64DAGToDAGISel::SelectArithImmed().
1505
204k
  bool IsLegal = (C >> 12 == 0) || ((C & 0xFFFULL) == 0 && C >> 24 == 0);
1506
204k
  LLVM_DEBUG(dbgs() << "Is imm " << C
1507
204k
                    << " legal: " << (IsLegal ? "yes\n" : "no\n"));
1508
204k
  return IsLegal;
1509
204k
}
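A worked illustration (example values only, not from the source) of the predicate above: an AArch64 add/sub immediate is either a 12-bit value or a 12-bit value shifted left by 12.

  // Hypothetical sample inputs to isLegalArithImmed:
  //   C = 4095      -> legal   (fits in the low 12 bits)
  //   C = 0x1000    -> legal   (12-bit value shifted left by 12)
  //   C = 0x1001    -> illegal (needs bits in both halves)
  //   C = 0xFFF000  -> legal   (largest shifted immediate)
  //   C = 0x1000000 -> illegal (would need a shift larger than 12)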
1510
1511
// Can a (CMP op1, (sub 0, op2)) be turned into a CMN instruction on
1512
// the grounds that "op1 - (-op2) == op1 + op2" ? Not always, the C and V flags
1513
// can be set differently by this operation. It comes down to whether
1514
// "SInt(~op2)+1 == SInt(~op2+1)" (and the same for UInt). If they are then
1515
// everything is fine. If not then the optimization is wrong. Thus general
1516
// comparisons are only valid if op2 != 0.
1517
//
1518
// So, finally, the only LLVM-native comparisons that don't mention C and V
1519
// are SETEQ and SETNE. They're the only ones we can safely use CMN for in
1520
// the absence of information about op2.
1521
420k
static bool isCMN(SDValue Op, ISD::CondCode CC) {
1522
420k
  return Op.getOpcode() == ISD::SUB && isNullConstant(Op.getOperand(0)) &&
1523
420k
         (CC == ISD::SETEQ || CC == ISD::SETNE);
1524
420k
}
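A small numeric sketch (illustrative values only, not from the source) of why the CMP-to-CMN rewrite above is restricted to SETEQ/SETNE: with op2 == 0 the SUBS and ADDS forms produce the same value but a different carry flag, so unsigned conditions would be answered differently.

  // Hypothetical standalone illustration of the flag difference.
  uint64_t Op1 = 5, Op2 = 0;
  // CMP op1, (0 - op2) is SUBS: C = 1 when no borrow, i.e. op1 >= (0 - op2).
  bool CarryFromSubs = Op1 >= (0 - Op2);  // 5 >= 0 -> true  (C = 1)
  // CMN op1, op2 is ADDS: C = 1 only when the addition wraps around.
  bool CarryFromAdds = (Op1 + Op2) < Op1; // 5 + 0 does not wrap -> false (C = 0)
  // Same arithmetic result, different C flag, so only the C/V-insensitive
  // SETEQ/SETNE comparisons are safe without knowing that op2 != 0.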
1525
1526
static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC,
1527
174k
                              const SDLoc &dl, SelectionDAG &DAG) {
1528
174k
  EVT VT = LHS.getValueType();
1529
174k
  const bool FullFP16 =
1530
174k
    static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasFullFP16();
1531
174k
1532
174k
  if (VT.isFloatingPoint()) {
1533
3.60k
    assert(VT != MVT::f128);
1534
3.60k
    if (VT == MVT::f16 && !FullFP16) {
1535
3
      LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, LHS);
1536
3
      RHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, RHS);
1537
3
      VT = MVT::f32;
1538
3
    }
1539
3.60k
    return DAG.getNode(AArch64ISD::FCMP, dl, VT, LHS, RHS);
1540
3.60k
  }
1541
170k
1542
170k
  // The CMP instruction is just an alias for SUBS, and representing it as
1543
170k
  // SUBS means that it's possible to get CSE with subtract operations.
1544
170k
  // A later phase can perform the optimization of setting the destination
1545
170k
  // register to WZR/XZR if it ends up being unused.
1546
170k
  unsigned Opcode = AArch64ISD::SUBS;
1547
170k
1548
170k
  if (isCMN(RHS, CC)) {
1549
142
    // Can we combine a (CMP op1, (sub 0, op2)) into a CMN instruction?
1550
142
    Opcode = AArch64ISD::ADDS;
1551
142
    RHS = RHS.getOperand(1);
1552
170k
  } else if (isCMN(LHS, CC)) {
1553
28
    // As we are looking for EQ/NE compares, the operands can be commuted ; can
1554
28
    // we combine a (CMP (sub 0, op1), op2) into a CMN instruction ?
1555
28
    Opcode = AArch64ISD::ADDS;
1556
28
    LHS = LHS.getOperand(1);
1557
170k
  } else if (LHS.getOpcode() == ISD::AND && isNullConstant(RHS) &&
1558
170k
             !isUnsignedIntSetCC(CC)) {
1559
2.43k
    // Similarly, (CMP (and X, Y), 0) can be implemented with a TST
1560
2.43k
    // (a.k.a. ANDS) except that the flags are only guaranteed to work for one
1561
2.43k
    // of the signed comparisons.
1562
2.43k
    Opcode = AArch64ISD::ANDS;
1563
2.43k
    RHS = LHS.getOperand(1);
1564
2.43k
    LHS = LHS.getOperand(0);
1565
2.43k
  }
1566
170k
1567
170k
  return DAG.getNode(Opcode, dl, DAG.getVTList(VT, MVT_CC), LHS, RHS)
1568
170k
      .getValue(1);
1569
170k
}
1570
1571
/// \defgroup AArch64CCMP CMP;CCMP matching
1572
///
1573
/// These functions deal with the formation of CMP;CCMP;... sequences.
1574
/// The CCMP/CCMN/FCCMP/FCCMPE instructions allow the conditional execution of
1575
/// a comparison. They set the NZCV flags to a predefined value if their
1576
/// predicate is false. This allows to express arbitrary conjunctions, for
1577
/// example "cmp 0 (and (setCA (cmp A)) (setCB (cmp B)))"
1578
/// expressed as:
1579
///   cmp A
1580
///   ccmp B, inv(CB), CA
1581
///   check for CB flags
1582
///
1583
/// This naturally lets us implement chains of AND operations with SETCC
1584
/// operands. And we can even implement some other situations by transforming
1585
/// them:
1586
///   - We can implement (NEG SETCC) i.e. negating a single comparison by
1587
///     negating the flags used in a CCMP/FCCMP operations.
1588
///   - We can negate the result of a whole chain of CMP/CCMP/FCCMP operations
1589
///     by negating the flags we test for afterwards. i.e.
1590
///     NEG (CMP CCMP CCCMP ...) can be implemented.
1591
///   - Note that we can only ever negate all previously processed results.
1592
///     What we can not implement by flipping the flags to test is a negation
1593
///     of two sub-trees (because the negation affects all sub-trees emitted so
1594
///     far, so the 2nd sub-tree we emit would also affect the first).
1595
/// With those tools we can implement some OR operations:
1596
///   - (OR (SETCC A) (SETCC B)) can be implemented via:
1597
///     NEG (AND (NEG (SETCC A)) (NEG (SETCC B)))
1598
///   - After transforming OR to NEG/AND combinations we may be able to use NEG
1599
///     elimination rules from earlier to implement the whole thing as a
1600
///     CCMP/FCCMP chain.
1601
///
1602
/// As complete example:
1603
///     or (or (setCA (cmp A)) (setCB (cmp B)))
1604
///        (and (setCC (cmp C)) (setCD (cmp D)))"
1605
/// can be reassociated to:
1606
///     or (and (setCC (cmp C)) setCD (cmp D))
1607
//         (or (setCA (cmp A)) (setCB (cmp B)))
1608
/// can be transformed to:
1609
///     not (and (not (and (setCC (cmp C)) (setCD (cmp D))))
1610
///              (and (not (setCA (cmp A)) (not (setCB (cmp B))))))"
1611
/// which can be implemented as:
1612
///   cmp C
1613
///   ccmp D, inv(CD), CC
1614
///   ccmp A, CA, inv(CD)
1615
///   ccmp B, CB, inv(CA)
1616
///   check for CB flags
1617
///
1618
/// A counterexample is "or (and A B) (and C D)" which translates to
1619
/// not (and (not (and (not A) (not B))) (not (and (not C) (not D)))), we
1620
/// can only implement 1 of the inner (not) operations, but not both!
1621
/// @{
1622
1623
/// Create a conditional comparison; Use CCMP, CCMN or FCCMP as appropriate.
1624
static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS,
1625
                                         ISD::CondCode CC, SDValue CCOp,
1626
                                         AArch64CC::CondCode Predicate,
1627
                                         AArch64CC::CondCode OutCC,
1628
150
                                         const SDLoc &DL, SelectionDAG &DAG) {
1629
150
  unsigned Opcode = 0;
1630
150
  const bool FullFP16 =
1631
150
    static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasFullFP16();
1632
150
1633
150
  if (LHS.getValueType().isFloatingPoint()) {
1634
62
    assert(LHS.getValueType() != MVT::f128);
1635
62
    if (LHS.getValueType() == MVT::f16 && !FullFP16) {
1636
4
      LHS = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, LHS);
1637
4
      RHS = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, RHS);
1638
4
    }
1639
62
    Opcode = AArch64ISD::FCCMP;
1640
88
  } else if (RHS.getOpcode() == ISD::SUB) {
1641
0
    SDValue SubOp0 = RHS.getOperand(0);
1642
0
    if (isNullConstant(SubOp0) && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
1643
0
      // See emitComparison() on why we can only do this for SETEQ and SETNE.
1644
0
      Opcode = AArch64ISD::CCMN;
1645
0
      RHS = RHS.getOperand(1);
1646
0
    }
1647
0
  }
1648
150
  if (Opcode == 0)
1649
88
    Opcode = AArch64ISD::CCMP;
1650
150
1651
150
  SDValue Condition = DAG.getConstant(Predicate, DL, MVT_CC);
1652
150
  AArch64CC::CondCode InvOutCC = AArch64CC::getInvertedCondCode(OutCC);
1653
150
  unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(InvOutCC);
1654
150
  SDValue NZCVOp = DAG.getConstant(NZCV, DL, MVT::i32);
1655
150
  return DAG.getNode(Opcode, DL, MVT_CC, LHS, RHS, NZCVOp, Condition, CCOp);
1656
150
}
1657
1658
/// Returns true if @p Val is a tree of AND/OR/SETCC operations that can be
1659
/// expressed as a conjunction. See \ref AArch64CCMP.
1660
/// \param CanNegate    Set to true if we can negate the whole sub-tree just by
1661
///                     changing the conditions on the SETCC tests.
1662
///                     (this means we can call emitConjunctionRec() with
1663
///                      Negate==true on this sub-tree)
1664
/// \param MustBeFirst  Set to true if this subtree needs to be negated and we
1665
///                     cannot do the negation naturally. We are required to
1666
///                     emit the subtree first in this case.
1667
/// \param WillNegate   Is true if are called when the result of this
1668
///                     subexpression must be negated. This happens when the
1669
///                     outer expression is an OR. We can use this fact to know
1670
///                     that we have a double negation (or (or ...) ...) that
1671
///                     can be implemented for free.
1672
static bool canEmitConjunction(const SDValue Val, bool &CanNegate,
1673
                               bool &MustBeFirst, bool WillNegate,
1674
22.9k
                               unsigned Depth = 0) {
1675
22.9k
  if (!Val.hasOneUse())
1676
7.19k
    return false;
1677
15.7k
  unsigned Opcode = Val->getOpcode();
1678
15.7k
  if (Opcode == ISD::SETCC) {
1679
568
    if (Val->getOperand(0).getValueType() == MVT::f128)
1680
1
      return false;
1681
567
    CanNegate = true;
1682
567
    MustBeFirst = false;
1683
567
    return true;
1684
567
  }
1685
15.2k
  // Protect against exponential runtime and stack overflow.
1686
15.2k
  if (Depth > 6)
1687
0
    return false;
1688
15.2k
  if (Opcode == ISD::AND || Opcode == ISD::OR) {
1689
6.56k
    bool IsOR = Opcode == ISD::OR;
1690
6.56k
    SDValue O0 = Val->getOperand(0);
1691
6.56k
    SDValue O1 = Val->getOperand(1);
1692
6.56k
    bool CanNegateL;
1693
6.56k
    bool MustBeFirstL;
1694
6.56k
    if (!canEmitConjunction(O0, CanNegateL, MustBeFirstL, IsOR, Depth+1))
1695
6.39k
      return false;
1696
165
    bool CanNegateR;
1697
165
    bool MustBeFirstR;
1698
165
    if (!canEmitConjunction(O1, CanNegateR, MustBeFirstR, IsOR, Depth+1))
1699
1
      return false;
1700
164
1701
164
    if (MustBeFirstL && MustBeFirstR)
1702
1
      return false;
1703
163
1704
163
    if (IsOR) {
1705
105
      // For an OR expression we need to be able to naturally negate at least
1706
105
      // one side or we cannot do the transformation at all.
1707
105
      if (!CanNegateL && !CanNegateR)
1708
1
        return false;
1709
104
      // If the result of the OR will be negated and we can naturally negate
1710
104
      // the leaves, then this sub-tree as a whole negates naturally.
1711
104
      CanNegate = WillNegate && CanNegateL && CanNegateR;
1712
104
      // If we cannot naturally negate the whole sub-tree, then this must be
1713
104
      // emitted first.
1714
104
      MustBeFirst = !CanNegate;
1715
104
    } else {
1716
58
      assert(Opcode == ISD::AND && "Must be OR or AND");
1717
58
      // We cannot naturally negate an AND operation.
1718
58
      CanNegate = false;
1719
58
      MustBeFirst = MustBeFirstL || MustBeFirstR;
1720
58
    }
1721
163
    return true;
1722
8.66k
  }
1723
8.66k
  return false;
1724
8.66k
}
1725
1726
/// Emit conjunction or disjunction tree with the CMP/FCMP followed by a chain
1727
/// of CCMP/CFCMP ops. See @ref AArch64CCMP.
1728
/// Tries to transform the given i1 producing node @p Val to a series of compare
1729
/// and conditional compare operations. @returns an NZCV flags producing node
1730
/// and sets @p OutCC to the flags that should be tested or returns SDValue() if
1731
/// transformation was not possible.
1732
/// \p Negate is true if we want this sub-tree being negated just by changing
1733
/// SETCC conditions.
1734
static SDValue emitConjunctionRec(SelectionDAG &DAG, SDValue Val,
1735
    AArch64CC::CondCode &OutCC, bool Negate, SDValue CCOp,
1736
400
    AArch64CC::CondCode Predicate) {
1737
400
  // We're at a tree leaf, produce a conditional comparison operation.
1738
400
  unsigned Opcode = Val->getOpcode();
1739
400
  if (Opcode == ISD::SETCC) {
1740
261
    SDValue LHS = Val->getOperand(0);
1741
261
    SDValue RHS = Val->getOperand(1);
1742
261
    ISD::CondCode CC = cast<CondCodeSDNode>(Val->getOperand(2))->get();
1743
261
    bool isInteger = LHS.getValueType().isInteger();
1744
261
    if (Negate)
1745
174
      CC = getSetCCInverse(CC, isInteger);
1746
261
    SDLoc DL(Val);
1747
261
    // Determine OutCC and handle FP special case.
1748
261
    if (isInteger) {
1749
167
      OutCC = changeIntCCToAArch64CC(CC);
1750
167
    } else {
1751
94
      assert(LHS.getValueType().isFloatingPoint());
1752
94
      AArch64CC::CondCode ExtraCC;
1753
94
      changeFPCCToANDAArch64CC(CC, OutCC, ExtraCC);
1754
94
      // Some floating point conditions can't be tested with a single condition
1755
94
      // code. Construct an additional comparison in this case.
1756
94
      if (ExtraCC != AArch64CC::AL) {
1757
11
        SDValue ExtraCmp;
1758
11
        if (!CCOp.getNode())
1759
4
          ExtraCmp = emitComparison(LHS, RHS, CC, DL, DAG);
1760
7
        else
1761
7
          ExtraCmp = emitConditionalComparison(LHS, RHS, CC, CCOp, Predicate,
1762
7
                                               ExtraCC, DL, DAG);
1763
11
        CCOp = ExtraCmp;
1764
11
        Predicate = ExtraCC;
1765
11
      }
1766
94
    }
1767
261
1768
261
    // Produce a normal comparison if we are first in the chain
1769
261
    if (!CCOp)
1770
118
      return emitComparison(LHS, RHS, CC, DL, DAG);
1771
143
    // Otherwise produce a ccmp.
1772
143
    return emitConditionalComparison(LHS, RHS, CC, CCOp, Predicate, OutCC, DL,
1773
143
                                     DAG);
1774
143
  }
1775
139
  assert(Val->hasOneUse() && "Valid conjunction/disjunction tree");
1776
139
1777
139
  bool IsOR = Opcode == ISD::OR;
1778
139
1779
139
  SDValue LHS = Val->getOperand(0);
1780
139
  bool CanNegateL;
1781
139
  bool MustBeFirstL;
1782
139
  bool ValidL = canEmitConjunction(LHS, CanNegateL, MustBeFirstL, IsOR);
1783
139
  assert(ValidL && "Valid conjunction/disjunction tree");
1784
139
  (void)ValidL;
1785
139
1786
139
  SDValue RHS = Val->getOperand(1);
1787
139
  bool CanNegateR;
1788
139
  bool MustBeFirstR;
1789
139
  bool ValidR = canEmitConjunction(RHS, CanNegateR, MustBeFirstR, IsOR);
1790
139
  assert(ValidR && "Valid conjunction/disjunction tree");
1791
139
  (void)ValidR;
1792
139
1793
139
  // Swap sub-tree that must come first to the right side.
1794
139
  if (MustBeFirstL) {
1795
5
    assert(!MustBeFirstR && "Valid conjunction/disjunction tree");
1796
5
    std::swap(LHS, RHS);
1797
5
    std::swap(CanNegateL, CanNegateR);
1798
5
    std::swap(MustBeFirstL, MustBeFirstR);
1799
5
  }
1800
139
1801
139
  bool NegateR;
1802
139
  bool NegateAfterR;
1803
139
  bool NegateL;
1804
139
  bool NegateAfterAll;
1805
139
  if (Opcode == ISD::OR) {
1806
90
    // Swap the sub-tree that we can negate naturally to the left.
1807
90
    if (!CanNegateL) {
1808
0
      assert(CanNegateR && "at least one side must be negatable");
1809
0
      assert(!MustBeFirstR && "invalid conjunction/disjunction tree");
1810
0
      assert(!Negate);
1811
0
      std::swap(LHS, RHS);
1812
0
      NegateR = false;
1813
0
      NegateAfterR = true;
1814
90
    } else {
1815
90
      // Negate the left sub-tree if possible, otherwise negate the result.
1816
90
      NegateR = CanNegateR;
1817
90
      NegateAfterR = !CanNegateR;
1818
90
    }
1819
90
    NegateL = true;
1820
90
    NegateAfterAll = !Negate;
1821
90
  } else {
1822
49
    assert(Opcode == ISD::AND && "Valid conjunction/disjunction tree");
1823
49
    assert(!Negate && "Valid conjunction/disjunction tree");
1824
49
1825
49
    NegateL = false;
1826
49
    NegateR = false;
1827
49
    NegateAfterR = false;
1828
49
    NegateAfterAll = false;
1829
49
  }
1830
139
1831
139
  // Emit sub-trees.
1832
139
  AArch64CC::CondCode RHSCC;
1833
139
  SDValue CmpR = emitConjunctionRec(DAG, RHS, RHSCC, NegateR, CCOp, Predicate);
1834
139
  if (NegateAfterR)
1835
2
    RHSCC = AArch64CC::getInvertedCondCode(RHSCC);
1836
139
  SDValue CmpL = emitConjunctionRec(DAG, LHS, OutCC, NegateL, CmpR, RHSCC);
1837
139
  if (NegateAfterAll)
1838
86
    OutCC = AArch64CC::getInvertedCondCode(OutCC);
1839
139
  return CmpL;
1840
139
}
1841
1842
/// Emit expression as a conjunction (a series of CCMP/CFCMP ops).
1843
/// In some cases this is even possible with OR operations in the expression.
1844
/// See \ref AArch64CCMP.
1845
/// \see emitConjunctionRec().
1846
static SDValue emitConjunction(SelectionDAG &DAG, SDValue Val,
1847
15.9k
                               AArch64CC::CondCode &OutCC) {
1848
15.9k
  bool DummyCanNegate;
1849
15.9k
  bool DummyMustBeFirst;
1850
15.9k
  if (!canEmitConjunction(Val, DummyCanNegate, DummyMustBeFirst, false))
1851
15.8k
    return SDValue();
1852
122
1853
122
  return emitConjunctionRec(DAG, Val, OutCC, false, SDValue(), AArch64CC::AL);
1854
122
}
1855
1856
/// @}
1857
1858
/// Returns how profitable it is to fold a comparison's operand's shift and/or
1859
/// extension operations.
1860
159k
static unsigned getCmpOperandFoldingProfit(SDValue Op) {
1861
159k
  auto isSupportedExtend = [&](SDValue V) {
1862
109k
    if (V.getOpcode() == ISD::SIGN_EXTEND_INREG)
1863
218
      return true;
1864
109k
1865
109k
    if (V.getOpcode() == ISD::AND)
1866
1.19k
      if (ConstantSDNode *MaskCst = dyn_cast<ConstantSDNode>(V.getOperand(1))) {
1867
1.17k
        uint64_t Mask = MaskCst->getZExtValue();
1868
1.17k
        return (Mask == 0xFF || Mask == 0xFFFF || Mask == 0xFFFFFFFF);
1869
1.17k
      }
1870
108k
1871
108k
    return false;
1872
108k
  };
1873
159k
1874
159k
  if (!Op.hasOneUse())
1875
50.8k
    return 0;
1876
108k
1877
108k
  if (isSupportedExtend(Op))
1878
555
    return 1;
1879
107k
1880
107k
  unsigned Opc = Op.getOpcode();
1881
107k
  if (Opc == ISD::SHL || Opc == ISD::SRL || Opc == ISD::SRA)
1882
1.70k
    if (ConstantSDNode *ShiftCst = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
1883
1.68k
      uint64_t Shift = ShiftCst->getZExtValue();
1884
1.68k
      if (isSupportedExtend(Op.getOperand(0)))
1885
21
        return (Shift <= 4) ? 2 : 1;
1886
1.66k
      EVT VT = Op.getValueType();
1887
1.66k
      if ((VT == MVT::i32 && Shift <= 31) || (VT == MVT::i64 && Shift <= 63))
1888
1.66k
        return 1;
1889
105k
    }
1890
105k
1891
105k
  return 0;
1892
105k
}
1893
1894
static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
1895
                             SDValue &AArch64cc, SelectionDAG &DAG,
1896
170k
                             const SDLoc &dl) {
1897
170k
  if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
1898
100k
    EVT VT = RHS.getValueType();
1899
100k
    uint64_t C = RHSC->getZExtValue();
1900
100k
    if (!isLegalArithImmed(C)) {
1901
11.2k
      // Constant does not fit, try adjusting it by one?
1902
11.2k
      switch (CC) {
1903
11.2k
      default:
1904
4.44k
        break;
1905
11.2k
      case ISD::SETLT:
1906
4.50k
      case ISD::SETGE:
1907
4.50k
        if ((VT == MVT::i32 && C != 0x80000000 &&
1908
4.50k
             isLegalArithImmed((uint32_t)(C - 1))) ||
1909
4.50k
            (VT == MVT::i64 && C != 0x80000000ULL &&
1910
4.50k
             isLegalArithImmed(C - 1ULL))) {
1911
2
          CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
1912
2
          C = (VT == MVT::i32) ? (uint32_t)(C - 1) : C - 1;
1913
2
          RHS = DAG.getConstant(C, dl, VT);
1914
2
        }
1915
4.50k
        break;
1916
4.50k
      case ISD::SETULT:
1917
1.74k
      case ISD::SETUGE:
1918
1.74k
        if ((VT == MVT::i32 && C != 0 &&
1919
1.74k
             isLegalArithImmed((uint32_t)(C - 1))) ||
1920
1.74k
            (VT == MVT::i64 && C != 0ULL && isLegalArithImmed(C - 1ULL))) {
1921
1.42k
          CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
1922
1.42k
          C = (VT == MVT::i32) ? (uint32_t)(C - 1) : C - 1;
1923
1.42k
          RHS = DAG.getConstant(C, dl, VT);
1924
1.42k
        }
1925
1.74k
        break;
1926
1.74k
      case ISD::SETLE:
1927
333
      case ISD::SETGT:
1928
333
        if ((VT == MVT::i32 && C != INT32_MAX &&
1929
333
             isLegalArithImmed((uint32_t)(C + 1))) ||
1930
333
            (VT == MVT::i64 && C != INT64_MAX &&
1931
266
             isLegalArithImmed(C + 1ULL))) {
1932
89
          CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
1933
89
          C = (VT == MVT::i32) ? (uint32_t)(C + 1) : C + 1;
1934
89
          RHS = DAG.getConstant(C, dl, VT);
1935
89
        }
1936
333
        break;
1937
333
      case ISD::SETULE:
1938
234
      case ISD::SETUGT:
1939
234
        if ((VT == MVT::i32 && C != UINT32_MAX &&
1940
234
             isLegalArithImmed((uint32_t)(C + 1))) ||
1941
234
            (VT == MVT::i64 && C != UINT64_MAX &&
1942
234
             isLegalArithImmed(C + 1ULL))) {
1943
0
          CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
1944
0
          C = (VT == MVT::i32) ? (uint32_t)(C + 1) : C + 1;
1945
0
          RHS = DAG.getConstant(C, dl, VT);
1946
0
        }
1947
234
        break;
1948
170k
      }
1949
170k
    }
1950
100k
  }
1951
170k
1952
170k
  // Comparisons are canonicalized so that the RHS operand is simpler than the
1953
170k
  // LHS one, the extreme case being when RHS is an immediate. However, AArch64
1954
170k
  // can fold some shift+extend operations on the RHS operand, so swap the
1955
170k
  // operands if that can be done.
1956
170k
  //
1957
170k
  // For example:
1958
170k
  //    lsl     w13, w11, #1
1959
170k
  //    cmp     w13, w12
1960
170k
  // can be turned into:
1961
170k
  //    cmp     w12, w11, lsl #1
1962
170k
  if (!isa<ConstantSDNode>(RHS) ||
1963
170k
      !isLegalArithImmed(cast<ConstantSDNode>(RHS)->getZExtValue())) {
1964
79.5k
    SDValue TheLHS = isCMN(LHS, CC) ? LHS.getOperand(1) : LHS;
1965
79.5k
1966
79.5k
    if (getCmpOperandFoldingProfit(TheLHS) > getCmpOperandFoldingProfit(RHS)) {
1967
1.82k
      std::swap(LHS, RHS);
1968
1.82k
      CC = ISD::getSetCCSwappedOperands(CC);
1969
1.82k
    }
1970
79.5k
  }
1971
170k
1972
170k
  SDValue Cmp;
1973
170k
  AArch64CC::CondCode AArch64CC;
1974
170k
  if ((CC == ISD::SETEQ || CC == ISD::SETNE) && isa<ConstantSDNode>(RHS)) {
1975
44.8k
    const ConstantSDNode *RHSC = cast<ConstantSDNode>(RHS);
1976
44.8k
1977
44.8k
    // The imm operand of ADDS is an unsigned immediate, in the range 0 to 4095.
1978
44.8k
    // For the i8 operand, the largest immediate is 255, so this can be easily
1979
44.8k
    // encoded in the compare instruction. For the i16 operand, however, the
1980
44.8k
    // largest immediate cannot be encoded in the compare.
1981
44.8k
    // Therefore, use a sign extending load and cmn to avoid materializing the
1982
44.8k
    // -1 constant. For example,
1983
44.8k
    // movz w1, #65535
1984
44.8k
    // ldrh w0, [x0, #0]
1985
44.8k
    // cmp w0, w1
1986
44.8k
    // >
1987
44.8k
    // ldrsh w0, [x0, #0]
1988
44.8k
    // cmn w0, #1
1989
44.8k
    // Fundamentally, we're relying on the property that (zext LHS) == (zext RHS)
1990
44.8k
    // if and only if (sext LHS) == (sext RHS). The checks are in place to
1991
44.8k
    // ensure both the LHS and RHS are truly zero extended and to make sure the
1992
44.8k
    // transformation is profitable.
1993
44.8k
    if ((RHSC->getZExtValue() >> 16 == 0) && isa<LoadSDNode>(LHS) &&
1994
44.8k
        cast<LoadSDNode>(LHS)->getExtensionType() == ISD::ZEXTLOAD &&
1995
44.8k
        cast<LoadSDNode>(LHS)->getMemoryVT() == MVT::i16 &&
1996
44.8k
        LHS.getNode()->hasNUsesOfValue(1, 0)) {
1997
54
      int16_t ValueofRHS = cast<ConstantSDNode>(RHS)->getZExtValue();
1998
54
      if (ValueofRHS < 0 && isLegalArithImmed(-ValueofRHS)) {
1999
13
        SDValue SExt =
2000
13
            DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, LHS.getValueType(), LHS,
2001
13
                        DAG.getValueType(MVT::i16));
2002
13
        Cmp = emitComparison(SExt, DAG.getConstant(ValueofRHS, dl,
2003
13
                                                   RHS.getValueType()),
2004
13
                             CC, dl, DAG);
2005
13
        AArch64CC = changeIntCCToAArch64CC(CC);
2006
13
      }
2007
54
    }
2008
44.8k
2009
44.8k
    if (!Cmp && (RHSC->isNullValue() || RHSC->isOne())) {
2010
15.9k
      if ((Cmp = emitConjunction(DAG, LHS, AArch64CC))) {
2011
122
        if ((CC == ISD::SETNE) ^ RHSC->isNullValue())
2012
1
          AArch64CC = AArch64CC::getInvertedCondCode(AArch64CC);
2013
122
      }
2014
15.9k
    }
2015
44.8k
  }
2016
170k
2017
170k
  if (!Cmp) {
2018
170k
    Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
2019
170k
    AArch64CC = changeIntCCToAArch64CC(CC);
2020
170k
  }
2021
170k
  AArch64cc = DAG.getConstant(AArch64CC, dl, MVT_CC);
2022
170k
  return Cmp;
2023
170k
}
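A hedged worked example (values chosen for illustration, not taken from the source) of the constant adjustment performed above when an out-of-range immediate is compared with an unsigned less-than:

  // Hypothetical example:
  //   x <u 0x1001   (ISD::SETULT, C = 0x1001, not a legal add/sub immediate)
  // is rewritten as
  //   x <=u 0x1000  (ISD::SETULE, C - 1 = 0x1000, a 12-bit value shifted by 12)
  // which can then be selected as a single "cmp x0, #1, lsl #12" plus "b.ls".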
2024
2025
static std::pair<SDValue, SDValue>
2026
2.32k
getAArch64XALUOOp(AArch64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG) {
2027
2.32k
  assert((Op.getValueType() == MVT::i32 || Op.getValueType() == MVT::i64) &&
2028
2.32k
         "Unsupported value type");
2029
2.32k
  SDValue Value, Overflow;
2030
2.32k
  SDLoc DL(Op);
2031
2.32k
  SDValue LHS = Op.getOperand(0);
2032
2.32k
  SDValue RHS = Op.getOperand(1);
2033
2.32k
  unsigned Opc = 0;
2034
2.32k
  switch (Op.getOpcode()) {
2035
2.32k
  default:
2036
0
    llvm_unreachable("Unknown overflow instruction!");
2037
2.32k
  case ISD::SADDO:
2038
41
    Opc = AArch64ISD::ADDS;
2039
41
    CC = AArch64CC::VS;
2040
41
    break;
2041
2.32k
  case ISD::UADDO:
2042
1.66k
    Opc = AArch64ISD::ADDS;
2043
1.66k
    CC = AArch64CC::HS;
2044
1.66k
    break;
2045
2.32k
  case ISD::SSUBO:
2046
24
    Opc = AArch64ISD::SUBS;
2047
24
    CC = AArch64CC::VS;
2048
24
    break;
2049
2.32k
  case ISD::USUBO:
2050
82
    Opc = AArch64ISD::SUBS;
2051
82
    CC = AArch64CC::LO;
2052
82
    break;
2053
2.32k
  // Multiply needs a little bit extra work.
2054
2.32k
  case ISD::SMULO:
2055
518
  case ISD::UMULO: {
2056
518
    CC = AArch64CC::NE;
2057
518
    bool IsSigned = Op.getOpcode() == ISD::SMULO;
2058
518
    if (Op.getValueType() == MVT::i32) {
2059
23
      unsigned ExtendOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
2060
23
      // For a 32 bit multiply with overflow check we want the instruction
2061
23
      // selector to generate a widening multiply (SMADDL/UMADDL). For that we
2062
23
      // need to generate the following pattern:
2063
23
      // (i64 add 0, (i64 mul (i64 sext|zext i32 %a), (i64 sext|zext i32 %b))
2064
23
      LHS = DAG.getNode(ExtendOpc, DL, MVT::i64, LHS);
2065
23
      RHS = DAG.getNode(ExtendOpc, DL, MVT::i64, RHS);
2066
23
      SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, LHS, RHS);
2067
23
      SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Mul,
2068
23
                                DAG.getConstant(0, DL, MVT::i64));
2069
23
      // On AArch64 the upper 32 bits are always zero extended for a 32 bit
2070
23
      // operation. We need to clear out the upper 32 bits, because we used a
2071
23
      // widening multiply that wrote all 64 bits. In the end this should be a
2072
23
      // noop.
2073
23
      Value = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Add);
2074
23
      if (IsSigned) {
2075
9
        // The signed overflow check requires more than just a simple check for
2076
9
        // any bit set in the upper 32 bits of the result. These bits could be
2077
9
        // just the sign bits of a negative number. To perform the overflow
2078
9
        // check we have to arithmetic shift right the 32nd bit of the result by
2079
9
        // 31 bits. Then we compare the result to the upper 32 bits.
2080
9
        SDValue UpperBits = DAG.getNode(ISD::SRL, DL, MVT::i64, Add,
2081
9
                                        DAG.getConstant(32, DL, MVT::i64));
2082
9
        UpperBits = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, UpperBits);
2083
9
        SDValue LowerBits = DAG.getNode(ISD::SRA, DL, MVT::i32, Value,
2084
9
                                        DAG.getConstant(31, DL, MVT::i64));
2085
9
        // It is important that LowerBits is last, otherwise the arithmetic
2086
9
        // shift will not be folded into the compare (SUBS).
2087
9
        SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32);
2088
9
        Overflow = DAG.getNode(AArch64ISD::SUBS, DL, VTs, UpperBits, LowerBits)
2089
9
                       .getValue(1);
2090
14
      } else {
2091
14
        // The overflow check for unsigned multiply is easy. We only need to
2092
14
        // check if any of the upper 32 bits are set. This can be done with a
2093
14
        // CMP (shifted register). For that we need to generate the following
2094
14
        // pattern:
2095
14
        // (i64 AArch64ISD::SUBS i64 0, (i64 srl i64 %Mul, i64 32)
2096
14
        SDValue UpperBits = DAG.getNode(ISD::SRL, DL, MVT::i64, Mul,
2097
14
                                        DAG.getConstant(32, DL, MVT::i64));
2098
14
        SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32);
2099
14
        Overflow =
2100
14
            DAG.getNode(AArch64ISD::SUBS, DL, VTs,
2101
14
                        DAG.getConstant(0, DL, MVT::i64),
2102
14
                        UpperBits).getValue(1);
2103
14
      }
2104
23
      break;
2105
23
    }
2106
495
    assert(Op.getValueType() == MVT::i64 && "Expected an i64 value type");
2107
495
    // For the 64 bit multiply
2108
495
    Value = DAG.getNode(ISD::MUL, DL, MVT::i64, LHS, RHS);
2109
495
    if (IsSigned) {
2110
5
      SDValue UpperBits = DAG.getNode(ISD::MULHS, DL, MVT::i64, LHS, RHS);
2111
5
      SDValue LowerBits = DAG.getNode(ISD::SRA, DL, MVT::i64, Value,
2112
5
                                      DAG.getConstant(63, DL, MVT::i64));
2113
5
      // It is important that LowerBits is last, otherwise the arithmetic
2114
5
      // shift will not be folded into the compare (SUBS).
2115
5
      SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32);
2116
5
      Overflow = DAG.getNode(AArch64ISD::SUBS, DL, VTs, UpperBits, LowerBits)
2117
5
                     .getValue(1);
2118
490
    } else {
2119
490
      SDValue UpperBits = DAG.getNode(ISD::MULHU, DL, MVT::i64, LHS, RHS);
2120
490
      SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32);
2121
490
      Overflow =
2122
490
          DAG.getNode(AArch64ISD::SUBS, DL, VTs,
2123
490
                      DAG.getConstant(0, DL, MVT::i64),
2124
490
                      UpperBits).getValue(1);
2125
490
    }
2126
495
    break;
2127
495
  }
2128
2.32k
  } // switch (...)
2129
2.32k
2130
2.32k
  if (Opc) {
2131
1.81k
    SDVTList VTs = DAG.getVTList(Op->getValueType(0), MVT::i32);
2132
1.81k
2133
1.81k
    // Emit the AArch64 operation with overflow check.
2134
1.81k
    Value = DAG.getNode(Opc, DL, VTs, LHS, RHS);
2135
1.81k
    Overflow = Value.getValue(1);
2136
1.81k
  }
2137
2.32k
  return std::make_pair(Value, Overflow);
2138
2.32k
}
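A minimal standalone sketch (plain C++ integers instead of SDValues; the helper name is hypothetical) of the unsigned i32 overflow strategy above: widen both operands, multiply, and report overflow exactly when the upper 32 bits of the 64-bit product are non-zero.

  #include <cstdint>

  // Hypothetical helper mirroring the UMULO i32 lowering described above.
  bool UnsignedMul32Overflows(uint32_t A, uint32_t B, uint32_t &Product) {
    uint64_t Wide = uint64_t(A) * uint64_t(B); // widening multiply (UMADDL-style)
    Product = uint32_t(Wide);                  // low 32 bits, as the TRUNCATE keeps
    return (Wide >> 32) != 0;                  // overflow iff any upper bit is set
  }
  // e.g. A = B = 0x10000 gives Wide = 1ULL << 32, so this reports overflow
  // while Product truncates to 0.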
2139
2140
SDValue AArch64TargetLowering::LowerF128Call(SDValue Op, SelectionDAG &DAG,
2141
94
                                             RTLIB::Libcall Call) const {
2142
94
  SmallVector<SDValue, 2> Ops(Op->op_begin(), Op->op_end());
2143
94
  return makeLibCall(DAG, Call, MVT::f128, Ops, false, SDLoc(Op)).first;
2144
94
}
2145
2146
// Returns true if the given Op is the overflow flag result of an overflow
2147
// intrinsic operation.
2148
367k
static bool isOverflowIntrOpRes(SDValue Op) {
2149
367k
  unsigned Opc = Op.getOpcode();
2150
367k
  return (Op.getResNo() == 1 &&
2151
367k
          (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
2152
919
           Opc == ISD::USUBO || Opc == ISD::SMULO || Opc == ISD::UMULO));
2153
367k
}
2154
2155
14.3k
static SDValue LowerXOR(SDValue Op, SelectionDAG &DAG) {
2156
14.3k
  SDValue Sel = Op.getOperand(0);
2157
14.3k
  SDValue Other = Op.getOperand(1);
2158
14.3k
  SDLoc dl(Sel);
2159
14.3k
2160
14.3k
  // If the operand is an overflow checking operation, invert the condition
2161
14.3k
  // code and kill the Not operation. I.e., transform:
2162
14.3k
  // (xor (overflow_op_bool, 1))
2163
14.3k
  //   -->
2164
14.3k
  // (csel 1, 0, invert(cc), overflow_op_bool)
2165
14.3k
  // ... which later gets transformed to just a cset instruction with an
2166
14.3k
  // inverted condition code, rather than a cset + eor sequence.
2167
14.3k
  if (isOneConstant(Other) && isOverflowIntrOpRes(Sel)) {
2168
32
    // Only lower legal XALUO ops.
2169
32
    if (!DAG.getTargetLoweringInfo().isTypeLegal(Sel->getValueType(0)))
2170
0
      return SDValue();
2171
32
2172
32
    SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
2173
32
    SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
2174
32
    AArch64CC::CondCode CC;
2175
32
    SDValue Value, Overflow;
2176
32
    std::tie(Value, Overflow) = getAArch64XALUOOp(CC, Sel.getValue(0), DAG);
2177
32
    SDValue CCVal = DAG.getConstant(getInvertedCondCode(CC), dl, MVT::i32);
2178
32
    return DAG.getNode(AArch64ISD::CSEL, dl, Op.getValueType(), TVal, FVal,
2179
32
                       CCVal, Overflow);
2180
32
  }
2181
14.3k
  // If neither operand is a SELECT_CC, give up.
2182
14.3k
  if (Sel.getOpcode() != ISD::SELECT_CC)
2183
14.3k
    std::swap(Sel, Other);
2184
14.3k
  if (Sel.getOpcode() != ISD::SELECT_CC)
2185
14.3k
    return Op;
2186
17
2187
17
  // The folding we want to perform is:
2188
17
  // (xor x, (select_cc a, b, cc, 0, -1) )
2189
17
  //   -->
2190
17
  // (csel x, (xor x, -1), cc ...)
2191
17
  //
2192
17
  // The latter will get matched to a CSINV instruction.
2193
17
2194
17
  ISD::CondCode CC = cast<CondCodeSDNode>(Sel.getOperand(4))->get();
2195
17
  SDValue LHS = Sel.getOperand(0);
2196
17
  SDValue RHS = Sel.getOperand(1);
2197
17
  SDValue TVal = Sel.getOperand(2);
2198
17
  SDValue FVal = Sel.getOperand(3);
2199
17
2200
17
  // FIXME: This could be generalized to non-integer comparisons.
2201
17
  if (LHS.getValueType() != MVT::i32 && LHS.getValueType() != MVT::i64)
2202
0
    return Op;
2203
17
2204
17
  ConstantSDNode *CFVal = dyn_cast<ConstantSDNode>(FVal);
2205
17
  ConstantSDNode *CTVal = dyn_cast<ConstantSDNode>(TVal);
2206
17
2207
17
  // The values aren't constants, this isn't the pattern we're looking for.
2208
17
  if (!CFVal || !CTVal)
2209
16
    return Op;
2210
1
2211
1
  // We can commute the SELECT_CC by inverting the condition.  This
2212
1
  // might be needed to make this fit into a CSINV pattern.
2213
1
  if (CTVal->isAllOnesValue() && CFVal->isNullValue()) {
2214
1
    std::swap(TVal, FVal);
2215
1
    std::swap(CTVal, CFVal);
2216
1
    CC = ISD::getSetCCInverse(CC, true);
2217
1
  }
2218
1
2219
1
  // If the constants line up, perform the transform!
2220
1
  if (CTVal->isNullValue() && CFVal->isAllOnesValue()) {
2221
1
    SDValue CCVal;
2222
1
    SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl);
2223
1
2224
1
    FVal = Other;
2225
1
    TVal = DAG.getNode(ISD::XOR, dl, Other.getValueType(), Other,
2226
1
                       DAG.getConstant(-1ULL, dl, Other.getValueType()));
2227
1
2228
1
    return DAG.getNode(AArch64ISD::CSEL, dl, Sel.getValueType(), FVal, TVal,
2229
1
                       CCVal, Cmp);
2230
1
  }
2231
0
2232
0
  return Op;
2233
0
}
2234
2235
174
static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) {
2236
174
  EVT VT = Op.getValueType();
2237
174
2238
174
  // Let legalize expand this if it isn't a legal type yet.
2239
174
  if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
2240
0
    return SDValue();
2241
174
2242
174
  SDVTList VTs = DAG.getVTList(VT, MVT::i32);
2243
174
2244
174
  unsigned Opc;
2245
174
  bool ExtraOp = false;
2246
174
  switch (Op.getOpcode()) {
2247
174
  default:
2248
0
    llvm_unreachable("Invalid code");
2249
174
  case ISD::ADDC:
2250
41
    Opc = AArch64ISD::ADDS;
2251
41
    break;
2252
174
  case ISD::SUBC:
2253
44
    Opc = AArch64ISD::SUBS;
2254
44
    break;
2255
174
  case ISD::ADDE:
2256
45
    Opc = AArch64ISD::ADCS;
2257
45
    ExtraOp = true;
2258
45
    break;
2259
174
  case ISD::SUBE:
2260
44
    Opc = AArch64ISD::SBCS;
2261
44
    ExtraOp = true;
2262
44
    break;
2263
174
  }
2264
174
2265
174
  if (!ExtraOp)
2266
85
    return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0), Op.getOperand(1));
2267
89
  return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0), Op.getOperand(1),
2268
89
                     Op.getOperand(2));
2269
89
}
2270
2271
1.41k
static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) {
2272
1.41k
  // Let legalize expand this if it isn't a legal type yet.
2273
1.41k
  if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
2274
0
    return SDValue();
2275
1.41k
2276
1.41k
  SDLoc dl(Op);
2277
1.41k
  AArch64CC::CondCode CC;
2278
1.41k
  // The actual operation that sets the overflow or carry flag.
2279
1.41k
  SDValue Value, Overflow;
2280
1.41k
  std::tie(Value, Overflow) = getAArch64XALUOOp(CC, Op, DAG);
2281
1.41k
2282
1.41k
  // We use 0 and 1 as false and true values.
2283
1.41k
  SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
2284
1.41k
  SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
2285
1.41k
2286
1.41k
  // We use an inverted condition, because the conditional select is inverted
2287
1.41k
  // too. This will allow it to be selected to a single instruction:
2288
1.41k
  // CSINC Wd, WZR, WZR, invert(cond).
2289
1.41k
  SDValue CCVal = DAG.getConstant(getInvertedCondCode(CC), dl, MVT::i32);
2290
1.41k
  Overflow = DAG.getNode(AArch64ISD::CSEL, dl, MVT::i32, FVal, TVal,
2291
1.41k
                         CCVal, Overflow);
2292
1.41k
2293
1.41k
  SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
2294
1.41k
  return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
2295
1.41k
}
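An illustrative sketch (not part of the source file; the helper name is made up) of the CSINC identity that the comment above relies on: CSINC Rd, Rn, Rm, cond yields cond ? Rn : Rm + 1, so CSINC Wd, WZR, WZR, invert(cond) materializes exactly the 0/1 overflow value.

#include <cassert>

// Models "csinc Rd, Rn, Rm, cond": Rd = cond ? Rn : Rm + 1.
static unsigned csinc(unsigned Rn, unsigned Rm, bool cond) {
  return cond ? Rn : Rm + 1;
}

int main() {
  // With both sources tied to WZR and the condition inverted, the result is
  // 1 when the original condition (overflow/carry) holds and 0 otherwise.
  for (bool overflow : {false, true})
    assert(csinc(/*WZR=*/0, /*WZR=*/0, /*cond=*/!overflow) ==
           (overflow ? 1u : 0u));
}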
2296
2297
// Prefetch operands are:
2298
// 1: Address to prefetch
2299
// 2: bool isWrite
2300
// 3: int locality (0 = no locality ... 3 = extreme locality)
2301
// 4: bool isDataCache
2302
82
static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG) {
2303
82
  SDLoc DL(Op);
2304
82
  unsigned IsWrite = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
2305
82
  unsigned Locality = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
2306
82
  unsigned IsData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
2307
82
2308
82
  bool IsStream = !Locality;
2309
82
  // When the locality number is set
2310
82
  if (Locality) {
2311
76
    // The front-end should have filtered out the out-of-range values
2312
76
    assert(Locality <= 3 && "Prefetch locality out-of-range");
2313
76
    // The locality degree is the opposite of the cache speed.
2314
76
    // Put the number the other way around.
2315
76
    // The encoding starts at 0 for level 1
2316
76
    Locality = 3 - Locality;
2317
76
  }
2318
82
2319
82
  // Build the mask value encoding the expected behavior.
2320
82
  unsigned PrfOp = (IsWrite << 4) |     // Load/Store bit
2321
82
                   (!IsData << 3) |     // IsDataCache bit
2322
82
                   (Locality << 1) |    // Cache level bits
2323
82
                   (unsigned)IsStream;  // Stream bit
2324
82
  return DAG.getNode(AArch64ISD::PREFETCH, DL, MVT::Other, Op.getOperand(0),
2325
82
                     DAG.getConstant(PrfOp, DL, MVT::i32), Op.getOperand(1));
2326
82
}
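A worked example of the PrfOp encoding assembled in LowerPREFETCH above (added for illustration; this standalone snippet and its helper name are not part of the source):

#include <cassert>

// bit 4 = store, bit 3 = instruction cache, bits 2:1 = cache level, bit 0 = stream.
static unsigned buildPrfOp(bool IsWrite, unsigned Locality, bool IsData) {
  bool IsStream = Locality == 0;
  if (Locality)
    Locality = 3 - Locality;          // locality 3 maps to level 0, i.e. L1
  return (IsWrite << 4) | (!IsData << 3) | (Locality << 1) | (unsigned)IsStream;
}

int main() {
  assert(buildPrfOp(/*IsWrite=*/true, /*Locality=*/0, /*IsData=*/true) == 0x11);  // PSTL1STRM
  assert(buildPrfOp(/*IsWrite=*/false, /*Locality=*/3, /*IsData=*/true) == 0x00); // PLDL1KEEP
}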
2327
2328
SDValue AArch64TargetLowering::LowerFP_EXTEND(SDValue Op,
2329
2
                                              SelectionDAG &DAG) const {
2330
2
  assert(Op.getValueType() == MVT::f128 && "Unexpected lowering");
2331
2
2332
2
  RTLIB::Libcall LC;
2333
2
  LC = RTLIB::getFPEXT(Op.getOperand(0).getValueType(), Op.getValueType());
2334
2
2335
2
  return LowerF128Call(Op, DAG, LC);
2336
2
}
2337
2338
SDValue AArch64TargetLowering::LowerFP_ROUND(SDValue Op,
2339
794
                                             SelectionDAG &DAG) const {
2340
794
  if (Op.getOperand(0).getValueType() != MVT::f128) {
2341
789
    // It's legal except when f128 is involved
2342
789
    return Op;
2343
789
  }
2344
5
2345
5
  RTLIB::Libcall LC;
2346
5
  LC = RTLIB::getFPROUND(Op.getOperand(0).getValueType(), Op.getValueType());
2347
5
2348
5
  // FP_ROUND node has a second operand indicating whether it is known to be
2349
5
  // precise. That doesn't take part in the LibCall so we can't directly use
2350
5
  // LowerF128Call.
2351
5
  SDValue SrcVal = Op.getOperand(0);
2352
5
  return makeLibCall(DAG, LC, Op.getValueType(), SrcVal, /*isSigned*/ false,
2353
5
                     SDLoc(Op)).first;
2354
5
}
2355
2356
SDValue AArch64TargetLowering::LowerVectorFP_TO_INT(SDValue Op,
2357
831
                                                    SelectionDAG &DAG) const {
2358
831
  // Warning: We maintain cost tables in AArch64TargetTransformInfo.cpp.
2359
831
  // Any additional optimization in this function should be recorded
2360
831
  // in the cost tables.
2361
831
  EVT InVT = Op.getOperand(0).getValueType();
2362
831
  EVT VT = Op.getValueType();
2363
831
  unsigned NumElts = InVT.getVectorNumElements();
2364
831
2365
831
  // f16 conversions are promoted to f32 when full fp16 is not supported.
2366
831
  if (InVT.getVectorElementType() == MVT::f16 &&
2367
831
      !Subtarget->hasFullFP16()) {
2368
8
    MVT NewVT = MVT::getVectorVT(MVT::f32, NumElts);
2369
8
    SDLoc dl(Op);
2370
8
    return DAG.getNode(
2371
8
        Op.getOpcode(), dl, Op.getValueType(),
2372
8
        DAG.getNode(ISD::FP_EXTEND, dl, NewVT, Op.getOperand(0)));
2373
8
  }
2374
823
2375
823
  if (VT.getSizeInBits() < InVT.getSizeInBits()) {
2376
129
    SDLoc dl(Op);
2377
129
    SDValue Cv =
2378
129
        DAG.getNode(Op.getOpcode(), dl, InVT.changeVectorElementTypeToInteger(),
2379
129
                    Op.getOperand(0));
2380
129
    return DAG.getNode(ISD::TRUNCATE, dl, VT, Cv);
2381
129
  }
2382
694
2383
694
  if (VT.getSizeInBits() > InVT.getSizeInBits()) {
2384
3
    SDLoc dl(Op);
2385
3
    MVT ExtVT =
2386
3
        MVT::getVectorVT(MVT::getFloatingPointVT(VT.getScalarSizeInBits()),
2387
3
                         VT.getVectorNumElements());
2388
3
    SDValue Ext = DAG.getNode(ISD::FP_EXTEND, dl, ExtVT, Op.getOperand(0));
2389
3
    return DAG.getNode(Op.getOpcode(), dl, VT, Ext);
2390
3
  }
2391
691
2392
691
  // Type changing conversions are illegal.
2393
691
  return Op;
2394
691
}
2395
2396
SDValue AArch64TargetLowering::LowerFP_TO_INT(SDValue Op,
2397
3.91k
                                              SelectionDAG &DAG) const {
2398
3.91k
  if (Op.getOperand(0).getValueType().isVector())
2399
831
    return LowerVectorFP_TO_INT(Op, DAG);
2400
3.08k
2401
3.08k
  // f16 conversions are promoted to f32 when full fp16 is not supported.
2402
3.08k
  if (Op.getOperand(0).getValueType() == MVT::f16 &&
2403
3.08k
      !Subtarget->hasFullFP16()) {
2404
11
    SDLoc dl(Op);
2405
11
    return DAG.getNode(
2406
11
        Op.getOpcode(), dl, Op.getValueType(),
2407
11
        DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, Op.getOperand(0)));
2408
11
  }
2409
3.07k
2410
3.07k
  if (Op.getOperand(0).getValueType() != MVT::f128) {
2411
3.06k
    // It's legal except when f128 is involved
2412
3.06k
    return Op;
2413
3.06k
  }
2414
12
2415
12
  RTLIB::Libcall LC;
2416
12
  if (Op.getOpcode() == ISD::FP_TO_SINT)
2417
5
    LC = RTLIB::getFPTOSINT(Op.getOperand(0).getValueType(), Op.getValueType());
2418
7
  else
2419
7
    LC = RTLIB::getFPTOUINT(Op.getOperand(0).getValueType(), Op.getValueType());
2420
12
2421
12
  SmallVector<SDValue, 2> Ops(Op->op_begin(), Op->op_end());
2422
12
  return makeLibCall(DAG, LC, Op.getValueType(), Ops, false, SDLoc(Op)).first;
2423
12
}
2424
2425
118k
static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
2426
118k
  // Warning: We maintain cost tables in AArch64TargetTransformInfo.cpp.
2427
118k
  // Any additional optimization in this function should be recorded
2428
118k
  // in the cost tables.
2429
118k
  EVT VT = Op.getValueType();
2430
118k
  SDLoc dl(Op);
2431
118k
  SDValue In = Op.getOperand(0);
2432
118k
  EVT InVT = In.getValueType();
2433
118k
2434
118k
  if (VT.getSizeInBits() < InVT.getSizeInBits()) {
2435
92
    MVT CastVT =
2436
92
        MVT::getVectorVT(MVT::getFloatingPointVT(InVT.getScalarSizeInBits()),
2437
92
                         InVT.getVectorNumElements());
2438
92
    In = DAG.getNode(Op.getOpcode(), dl, CastVT, In);
2439
92
    return DAG.getNode(ISD::FP_ROUND, dl, VT, In, DAG.getIntPtrConstant(0, dl));
2440
92
  }
2441
117k
2442
117k
  if (VT.getSizeInBits() > InVT.getSizeInBits()) {
2443
24.2k
    unsigned CastOpc =
2444
24.2k
        Op.getOpcode() == ISD::SINT_TO_FP ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
2445
24.2k
    EVT CastVT = VT.changeVectorElementTypeToInteger();
2446
24.2k
    In = DAG.getNode(CastOpc, dl, CastVT, In);
2447
24.2k
    return DAG.getNode(Op.getOpcode(), dl, VT, In);
2448
24.2k
  }
2449
93.7k
2450
93.7k
  return Op;
2451
93.7k
}
2452
2453
SDValue AArch64TargetLowering::LowerINT_TO_FP(SDValue Op,
2454
127k
                                            SelectionDAG &DAG) const {
2455
127k
  if (Op.getValueType().isVector())
2456
118k
    return LowerVectorINT_TO_FP(Op, DAG);
2457
9.82k
2458
9.82k
  // f16 conversions are promoted to f32 when full fp16 is not supported.
2459
9.82k
  if (Op.getValueType() == MVT::f16 &&
2460
9.82k
      !Subtarget->hasFullFP16()) {
2461
22
    SDLoc dl(Op);
2462
22
    return DAG.getNode(
2463
22
        ISD::FP_ROUND, dl, MVT::f16,
2464
22
        DAG.getNode(Op.getOpcode(), dl, MVT::f32, Op.getOperand(0)),
2465
22
        DAG.getIntPtrConstant(0, dl));
2466
22
  }
2467
9.80k
2468
9.80k
  // i128 conversions are libcalls.
2469
9.80k
  if (Op.getOperand(0).getValueType() == MVT::i128)
2470
6
    return SDValue();
2471
9.80k
2472
9.80k
  // Other conversions are legal, unless it's to the completely software-based
2473
9.80k
  // fp128.
2474
9.80k
  if (Op.getValueType() != MVT::f128)
2475
9.79k
    return Op;
2476
6
2477
6
  RTLIB::Libcall LC;
2478
6
  if (Op.getOpcode() == ISD::SINT_TO_FP)
2479
2
    LC = RTLIB::getSINTTOFP(Op.getOperand(0).getValueType(), Op.getValueType());
2480
4
  else
2481
4
    LC = RTLIB::getUINTTOFP(Op.getOperand(0).getValueType(), Op.getValueType());
2482
6
2483
6
  return LowerF128Call(Op, DAG, LC);
2484
6
}
2485
2486
SDValue AArch64TargetLowering::LowerFSINCOS(SDValue Op,
2487
29
                                            SelectionDAG &DAG) const {
2488
29
  // For iOS, we want to call an alternative entry point: __sincos_stret,
2489
29
  // which returns the values in two S / D registers.
2490
29
  SDLoc dl(Op);
2491
29
  SDValue Arg = Op.getOperand(0);
2492
29
  EVT ArgVT = Arg.getValueType();
2493
29
  Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
2494
29
2495
29
  ArgListTy Args;
2496
29
  ArgListEntry Entry;
2497
29
2498
29
  Entry.Node = Arg;
2499
29
  Entry.Ty = ArgTy;
2500
29
  Entry.IsSExt = false;
2501
29
  Entry.IsZExt = false;
2502
29
  Args.push_back(Entry);
2503
29
2504
29
  RTLIB::Libcall LC = ArgVT == MVT::f64 ? RTLIB::SINCOS_STRET_F64
2505
29
                                        : RTLIB::SINCOS_STRET_F32;
2506
29
  const char *LibcallName = getLibcallName(LC);
2507
29
  SDValue Callee =
2508
29
      DAG.getExternalSymbol(LibcallName, getPointerTy(DAG.getDataLayout()));
2509
29
2510
29
  StructType *RetTy = StructType::get(ArgTy, ArgTy);
2511
29
  TargetLowering::CallLoweringInfo CLI(DAG);
2512
29
  CLI.setDebugLoc(dl)
2513
29
      .setChain(DAG.getEntryNode())
2514
29
      .setLibCallee(CallingConv::Fast, RetTy, Callee, std::move(Args));
2515
29
2516
29
  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
2517
29
  return CallResult.first;
2518
29
}
2519
2520
10
static SDValue LowerBITCAST(SDValue Op, SelectionDAG &DAG) {
2521
10
  if (Op.getValueType() != MVT::f16)
2522
0
    return SDValue();
2523
10
2524
10
  assert(Op.getOperand(0).getValueType() == MVT::i16);
2525
10
  SDLoc DL(Op);
2526
10
2527
10
  Op = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op.getOperand(0));
2528
10
  Op = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Op);
2529
10
  return SDValue(
2530
10
      DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::f16, Op,
2531
10
                         DAG.getTargetConstant(AArch64::hsub, DL, MVT::i32)),
2532
10
      0);
2533
10
}
2534
2535
0
static EVT getExtensionTo64Bits(const EVT &OrigVT) {
2536
0
  if (OrigVT.getSizeInBits() >= 64)
2537
0
    return OrigVT;
2538
0
2539
0
  assert(OrigVT.isSimple() && "Expecting a simple value type");
2540
0
2541
0
  MVT::SimpleValueType OrigSimpleTy = OrigVT.getSimpleVT().SimpleTy;
2542
0
  switch (OrigSimpleTy) {
2543
0
  default: llvm_unreachable("Unexpected Vector Type");
2544
0
  case MVT::v2i8:
2545
0
  case MVT::v2i16:
2546
0
     return MVT::v2i32;
2547
0
  case MVT::v4i8:
2548
0
    return  MVT::v4i16;
2549
0
  }
2550
0
}
2551
2552
static SDValue addRequiredExtensionForVectorMULL(SDValue N, SelectionDAG &DAG,
2553
                                                 const EVT &OrigTy,
2554
                                                 const EVT &ExtTy,
2555
801
                                                 unsigned ExtOpcode) {
2556
801
  // The vector originally had a size of OrigTy. It was then extended to ExtTy.
2557
801
  // We expect the ExtTy to be 128-bits total. If the OrigTy is less than
2558
801
  // 64-bits we need to insert a new extension so that it will be 64-bits.
2559
801
  assert(ExtTy.is128BitVector() && "Unexpected extension size");
2560
801
  if (OrigTy.getSizeInBits() >= 64)
2561
801
    return N;
2562
0
2563
0
  // Must extend size to at least 64 bits to be used as an operand for VMULL.
2564
0
  EVT NewVT = getExtensionTo64Bits(OrigTy);
2565
0
2566
0
  return DAG.getNode(ExtOpcode, SDLoc(N), NewVT, N);
2567
0
}
2568
2569
static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG,
2570
52.5k
                                   bool isSigned) {
2571
52.5k
  EVT VT = N->getValueType(0);
2572
52.5k
2573
52.5k
  if (N->getOpcode() != ISD::BUILD_VECTOR)
2574
48.9k
    return false;
2575
3.54k
2576
7.16k
  for (const SDValue &Elt : N->op_values()) {
2577
7.16k
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
2578
6.80k
      unsigned EltSize = VT.getScalarSizeInBits();
2579
6.80k
      unsigned HalfSize = EltSize / 2;
2580
6.80k
      if (isSigned) {
2581
3.62k
        if (!isIntN(HalfSize, C->getSExtValue()))
2582
940
          return false;
2583
3.18k
      } else {
2584
3.18k
        if (!isUIntN(HalfSize, C->getZExtValue()))
2585
1.08k
          return false;
2586
4.78k
      }
2587
4.78k
      continue;
2588
4.78k
    }
2589
358
    return false;
2590
358
  }
2591
3.54k
2592
3.54k
  return true;
2593
3.54k
}
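To make the half-size test above concrete, here is a self-contained sketch (the helper names are made up; this is not part of the source) of the same check for i16 elements, whose constants must fit in 8 bits to feed a widening multiply:

#include <cstdint>
#include <iostream>

static bool fitsSignedHalf(int64_t V, unsigned EltBits) {
  unsigned Half = EltBits / 2;                           // like isIntN(Half, V)
  return V >= -(INT64_C(1) << (Half - 1)) && V < (INT64_C(1) << (Half - 1));
}
static bool fitsUnsignedHalf(uint64_t V, unsigned EltBits) {
  return V < (UINT64_C(1) << (EltBits / 2));             // like isUIntN
}

int main() {
  std::cout << fitsSignedHalf(-100, 16)    // 1: -100 is representable in i8
            << fitsUnsignedHalf(300, 16)   // 0: 300 needs 9 bits unsigned
            << '\n';
}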
2594
2595
1.06k
static SDValue skipExtensionForVectorMULL(SDNode *N, SelectionDAG &DAG) {
2596
1.06k
  if (N->getOpcode() == ISD::SIGN_EXTEND || N->getOpcode() == ISD::ZERO_EXTEND)
2597
801
    return addRequiredExtensionForVectorMULL(N->getOperand(0), DAG,
2598
801
                                             N->getOperand(0)->getValueType(0),
2599
801
                                             N->getValueType(0),
2600
801
                                             N->getOpcode());
2601
265
2602
265
  assert(N->getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR");
2603
265
  EVT VT = N->getValueType(0);
2604
265
  SDLoc dl(N);
2605
265
  unsigned EltSize = VT.getScalarSizeInBits() / 2;
2606
265
  unsigned NumElts = VT.getVectorNumElements();
2607
265
  MVT TruncVT = MVT::getIntegerVT(EltSize);
2608
265
  SmallVector<SDValue, 8> Ops;
2609
1.31k
  for (unsigned i = 0; i != NumElts; ++i) {
2610
1.04k
    ConstantSDNode *C = cast<ConstantSDNode>(N->getOperand(i));
2611
1.04k
    const APInt &CInt = C->getAPIntValue();
2612
1.04k
    // Element types smaller than 32 bits are not legal, so use i32 elements.
2613
1.04k
    // The values are implicitly truncated so sext vs. zext doesn't matter.
2614
1.04k
    Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), dl, MVT::i32));
2615
1.04k
  }
2616
265
  return DAG.getBuildVector(MVT::getVectorVT(TruncVT, NumElts), dl, Ops);
2617
265
}
2618
2619
27.3k
static bool isSignExtended(SDNode *N, SelectionDAG &DAG) {
2620
27.3k
  return N->getOpcode() == ISD::SIGN_EXTEND ||
2621
27.3k
         isExtendedBUILD_VECTOR(N, DAG, true);
2622
27.3k
}
2623
2624
26.7k
static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) {
2625
26.7k
  return N->getOpcode() == ISD::ZERO_EXTEND ||
2626
26.7k
         isExtendedBUILD_VECTOR(N, DAG, false);
2627
26.7k
}
2628
2629
561
static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG) {
2630
561
  unsigned Opcode = N->getOpcode();
2631
561
  if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
2632
223
    SDNode *N0 = N->getOperand(0).getNode();
2633
223
    SDNode *N1 = N->getOperand(1).getNode();
2634
223
    return N0->hasOneUse() && N1->hasOneUse() &&
2635
223
      isSignExtended(N0, DAG) && isSignExtended(N1, DAG);
2636
223
  }
2637
338
  return false;
2638
338
}
2639
2640
762
static bool isAddSubZExt(SDNode *N, SelectionDAG &DAG) {
2641
762
  unsigned Opcode = N->getOpcode();
2642
762
  if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
2643
90
    SDNode *N0 = N->getOperand(0).getNode();
2644
90
    SDNode *N1 = N->getOperand(1).getNode();
2645
90
    return N0->hasOneUse() && N1->hasOneUse() &&
2646
90
      isZeroExtended(N0, DAG) && isZeroExtended(N1, DAG);
2647
90
  }
2648
672
  return false;
2649
672
}
2650
2651
SDValue AArch64TargetLowering::LowerFLT_ROUNDS_(SDValue Op,
2652
1
                                                SelectionDAG &DAG) const {
2653
1
  // The rounding mode is in bits 23:22 of the FPSCR.
2654
1
  // The ARM rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0
2655
1
  // The formula we use to implement this is (((FPSCR + 1 << 22) >> 22) & 3)
2656
1
  // so that the shift + and get folded into a bitfield extract.
2657
1
  SDLoc dl(Op);
2658
1
2659
1
  SDValue FPCR_64 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::i64,
2660
1
                                DAG.getConstant(Intrinsic::aarch64_get_fpcr, dl,
2661
1
                                                MVT::i64));
2662
1
  SDValue FPCR_32 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, FPCR_64);
2663
1
  SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPCR_32,
2664
1
                                  DAG.getConstant(1U << 22, dl, MVT::i32));
2665
1
  SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds,
2666
1
                              DAG.getConstant(22, dl, MVT::i32));
2667
1
  return DAG.getNode(ISD::AND, dl, MVT::i32, RMODE,
2668
1
                     DAG.getConstant(3, dl, MVT::i32));
2669
1
}
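A worked check of the FLT_ROUNDS formula above, run on plain integers standing in for the FPCR value (illustrative only; not part of the source):

#include <cassert>
#include <cstdint>

// (((FPCR + (1 << 22)) >> 22) & 3) maps the RMode field in bits 23:22 from the
// AArch64 encoding 0,1,2,3 to the FLT_ROUNDS values 1,2,3,0.
static unsigned fltRounds(uint64_t FPCR) {
  uint32_t FltRounds = static_cast<uint32_t>(FPCR) + (1u << 22);
  return (FltRounds >> 22) & 3;
}

int main() {
  assert(fltRounds(0u << 22) == 1); // round to nearest
  assert(fltRounds(1u << 22) == 2); // round towards +infinity
  assert(fltRounds(2u << 22) == 3); // round towards -infinity
  assert(fltRounds(3u << 22) == 0); // round towards zero
}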
2670
2671
8.04k
static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) {
2672
8.04k
  // Multiplications are only custom-lowered for 128-bit vectors so that
2673
8.04k
  // VMULL can be detected.  Otherwise v2i64 multiplications are not legal.
2674
8.04k
  EVT VT = Op.getValueType();
2675
8.04k
  assert(VT.is128BitVector() && VT.isInteger() &&
2676
8.04k
         "unexpected type for custom-lowering ISD::MUL");
2677
8.04k
  SDNode *N0 = Op.getOperand(0).getNode();
2678
8.04k
  SDNode *N1 = Op.getOperand(1).getNode();
2679
8.04k
  unsigned NewOpc = 0;
2680
8.04k
  bool isMLA = false;
2681
8.04k
  bool isN0SExt = isSignExtended(N0, DAG);
2682
8.04k
  bool isN1SExt = isSignExtended(N1, DAG);
2683
8.04k
  if (isN0SExt && isN1SExt)
2684
214
    NewOpc = AArch64ISD::SMULL;
2685
7.83k
  else {
2686
7.83k
    bool isN0ZExt = isZeroExtended(N0, DAG);
2687
7.83k
    bool isN1ZExt = isZeroExtended(N1, DAG);
2688
7.83k
    if (isN0ZExt && isN1ZExt)
2689
301
      NewOpc = AArch64ISD::UMULL;
2690
7.53k
    else if (isN1SExt || isN1ZExt) {
2691
1.03k
      // Look for (s/zext A + s/zext B) * (s/zext C). We want to turn these
2692
1.03k
      // into (s/zext A * s/zext C) + (s/zext B * s/zext C)
2693
1.03k
      if (isN1SExt && isAddSubSExt(N0, DAG)) {
2694
0
        NewOpc = AArch64ISD::SMULL;
2695
0
        isMLA = true;
2696
1.03k
      } else if (isN1ZExt && isAddSubZExt(N0, DAG)) {
2697
12
        NewOpc =  AArch64ISD::UMULL;
2698
12
        isMLA = true;
2699
1.01k
      } else if (isN0ZExt && isAddSubZExt(N1, DAG)) {
2700
0
        std::swap(N0, N1);
2701
0
        NewOpc =  AArch64ISD::UMULL;
2702
0
        isMLA = true;
2703
0
      }
2704
1.03k
    }
2705
7.83k
2706
7.83k
    if (!NewOpc) {
2707
7.52k
      if (VT == MVT::v2i64)
2708
283
        // Fall through to expand this.  It is not legal.
2709
283
        return SDValue();
2710
7.23k
      else
2711
7.23k
        // Other vector multiplications are legal.
2712
7.23k
        return Op;
2713
527
    }
2714
7.83k
  }
2715
527
2716
527
  // Legalize to a S/UMULL instruction
2717
527
  SDLoc DL(Op);
2718
527
  SDValue Op0;
2719
527
  SDValue Op1 = skipExtensionForVectorMULL(N1, DAG);
2720
527
  if (!isMLA) {
2721
515
    Op0 = skipExtensionForVectorMULL(N0, DAG);
2722
515
    assert(Op0.getValueType().is64BitVector() &&
2723
515
           Op1.getValueType().is64BitVector() &&
2724
515
           "unexpected types for extended operands to VMULL");
2725
515
    return DAG.getNode(NewOpc, DL, VT, Op0, Op1);
2726
515
  }
2727
12
  // Optimizing (zext A + zext B) * C, to (S/UMULL A, C) + (S/UMULL B, C) during
2728
12
  // isel lowering to take advantage of no-stall back to back s/umul + s/umla.
2729
12
  // This is true for CPUs with accumulate forwarding such as Cortex-A53/A57
2730
12
  SDValue N00 = skipExtensionForVectorMULL(N0->getOperand(0).getNode(), DAG);
2731
12
  SDValue N01 = skipExtensionForVectorMULL(N0->getOperand(1).getNode(), DAG);
2732
12
  EVT Op1VT = Op1.getValueType();
2733
12
  return DAG.getNode(N0->getOpcode(), DL, VT,
2734
12
                     DAG.getNode(NewOpc, DL, VT,
2735
12
                               DAG.getNode(ISD::BITCAST, DL, Op1VT, N00), Op1),
2736
12
                     DAG.getNode(NewOpc, DL, VT,
2737
12
                               DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1));
2738
12
}
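For reference, the per-lane arithmetic that SMULL/UMULL performs, reduced to scalar C++ (an illustrative sketch, not part of the source): each operand is extended to double width first, so the full product is kept.

#include <cstdint>
#include <iostream>

int main() {
  int16_t a = -300, b = 250;
  int32_t s = int32_t(a) * int32_t(b);    // smull per lane: -75000, too big for i16
  uint16_t c = 60000, d = 3;
  uint32_t u = uint32_t(c) * uint32_t(d); // umull per lane: 180000
  std::cout << s << ' ' << u << '\n';
}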
2739
2740
SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
2741
11.3k
                                                     SelectionDAG &DAG) const {
2742
11.3k
  unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
2743
11.3k
  SDLoc dl(Op);
2744
11.3k
  switch (IntNo) {
2745
11.3k
  default: return SDValue(); // Don't custom lower most intrinsics.
2746
11.3k
  case Intrinsic::thread_pointer: {
2747
9
    EVT PtrVT = getPointerTy(DAG.getDataLayout());
2748
9
    return DAG.getNode(AArch64ISD::THREAD_POINTER, dl, PtrVT);
2749
11.3k
  }
2750
11.3k
  case Intrinsic::aarch64_neon_abs: {
2751
63
    EVT Ty = Op.getValueType();
2752
63
    if (Ty == MVT::i64) {
2753
2
      SDValue Result = DAG.getNode(ISD::BITCAST, dl, MVT::v1i64,
2754
2
                                   Op.getOperand(1));
2755
2
      Result = DAG.getNode(ISD::ABS, dl, MVT::v1i64, Result);
2756
2
      return DAG.getNode(ISD::BITCAST, dl, MVT::i64, Result);
2757
61
    } else if (Ty.isVector() && Ty.isInteger() && isTypeLegal(Ty)) {
2758
61
      return DAG.getNode(ISD::ABS, dl, Ty, Op.getOperand(1));
2759
61
    } else {
2760
0
      report_fatal_error("Unexpected type for AArch64 NEON intrinsic");
2761
0
    }
2762
0
  }
2763
63
  case Intrinsic::aarch64_neon_smax:
2764
63
    return DAG.getNode(ISD::SMAX, dl, Op.getValueType(),
2765
63
                       Op.getOperand(1), Op.getOperand(2));
2766
62
  case Intrinsic::aarch64_neon_umax:
2767
62
    return DAG.getNode(ISD::UMAX, dl, Op.getValueType(),
2768
62
                       Op.getOperand(1), Op.getOperand(2));
2769
62
  case Intrinsic::aarch64_neon_smin:
2770
62
    return DAG.getNode(ISD::SMIN, dl, Op.getValueType(),
2771
62
                       Op.getOperand(1), Op.getOperand(2));
2772
62
  case Intrinsic::aarch64_neon_umin:
2773
62
    return DAG.getNode(ISD::UMIN, dl, Op.getValueType(),
2774
62
                       Op.getOperand(1), Op.getOperand(2));
2775
0
2776
10
  case Intrinsic::localaddress: {
2777
10
    const auto &MF = DAG.getMachineFunction();
2778
10
    const auto *RegInfo = Subtarget->getRegisterInfo();
2779
10
    unsigned Reg = RegInfo->getLocalAddressRegister(MF);
2780
10
    return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg,
2781
10
                              Op.getSimpleValueType());
2782
0
  }
2783
0
2784
1
  case Intrinsic::eh_recoverfp: {
2785
1
    // FIXME: This needs to be implemented to correctly handle highly aligned
2786
1
    // stack objects. For now we simply return the incoming FP. Refer D53541
2787
1
    // for more details.
2788
1
    SDValue FnOp = Op.getOperand(1);
2789
1
    SDValue IncomingFPOp = Op.getOperand(2);
2790
1
    GlobalAddressSDNode *GSD = dyn_cast<GlobalAddressSDNode>(FnOp);
2791
1
    auto *Fn = dyn_cast_or_null<Function>(GSD ? GSD->getGlobal() : nullptr);
2792
1
    if (!Fn)
2793
0
      report_fatal_error(
2794
0
          "llvm.eh.recoverfp must take a function as the first argument");
2795
1
    return IncomingFPOp;
2796
1
  }
2797
11.3k
  }
2798
11.3k
}
2799
2800
// Custom lower trunc store for v4i8 vectors, since it is promoted to v4i16.
2801
static SDValue LowerTruncateVectorStore(SDLoc DL, StoreSDNode *ST,
2802
                                        EVT VT, EVT MemVT,
2803
187
                                        SelectionDAG &DAG) {
2804
187
  assert(VT.isVector() && "VT should be a vector type");
2805
187
  assert(MemVT == MVT::v4i8 && VT == MVT::v4i16);
2806
187
2807
187
  SDValue Value = ST->getValue();
2808
187
2809
187
  // It first extends the promoted v4i16 to v8i16, truncates to v8i8, and extracts
2810
187
  // the word lane which represents the v4i8 subvector.  It optimizes the store
2811
187
  // to:
2812
187
  //
2813
187
  //   xtn  v0.8b, v0.8h
2814
187
  //   str  s0, [x0]
2815
187
2816
187
  SDValue Undef = DAG.getUNDEF(MVT::i16);
2817
187
  SDValue UndefVec = DAG.getBuildVector(MVT::v4i16, DL,
2818
187
                                        {Undef, Undef, Undef, Undef});
2819
187
2820
187
  SDValue TruncExt = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i16,
2821
187
                                 Value, UndefVec);
2822
187
  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, MVT::v8i8, TruncExt);
2823
187
2824
187
  Trunc = DAG.getNode(ISD::BITCAST, DL, MVT::v2i32, Trunc);
2825
187
  SDValue ExtractTrunc = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
2826
187
                                     Trunc, DAG.getConstant(0, DL, MVT::i64));
2827
187
2828
187
  return DAG.getStore(ST->getChain(), DL, ExtractTrunc,
2829
187
                      ST->getBasePtr(), ST->getMemOperand());
2830
187
}
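The effect of the truncating store above, shown on plain arrays (illustrative values, not part of the source): the four i16 lanes are narrowed to bytes and land in one 32-bit word, which is what the xtn + str s0 sequence produces.

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  uint16_t lanes[4] = {0x1101, 0x2202, 0x3303, 0x4404}; // promoted v4i16 value
  uint8_t narrowed[4];
  for (int i = 0; i < 4; ++i)
    narrowed[i] = static_cast<uint8_t>(lanes[i]);       // truncate each lane
  uint32_t word;
  std::memcpy(&word, narrowed, sizeof(word));
  assert(word == 0x04030201u);                          // on a little-endian host
}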
2831
2832
// Custom lowering for any store, vector or scalar and/or default or with
2833
// a truncate operation.  Currently we only custom lower the truncate operation
2834
// from vector v4i16 to v4i8.
2835
SDValue AArch64TargetLowering::LowerSTORE(SDValue Op,
2836
187
                                          SelectionDAG &DAG) const {
2837
187
  SDLoc Dl(Op);
2838
187
  StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
2839
187
  assert (StoreNode && "Can only custom lower store nodes");
2840
187
2841
187
  SDValue Value = StoreNode->getValue();
2842
187
2843
187
  EVT VT = Value.getValueType();
2844
187
  EVT MemVT = StoreNode->getMemoryVT();
2845
187
2846
187
  assert (VT.isVector() && "Can only custom lower vector store types");
2847
187
2848
187
  unsigned AS = StoreNode->getAddressSpace();
2849
187
  unsigned Align = StoreNode->getAlignment();
2850
187
  if (Align < MemVT.getStoreSize() &&
2851
187
      !allowsMisalignedMemoryAccesses(
2852
181
          MemVT, AS, Align, StoreNode->getMemOperand()->getFlags(), nullptr)) {
2853
0
    return scalarizeVectorStore(StoreNode, DAG);
2854
0
  }
2855
187
2856
187
  if (StoreNode->isTruncatingStore()) {
2857
187
    return LowerTruncateVectorStore(Dl, StoreNode, VT, MemVT, DAG);
2858
187
  }
2859
0
2860
0
  return SDValue();
2861
0
}
2862
2863
SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
2864
1.08M
                                              SelectionDAG &DAG) const {
2865
1.08M
  LLVM_DEBUG(dbgs() << "Custom lowering: ");
2866
1.08M
  LLVM_DEBUG(Op.dump());
2867
1.08M
2868
1.08M
  switch (Op.getOpcode()) {
2869
1.08M
  default:
2870
0
    llvm_unreachable("unimplemented operand");
2871
1.08M
    return SDValue();
2872
1.08M
  case ISD::BITCAST:
2873
10
    return LowerBITCAST(Op, DAG);
2874
1.08M
  case ISD::GlobalAddress:
2875
226k
    return LowerGlobalAddress(Op, DAG);
2876
1.08M
  case ISD::GlobalTLSAddress:
2877
130
    return LowerGlobalTLSAddress(Op, DAG);
2878
1.08M
  case ISD::SETCC:
2879
11.6k
    return LowerSETCC(Op, DAG);
2880
1.08M
  case ISD::BR_CC:
2881
366k
    return LowerBR_CC(Op, DAG);
2882
1.08M
  case ISD::SELECT:
2883
916
    return LowerSELECT(Op, DAG);
2884
1.08M
  case ISD::SELECT_CC:
2885
15.4k
    return LowerSELECT_CC(Op, DAG);
2886
1.08M
  case ISD::JumpTable:
2887
1.66k
    return LowerJumpTable(Op, DAG);
2888
1.08M
  case ISD::BR_JT:
2889
1.66k
    return LowerBR_JT(Op, DAG);
2890
1.08M
  case ISD::ConstantPool:
2891
30.5k
    return LowerConstantPool(Op, DAG);
2892
1.08M
  case ISD::BlockAddress:
2893
7
    return LowerBlockAddress(Op, DAG);
2894
1.08M
  case ISD::VASTART:
2895
74
    return LowerVASTART(Op, DAG);
2896
1.08M
  case ISD::VACOPY:
2897
44
    return LowerVACOPY(Op, DAG);
2898
1.08M
  case ISD::VAARG:
2899
83
    return LowerVAARG(Op, DAG);
2900
1.08M
  case ISD::ADDC:
2901
174
  case ISD::ADDE:
2902
174
  case ISD::SUBC:
2903
174
  case ISD::SUBE:
2904
174
    return LowerADDC_ADDE_SUBC_SUBE(Op, DAG);
2905
1.41k
  case ISD::SADDO:
2906
1.41k
  case ISD::UADDO:
2907
1.41k
  case ISD::SSUBO:
2908
1.41k
  case ISD::USUBO:
2909
1.41k
  case ISD::SMULO:
2910
1.41k
  case ISD::UMULO:
2911
1.41k
    return LowerXALUO(Op, DAG);
2912
1.41k
  case ISD::FADD:
2913
79
    return LowerF128Call(Op, DAG, RTLIB::ADD_F128);
2914
1.41k
  case ISD::FSUB:
2915
2
    return LowerF128Call(Op, DAG, RTLIB::SUB_F128);
2916
1.41k
  case ISD::FMUL:
2917
4
    return LowerF128Call(Op, DAG, RTLIB::MUL_F128);
2918
1.41k
  case ISD::FDIV:
2919
1
    return LowerF128Call(Op, DAG, RTLIB::DIV_F128);
2920
1.41k
  case ISD::FP_ROUND:
2921
794
    return LowerFP_ROUND(Op, DAG);
2922
1.41k
  case ISD::FP_EXTEND:
2923
2
    return LowerFP_EXTEND(Op, DAG);
2924
1.41k
  case ISD::FRAMEADDR:
2925
1.32k
    return LowerFRAMEADDR(Op, DAG);
2926
1.41k
  case ISD::SPONENTRY:
2927
10
    return LowerSPONENTRY(Op, DAG);
2928
1.41k
  case ISD::RETURNADDR:
2929
1.35k
    return LowerRETURNADDR(Op, DAG);
2930
1.41k
  case ISD::ADDROFRETURNADDR:
2931
2
    return LowerADDROFRETURNADDR(Op, DAG);
2932
43.3k
  case ISD::INSERT_VECTOR_ELT:
2933
43.3k
    return LowerINSERT_VECTOR_ELT(Op, DAG);
2934
87.1k
  case ISD::EXTRACT_VECTOR_ELT:
2935
87.1k
    return LowerEXTRACT_VECTOR_ELT(Op, DAG);
2936
84.3k
  case ISD::BUILD_VECTOR:
2937
84.3k
    return LowerBUILD_VECTOR(Op, DAG);
2938
2.37k
  case ISD::VECTOR_SHUFFLE:
2939
2.37k
    return LowerVECTOR_SHUFFLE(Op, DAG);
2940
17.3k
  case ISD::EXTRACT_SUBVECTOR:
2941
17.3k
    return LowerEXTRACT_SUBVECTOR(Op, DAG);
2942
14.4k
  case ISD::SRA:
2943
14.4k
  case ISD::SRL:
2944
14.4k
  case ISD::SHL:
2945
14.4k
    return LowerVectorSRA_SRL_SHL(Op, DAG);
2946
14.4k
  case ISD::SHL_PARTS:
2947
12
    return LowerShiftLeftParts(Op, DAG);
2948
14.4k
  case ISD::SRL_PARTS:
2949
12
  case ISD::SRA_PARTS:
2950
12
    return LowerShiftRightParts(Op, DAG);
2951
112
  case ISD::CTPOP:
2952
112
    return LowerCTPOP(Op, DAG);
2953
180
  case ISD::FCOPYSIGN:
2954
180
    return LowerFCOPYSIGN(Op, DAG);
2955
4.83k
  case ISD::OR:
2956
4.83k
    return LowerVectorOR(Op, DAG);
2957
14.3k
  case ISD::XOR:
2958
14.3k
    return LowerXOR(Op, DAG);
2959
82
  case ISD::PREFETCH:
2960
82
    return LowerPREFETCH(Op, DAG);
2961
127k
  case ISD::SINT_TO_FP:
2962
127k
  case ISD::UINT_TO_FP:
2963
127k
    return LowerINT_TO_FP(Op, DAG);
2964
127k
  case ISD::FP_TO_SINT:
2965
3.91k
  case ISD::FP_TO_UINT:
2966
3.91k
    return LowerFP_TO_INT(Op, DAG);
2967
3.91k
  case ISD::FSINCOS:
2968
29
    return LowerFSINCOS(Op, DAG);
2969
3.91k
  case ISD::FLT_ROUNDS_:
2970
1
    return LowerFLT_ROUNDS_(Op, DAG);
2971
8.04k
  case ISD::MUL:
2972
8.04k
    return LowerMUL(Op, DAG);
2973
11.3k
  case ISD::INTRINSIC_WO_CHAIN:
2974
11.3k
    return LowerINTRINSIC_WO_CHAIN(Op, DAG);
2975
3.91k
  case ISD::STORE:
2976
187
    return LowerSTORE(Op, DAG);
2977
3.91k
  case ISD::VECREDUCE_ADD:
2978
797
  case ISD::VECREDUCE_SMAX:
2979
797
  case ISD::VECREDUCE_SMIN:
2980
797
  case ISD::VECREDUCE_UMAX:
2981
797
  case ISD::VECREDUCE_UMIN:
2982
797
  case ISD::VECREDUCE_FMAX:
2983
797
  case ISD::VECREDUCE_FMIN:
2984
797
    return LowerVECREDUCE(Op, DAG);
2985
797
  case ISD::ATOMIC_LOAD_SUB:
2986
176
    return LowerATOMIC_LOAD_SUB(Op, DAG);
2987
797
  case ISD::ATOMIC_LOAD_AND:
2988
176
    return LowerATOMIC_LOAD_AND(Op, DAG);
2989
797
  case ISD::DYNAMIC_STACKALLOC:
2990
5
    return LowerDYNAMIC_STACKALLOC(Op, DAG);
2991
1.08M
  }
2992
1.08M
}
2993
2994
//===----------------------------------------------------------------------===//
2995
//                      Calling Convention Implementation
2996
//===----------------------------------------------------------------------===//
2997
2998
/// Selects the correct CCAssignFn for a given CallingConvention value.
2999
CCAssignFn *AArch64TargetLowering::CCAssignFnForCall(CallingConv::ID CC,
3000
3.58M
                                                     bool IsVarArg) const {
3001
3.58M
  switch (CC) {
3002
3.58M
  default:
3003
0
    report_fatal_error("Unsupported calling convention.");
3004
3.58M
  case CallingConv::WebKit_JS:
3005
106
    return CC_AArch64_WebKit_JS;
3006
3.58M
  case CallingConv::GHC:
3007
11
    return CC_AArch64_GHC;
3008
3.58M
  case CallingConv::C:
3009
3.57M
  case CallingConv::Fast:
3010
3.57M
  case CallingConv::PreserveMost:
3011
3.57M
  case CallingConv::CXX_FAST_TLS:
3012
3.57M
  case CallingConv::Swift:
3013
3.57M
    if (Subtarget->isTargetWindows() && IsVarArg)
3014
18
      return CC_AArch64_Win64_VarArg;
3015
3.57M
    if (!Subtarget->isTargetDarwin())
3016
29.3k
      return CC_AArch64_AAPCS;
3017
3.55M
    return IsVarArg ? CC_AArch64_DarwinPCS_VarArg : CC_AArch64_DarwinPCS;
3018
3.55M
  case CallingConv::Win64:
3019
26
    return IsVarArg ? CC_AArch64_Win64_VarArg : CC_AArch64_AAPCS;
3020
3.55M
  case CallingConv::AArch64_VectorCall:
3021
0
    return CC_AArch64_AAPCS;
3022
3.58M
  }
3023
3.58M
}
3024
3025
CCAssignFn *
3026
1.51M
AArch64TargetLowering::CCAssignFnForReturn(CallingConv::ID CC) const {
3027
1.51M
  return CC == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS
3028
1.51M
                                      : RetCC_AArch64_AAPCS;
3029
1.51M
}
3030
3031
SDValue AArch64TargetLowering::LowerFormalArguments(
3032
    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3033
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
3034
37.3k
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3035
37.3k
  MachineFunction &MF = DAG.getMachineFunction();
3036
37.3k
  MachineFrameInfo &MFI = MF.getFrameInfo();
3037
37.3k
  bool IsWin64 = Subtarget->isCallingConvWin64(MF.getFunction().getCallingConv());
3038
37.3k
3039
37.3k
  // Assign locations to all of the incoming arguments.
3040
37.3k
  SmallVector<CCValAssign, 16> ArgLocs;
3041
37.3k
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
3042
37.3k
                 *DAG.getContext());
3043
37.3k
3044
37.3k
  // At this point, Ins[].VT may already be promoted to i32. To correctly
3045
37.3k
  // handle passing i8 as i8 instead of i32 on stack, we pass in both i32 and
3046
37.3k
  // i8 to CC_AArch64_AAPCS with i32 being ValVT and i8 being LocVT.
3047
37.3k
  // Since AnalyzeFormalArguments uses Ins[].VT for both ValVT and LocVT, here
3048
37.3k
  // we use a special version of AnalyzeFormalArguments to pass in ValVT and
3049
37.3k
  // LocVT.
3050
37.3k
  unsigned NumArgs = Ins.size();
3051
37.3k
  Function::const_arg_iterator CurOrigArg = MF.getFunction().arg_begin();
3052
37.3k
  unsigned CurArgIdx = 0;
3053
129k
  for (unsigned i = 0; i != NumArgs; ++i) {
3054
92.0k
    MVT ValVT = Ins[i].VT;
3055
92.0k
    if (Ins[i].isOrigArg()) {
3056
92.0k
      std::advance(CurOrigArg, Ins[i].getOrigArgIndex() - CurArgIdx);
3057
92.0k
      CurArgIdx = Ins[i].getOrigArgIndex();
3058
92.0k
3059
92.0k
      // Get type of the original argument.
3060
92.0k
      EVT ActualVT = getValueType(DAG.getDataLayout(), CurOrigArg->getType(),
3061
92.0k
                                  /*AllowUnknown*/ true);
3062
92.0k
      MVT ActualMVT = ActualVT.isSimple() ? ActualVT.getSimpleVT() : MVT::Other;
3063
92.0k
      // If ActualMVT is i1/i8/i16, we should set LocVT to i8/i8/i16.
3064
92.0k
      if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8)
3065
1.17k
        ValVT = MVT::i8;
3066
90.8k
      else if (ActualMVT == MVT::i16)
3067
2.26k
        ValVT = MVT::i16;
3068
92.0k
    }
3069
92.0k
    CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, /*IsVarArg=*/false);
3070
92.0k
    bool Res =
3071
92.0k
        AssignFn(i, ValVT, ValVT, CCValAssign::Full, Ins[i].Flags, CCInfo);
3072
92.0k
    assert(!Res && "Call operand has unhandled type");
3073
92.0k
    (void)Res;
3074
92.0k
  }
3075
37.3k
  assert(ArgLocs.size() == Ins.size());
3076
37.3k
  SmallVector<SDValue, 16> ArgValues;
3077
129k
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3078
92.0k
    CCValAssign &VA = ArgLocs[i];
3079
92.0k
3080
92.0k
    if (Ins[i].Flags.isByVal()) {
3081
14
      // Byval is used for HFAs in the PCS, but the system should work in a
3082
14
      // non-compliant manner for larger structs.
3083
14
      EVT PtrVT = getPointerTy(DAG.getDataLayout());
3084
14
      int Size = Ins[i].Flags.getByValSize();
3085
14
      unsigned NumRegs = (Size + 7) / 8;
3086
14
3087
14
      // FIXME: This works on big-endian for composite byvals, which are the common
3088
14
      // case. It should also work for fundamental types too.
3089
14
      unsigned FrameIdx =
3090
14
        MFI.CreateFixedObject(8 * NumRegs, VA.getLocMemOffset(), false);
3091
14
      SDValue FrameIdxN = DAG.getFrameIndex(FrameIdx, PtrVT);
3092
14
      InVals.push_back(FrameIdxN);
3093
14
3094
14
      continue;
3095
14
    }
3096
92.0k
3097
92.0k
    if (VA.isRegLoc()) {
3098
84.6k
      // Arguments stored in registers.
3099
84.6k
      EVT RegVT = VA.getLocVT();
3100
84.6k
3101
84.6k
      SDValue ArgValue;
3102
84.6k
      const TargetRegisterClass *RC;
3103
84.6k
3104
84.6k
      if (RegVT == MVT::i32)
3105
12.7k
        RC = &AArch64::GPR32RegClass;
3106
71.9k
      else if (RegVT == MVT::i64)
3107
58.5k
        RC = &AArch64::GPR64RegClass;
3108
13.3k
      else if (RegVT == MVT::f16)
3109
413
        RC = &AArch64::FPR16RegClass;
3110
12.9k
      else if (RegVT == MVT::f32)
3111
945
        RC = &AArch64::FPR32RegClass;
3112
12.0k
      else if (RegVT == MVT::f64 || RegVT.is64BitVector())
3113
5.13k
        RC = &AArch64::FPR64RegClass;
3114
6.88k
      else if (RegVT == MVT::f128 || RegVT.is128BitVector())
3115
6.88k
        RC = &AArch64::FPR128RegClass;
3116
6.88k
      else
3117
6.88k
        llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
3118
84.6k
3119
84.6k
      // Transform the arguments in physical registers into virtual ones.
3120
84.6k
      unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
3121
84.6k
      ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, RegVT);
3122
84.6k
3123
84.6k
      // If this is an 8, 16 or 32-bit value, it is really passed promoted
3124
84.6k
      // to 64 bits.  Insert an assert[sz]ext to capture this, then
3125
84.6k
      // truncate to the right size.
3126
84.6k
      switch (VA.getLocInfo()) {
3127
84.6k
      default:
3128
0
        llvm_unreachable("Unknown loc info!");
3129
84.6k
      case CCValAssign::Full:
3130
79.3k
        break;
3131
84.6k
      case CCValAssign::BCvt:
3132
1.95k
        ArgValue = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), ArgValue);
3133
1.95k
        break;
3134
84.6k
      case CCValAssign::AExt:
3135
3.40k
      case CCValAssign::SExt:
3136
3.40k
      case CCValAssign::ZExt:
3137
3.40k
        // SelectionDAGBuilder will insert appropriate AssertZExt & AssertSExt
3138
3.40k
        // nodes after our lowering.
3139
3.40k
        assert(RegVT == Ins[i].VT && "incorrect register location selected");
3140
3.40k
        break;
3141
84.6k
      }
3142
84.6k
3143
84.6k
      InVals.push_back(ArgValue);
3144
84.6k
3145
84.6k
    } else { // VA.isRegLoc()
3146
7.33k
      assert(VA.isMemLoc() && "CCValAssign is neither reg nor mem");
3147
7.33k
      unsigned ArgOffset = VA.getLocMemOffset();
3148
7.33k
      unsigned ArgSize = VA.getValVT().getSizeInBits() / 8;
3149
7.33k
3150
7.33k
      uint32_t BEAlign = 0;
3151
7.33k
      if (!Subtarget->isLittleEndian() && ArgSize < 8 &&
3152
7.33k
          !Ins[i].Flags.isInConsecutiveRegs())
3153
4
        BEAlign = 8 - ArgSize;
3154
7.33k
3155
7.33k
      int FI = MFI.CreateFixedObject(ArgSize, ArgOffset + BEAlign, true);
3156
7.33k
3157
7.33k
      // Create load nodes to retrieve arguments from the stack.
3158
7.33k
      SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
3159
7.33k
      SDValue ArgValue;
3160
7.33k
3161
7.33k
      // For NON_EXTLOAD, generic code in getLoad assert(ValVT == MemVT)
3162
7.33k
      ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
3163
7.33k
      MVT MemVT = VA.getValVT();
3164
7.33k
3165
7.33k
      switch (VA.getLocInfo()) {
3166
7.33k
      default:
3167
7.28k
        break;
3168
7.33k
      case CCValAssign::BCvt:
3169
15
        MemVT = VA.getLocVT();
3170
15
        break;
3171
7.33k
      case CCValAssign::SExt:
3172
18
        ExtType = ISD::SEXTLOAD;
3173
18
        break;
3174
7.33k
      case CCValAssign::ZExt:
3175
9
        ExtType = ISD::ZEXTLOAD;
3176
9
        break;
3177
7.33k
      case CCValAssign::AExt:
3178
9
        ExtType = ISD::EXTLOAD;
3179
9
        break;
3180
7.33k
      }
3181
7.33k
3182
7.33k
      ArgValue = DAG.getExtLoad(
3183
7.33k
          ExtType, DL, VA.getLocVT(), Chain, FIN,
3184
7.33k
          MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI),
3185
7.33k
          MemVT);
3186
7.33k
3187
7.33k
      InVals.push_back(ArgValue);
3188
7.33k
    }
3189
92.0k
  }
3190
37.3k
3191
37.3k
  // varargs
3192
37.3k
  AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
3193
37.3k
  if (isVarArg) {
3194
80
    if (!Subtarget->isTargetDarwin() || IsWin64) {
3195
28
      // The AAPCS variadic function ABI is identical to the non-variadic
3196
28
      // one. As a result there may be more arguments in registers and we should
3197
28
      // save them for future reference.
3198
28
      // Win64 variadic functions also pass arguments in registers, but all float
3199
28
      // arguments are passed in integer registers.
3200
28
      saveVarArgRegisters(CCInfo, DAG, DL, Chain);
3201
28
    }
3202
80
3203
80
    // This will point to the next argument passed via stack.
3204
80
    unsigned StackOffset = CCInfo.getNextStackOffset();
3205
80
    // We currently pass all varargs at 8-byte alignment.
3206
80
    StackOffset = ((StackOffset + 7) & ~7);
3207
80
    FuncInfo->setVarArgsStackIndex(MFI.CreateFixedObject(4, StackOffset, true));
3208
80
3209
80
    if (MFI.hasMustTailInVarArgFunc()) {
3210
2
      SmallVector<MVT, 2> RegParmTypes;
3211
2
      RegParmTypes.push_back(MVT::i64);
3212
2
      RegParmTypes.push_back(MVT::f128);
3213
2
      // Compute the set of forwarded registers. The rest are scratch.
3214
2
      SmallVectorImpl<ForwardedRegister> &Forwards =
3215
2
                                       FuncInfo->getForwardedMustTailRegParms();
3216
2
      CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes,
3217
2
                                               CC_AArch64_AAPCS);
3218
2
3219
2
      // Conservatively forward X8, since it might be used for aggregate return.
3220
2
      if (!CCInfo.isAllocated(AArch64::X8)) {
3221
2
        unsigned X8VReg = MF.addLiveIn(AArch64::X8, &AArch64::GPR64RegClass);
3222
2
        Forwards.push_back(ForwardedRegister(X8VReg, AArch64::X8, MVT::i64));
3223
2
      }
3224
2
    }
3225
80
  }
3226
37.3k
3227
37.3k
  // On Windows, InReg pointers must be returned, so record the pointer in a
3228
37.3k
  // virtual register at the start of the function so it can be returned in the
3229
37.3k
  // epilogue.
3230
37.3k
  if (IsWin64) {
3231
284
    for (unsigned I = 0, E = Ins.size(); I != E; ++I) {
3232
167
      if (Ins[I].Flags.isInReg()) {
3233
3
        assert(!FuncInfo->getSRetReturnReg());
3234
3
3235
3
        MVT PtrTy = getPointerTy(DAG.getDataLayout());
3236
3
        unsigned Reg =
3237
3
          MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy));
3238
3
        FuncInfo->setSRetReturnReg(Reg);
3239
3
3240
3
        SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), DL, Reg, InVals[I]);
3241
3
        Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Copy, Chain);
3242
3
        break;
3243
3
      }
3244
167
    }
3245
120
  }
3246
37.3k
3247
37.3k
  unsigned StackArgSize = CCInfo.getNextStackOffset();
3248
37.3k
  bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt;
3249
37.3k
  if (DoesCalleeRestoreStack(CallConv, TailCallOpt)) {
3250
21
    // This is a non-standard ABI so by fiat I say we're allowed to make full
3251
21
    // use of the stack area to be popped, which must be aligned to 16 bytes in
3252
21
    // any case:
3253
21
    StackArgSize = alignTo(StackArgSize, 16);
3254
21
3255
21
    // If we're expected to restore the stack (e.g. fastcc) then we'll be adding
3256
21
    // a multiple of 16.
3257
21
    FuncInfo->setArgumentStackToRestore(StackArgSize);
3258
21
3259
21
    // This realignment carries over to the available bytes below. Our own
3260
21
    // callers will guarantee the space is free by giving an aligned value to
3261
21
    // CALLSEQ_START.
3262
21
  }
3263
37.3k
  // Even if we're not expected to free up the space, it's useful to know how
3264
37.3k
  // much is there while considering tail calls (because we can reuse it).
3265
37.3k
  FuncInfo->setBytesInStackArgArea(StackArgSize);
3266
37.3k
3267
37.3k
  if (Subtarget->hasCustomCallingConv())
3268
36
    Subtarget->getRegisterInfo()->UpdateCustomCalleeSavedRegs(MF);
3269
37.3k
3270
37.3k
  return Chain;
3271
37.3k
}
3272
3273
void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo,
3274
                                                SelectionDAG &DAG,
3275
                                                const SDLoc &DL,
3276
28
                                                SDValue &Chain) const {
3277
28
  MachineFunction &MF = DAG.getMachineFunction();
3278
28
  MachineFrameInfo &MFI = MF.getFrameInfo();
3279
28
  AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
3280
28
  auto PtrVT = getPointerTy(DAG.getDataLayout());
3281
28
  bool IsWin64 = Subtarget->isCallingConvWin64(MF.getFunction().getCallingConv());
3282
28
3283
28
  SmallVector<SDValue, 8> MemOps;
3284
28
3285
28
  static const MCPhysReg GPRArgRegs[] = { AArch64::X0, AArch64::X1, AArch64::X2,
3286
28
                                          AArch64::X3, AArch64::X4, AArch64::X5,
3287
28
                                          AArch64::X6, AArch64::X7 };
3288
28
  static const unsigned NumGPRArgRegs = array_lengthof(GPRArgRegs);
3289
28
  unsigned FirstVariadicGPR = CCInfo.getFirstUnallocated(GPRArgRegs);
3290
28
3291
28
  unsigned GPRSaveSize = 8 * (NumGPRArgRegs - FirstVariadicGPR);
3292
28
  int GPRIdx = 0;
3293
28
  if (GPRSaveSize != 0) {
3294
22
    if (IsWin64) {
3295
15
      GPRIdx = MFI.CreateFixedObject(GPRSaveSize, -(int)GPRSaveSize, false);
3296
15
      if (GPRSaveSize & 15)
3297
13
        // The extra size here, if triggered, will always be 8.
3298
13
        MFI.CreateFixedObject(16 - (GPRSaveSize & 15), -(int)alignTo(GPRSaveSize, 16), false);
3299
15
    } else
3300
7
      GPRIdx = MFI.CreateStackObject(GPRSaveSize, 8, false);
3301
22
3302
22
    SDValue FIN = DAG.getFrameIndex(GPRIdx, PtrVT);
3303
22
3304
156
    for (unsigned i = FirstVariadicGPR; i < NumGPRArgRegs; ++i) {
3305
134
      unsigned VReg = MF.addLiveIn(GPRArgRegs[i], &AArch64::GPR64RegClass);
3306
134
      SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::i64);
3307
134
      SDValue Store = DAG.getStore(
3308
134
          Val.getValue(1), DL, Val, FIN,
3309
134
          IsWin64
3310
134
              ? MachinePointerInfo::getFixedStack(DAG.getMachineFunction(),
3311
87
                                                  GPRIdx,
3312
87
                                                  (i - FirstVariadicGPR) * 8)
3313
134
              : MachinePointerInfo::getStack(DAG.getMachineFunction(), i * 8));
3314
134
      MemOps.push_back(Store);
3315
134
      FIN =
3316
134
          DAG.getNode(ISD::ADD, DL, PtrVT, FIN, DAG.getConstant(8, DL, PtrVT));
3317
134
    }
3318
22
  }
3319
28
  FuncInfo->setVarArgsGPRIndex(GPRIdx);
3320
28
  FuncInfo->setVarArgsGPRSize(GPRSaveSize);
3321
28
3322
28
  if (Subtarget->hasFPARMv8() && !IsWin64) {
3323
8
    static const MCPhysReg FPRArgRegs[] = {
3324
8
        AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3,
3325
8
        AArch64::Q4, AArch64::Q5, AArch64::Q6, AArch64::Q7};
3326
8
    static const unsigned NumFPRArgRegs = array_lengthof(FPRArgRegs);
3327
8
    unsigned FirstVariadicFPR = CCInfo.getFirstUnallocated(FPRArgRegs);
3328
8
3329
8
    unsigned FPRSaveSize = 16 * (NumFPRArgRegs - FirstVariadicFPR);
3330
8
    int FPRIdx = 0;
3331
8
    if (FPRSaveSize != 0) {
3332
7
      FPRIdx = MFI.CreateStackObject(FPRSaveSize, 16, false);
3333
7
3334
7
      SDValue FIN = DAG.getFrameIndex(FPRIdx, PtrVT);
3335
7
3336
59
      for (unsigned i = FirstVariadicFPR; i < NumFPRArgRegs; ++i) {
3337
52
        unsigned VReg = MF.addLiveIn(FPRArgRegs[i], &AArch64::FPR128RegClass);
3338
52
        SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f128);
3339
52
3340
52
        SDValue Store = DAG.getStore(
3341
52
            Val.getValue(1), DL, Val, FIN,
3342
52
            MachinePointerInfo::getStack(DAG.getMachineFunction(), i * 16));
3343
52
        MemOps.push_back(Store);
3344
52
        FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN,
3345
52
                          DAG.getConstant(16, DL, PtrVT));
3346
52
      }
3347
7
    }
3348
8
    FuncInfo->setVarArgsFPRIndex(FPRIdx);
3349
8
    FuncInfo->setVarArgsFPRSize(FPRSaveSize);
3350
8
  }
3351
28
3352
28
  if (!MemOps.empty()) {
3353
23
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
3354
23
  }
3355
28
}
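A worked example of the save-area sizing in saveVarArgRegisters above (illustrative only, not part of the source): a Win64 variadic function whose fixed arguments occupy X0-X2, leaving X3-X7 to be spilled.

#include <cassert>

int main() {
  const unsigned NumGPRArgRegs = 8;                       // X0..X7
  unsigned FirstVariadicGPR = 3;                          // X3..X7 are varargs
  unsigned GPRSaveSize = 8 * (NumGPRArgRegs - FirstVariadicGPR);
  assert(GPRSaveSize == 40);
  // GPRSaveSize is always a multiple of 8, so when it is not 16-byte aligned
  // the extra fixed object created above is exactly 8 bytes.
  unsigned Extra = (GPRSaveSize & 15) ? 16 - (GPRSaveSize & 15) : 0;
  assert(Extra == 8 && GPRSaveSize + Extra == 48);
}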
3356
3357
/// LowerCallResult - Lower the result values of a call into the
3358
/// appropriate copies out of appropriate physical registers.
3359
SDValue AArch64TargetLowering::LowerCallResult(
3360
    SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
3361
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
3362
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
3363
196k
    SDValue ThisVal) const {
3364
196k
  CCAssignFn *RetCC = CallConv == CallingConv::WebKit_JS
3365
196k
                          ? RetCC_AArch64_WebKit_JS
3366
196k
                          : RetCC_AArch64_AAPCS;
3367
196k
  // Assign locations to each value returned by this call.
3368
196k
  SmallVector<CCValAssign, 16> RVLocs;
3369
196k
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
3370
196k
                 *DAG.getContext());
3371
196k
  CCInfo.AnalyzeCallResult(Ins, RetCC);
3372
196k
3373
196k
  // Copy all of the result registers out of their specified physreg.
3374
307k
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
3375
111k
    CCValAssign VA = RVLocs[i];
3376
111k
3377
111k
    // Pass 'this' value directly from the argument to return value, to avoid
3378
111k
    // reg unit interference
3379
111k
    if (i == 0 && isThisReturn) {
3380
3.55k
      assert(!VA.needsCustom() && VA.getLocVT() == MVT::i64 &&
3381
3.55k
             "unexpected return calling convention register assignment");
3382
3.55k
      InVals.push_back(ThisVal);
3383
3.55k
      continue;
3384
3.55k
    }
3385
107k
3386
107k
    SDValue Val =
3387
107k
        DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), InFlag);
3388
107k
    Chain = Val.getValue(1);
3389
107k
    InFlag = Val.getValue(2);
3390
107k
3391
107k
    switch (VA.getLocInfo()) {
3392
107k
    default:
3393
0
      llvm_unreachable("Unknown loc info!");
3394
107k
    case CCValAssign::Full:
3395
107k
      break;
3396
107k
    case CCValAssign::BCvt:
3397
136
      Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
3398
136
      break;
3399
107k
    }
3400
107k
3401
107k
    InVals.push_back(Val);
3402
107k
  }
3403
196k
3404
196k
  return Chain;
3405
196k
}
3406
3407
/// Return true if the calling convention is one that we can guarantee TCO for.
3408
82
static bool canGuaranteeTCO(CallingConv::ID CC) {
3409
82
  return CC == CallingConv::Fast;
3410
82
}
3411
3412
/// Return true if we might ever do TCO for calls with this calling convention.
3413
53.5k
static bool mayTailCallThisCC(CallingConv::ID CC) {
3414
53.5k
  switch (CC) {
3415
53.5k
  case CallingConv::C:
3416
53.4k
  case CallingConv::PreserveMost:
3417
53.4k
  case CallingConv::Swift:
3418
53.4k
    return true;
3419
53.4k
  default:
3420
74
    return canGuaranteeTCO(CC);
3421
53.5k
  }
3422
53.5k
}
3423
3424
bool AArch64TargetLowering::isEligibleForTailCallOptimization(
3425
    SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
3426
    const SmallVectorImpl<ISD::OutputArg> &Outs,
3427
    const SmallVectorImpl<SDValue> &OutVals,
3428
53.5k
    const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const {
3429
53.5k
  if (!mayTailCallThisCC(CalleeCC))
3430
3
    return false;
3431
53.5k
3432
53.5k
  MachineFunction &MF = DAG.getMachineFunction();
3433
53.5k
  const Function &CallerF = MF.getFunction();
3434
53.5k
  CallingConv::ID CallerCC = CallerF.getCallingConv();
3435
53.5k
  bool CCMatch = CallerCC == CalleeCC;
3436
53.5k
3437
53.5k
  // Byval parameters hand the function a pointer directly into the stack area
3438
53.5k
  // we want to reuse during a tail call. Working around this *is* possible (see
3439
53.5k
  // X86) but less efficient and uglier in LowerCall.
3440
53.5k
  for (Function::const_arg_iterator i = CallerF.arg_begin(),
3441
53.5k
                                    e = CallerF.arg_end();
3442
598k
       i != e; ++i) {
3443
545k
    if (i->hasByValAttr())
3444
0
      return false;
3445
545k
3446
545k
    // On Windows, "inreg" attributes signify non-aggregate indirect returns.
3447
545k
    // In this case, it is necessary to save/restore X0 in the callee. Tail
3448
545k
    // call opt interferes with this. So we disable tail call opt when the
3449
545k
    // caller has an argument with "inreg" attribute.
3450
545k
3451
545k
    // FIXME: Check whether the callee also has an "inreg" argument.
3452
545k
    if (i->hasInRegAttr())
3453
1
      return false;
3454
545k
  }
3455
53.5k
3456
53.5k
  if (getTargetMachine().Options.GuaranteedTailCallOpt)
3457
8
    return canGuaranteeTCO(CalleeCC) && CCMatch;
3458
53.5k
3459
53.5k
  // Externally-defined functions with weak linkage should not be
3460
53.5k
  // tail-called on AArch64 when the OS does not support dynamic
3461
53.5k
  // pre-emption of symbols, as the AAELF spec requires normal calls
3462
53.5k
  // to undefined weak functions to be replaced with a NOP or jump to the
3463
53.5k
  // next instruction. The behaviour of branch instructions in this
3464
53.5k
  // situation (as used for tail calls) is implementation-defined, so we
3465
53.5k
  // cannot rely on the linker replacing the tail call with a return.
3466
53.5k
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
3467
53.4k
    const GlobalValue *GV = G->getGlobal();
3468
53.4k
    const Triple &TT = getTargetMachine().getTargetTriple();
3469
53.4k
    if (GV->hasExternalWeakLinkage() &&
3470
53.4k
        (!TT.isOSWindows() || TT.isOSBinFormatELF() || TT.isOSBinFormatMachO()))
3471
3
      return false;
3472
53.5k
  }
3473
53.5k
3474
53.5k
  // Now we search for cases where we can use a tail call without changing the
3475
53.5k
  // ABI. Sibcall is used in some places (particularly gcc) to refer to this
3476
53.5k
  // concept.
3477
53.5k
3478
53.5k
  // I want anyone implementing a new calling convention to think long and hard
3479
53.5k
  // about this assert.
3480
53.5k
  assert((!isVarArg || CalleeCC == CallingConv::C) &&
3481
53.5k
         "Unexpected variadic calling convention");
3482
53.5k
3483
53.5k
  LLVMContext &C = *DAG.getContext();
3484
53.5k
  if (isVarArg && !Outs.empty()) {
3485
660
    // At least two cases here: if caller is fastcc then we can't have any
3486
660
    // memory arguments (we'd be expected to clean up the stack afterwards). If
3487
660
    // caller is C then we could potentially use its argument area.
3488
660
3489
660
    // FIXME: for now we take the most conservative of these in both cases:
3490
660
    // disallow all variadic memory operands.
3491
660
    SmallVector<CCValAssign, 16> ArgLocs;
3492
660
    CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
3493
660
3494
660
    CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, true));
3495
660
    for (const CCValAssign &ArgLoc : ArgLocs)
3496
668
      if (!ArgLoc.isRegLoc())
3497
658
        return false;
3498
660
  }
3499
53.5k
3500
53.5k
  // Check that the call results are passed in the same way.
3501
53.5k
  
  if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
3502
52.8k
                                  CCAssignFnForCall(CalleeCC, isVarArg),
3503
52.8k
                                  CCAssignFnForCall(CallerCC, isVarArg)))
3504
0
    return false;
3505
52.8k
  // The callee has to preserve all registers the caller needs to preserve.
3506
52.8k
  const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
3507
52.8k
  const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
3508
52.8k
  if (!CCMatch) {
3509
80
    const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
3510
80
    if (Subtarget->hasCustomCallingConv()) {
3511
0
      TRI->UpdateCustomCallPreservedMask(MF, &CallerPreserved);
3512
0
      TRI->UpdateCustomCallPreservedMask(MF, &CalleePreserved);
3513
0
    }
3514
80
    if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
3515
4
      return false;
3516
52.8k
  }
3517
52.8k
3518
52.8k
  // Nothing more to check if the callee is taking no arguments
3519
52.8k
  if (Outs.empty())
3520
172
    return true;
3521
52.7k
3522
52.7k
  SmallVector<CCValAssign, 16> ArgLocs;
3523
52.7k
  CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
3524
52.7k
3525
52.7k
  CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, isVarArg));
3526
52.7k
3527
52.7k
  const AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
3528
52.7k
3529
52.7k
  // If the stack arguments for this call do not fit into our own save area then
3530
52.7k
  // the call cannot be made a tail call.
3531
52.7k
  if (CCInfo.getNextStackOffset() > FuncInfo->getBytesInStackArgArea())
3532
31
    return false;
3533
52.6k
3534
52.6k
  const MachineRegisterInfo &MRI = MF.getRegInfo();
3535
52.6k
  if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
3536
3
    return false;
3537
52.6k
3538
52.6k
  return true;
3539
52.6k
}
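The stack-argument check above can be illustrated with a small caller/callee pair (an editorial sketch, not part of the coverage listing; the function names are invented). Under AAPCS64 the first eight integer arguments travel in x0-x7, so the ninth i64 argument below needs a stack slot; if the caller itself received no stack arguments, getBytesInStackArgArea() is 0, CCInfo.getNextStackOffset() is 8, and the tail call is rejected.

long callee9(long, long, long, long, long, long, long, long, long);
long caller8(long a, long b, long c, long d, long e, long f, long g, long h) {
  // The ninth argument (42) needs stack space the caller does not own.
  return callee9(a, b, c, d, e, f, g, h, 42);
}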
3540
3541
SDValue AArch64TargetLowering::addTokenForArgument(SDValue Chain,
3542
                                                   SelectionDAG &DAG,
3543
                                                   MachineFrameInfo &MFI,
3544
13
                                                   int ClobberedFI) const {
3545
13
  SmallVector<SDValue, 8> ArgChains;
3546
13
  int64_t FirstByte = MFI.getObjectOffset(ClobberedFI);
3547
13
  int64_t LastByte = FirstByte + MFI.getObjectSize(ClobberedFI) - 1;
3548
13
3549
13
  // Include the original chain at the beginning of the list. When this is
3550
13
  // used by target LowerCall hooks, this helps legalize find the
3551
13
  // CALLSEQ_BEGIN node.
3552
13
  ArgChains.push_back(Chain);
3553
13
3554
13
  // Add a chain value for each incoming stack-argument load that overlaps the
  // slot this call is about to clobber.
3555
13
  for (SDNode::use_iterator U = DAG.getEntryNode().getNode()->use_begin(),
3556
13
                            UE = DAG.getEntryNode().getNode()->use_end();
3557
154
       U != UE; ++U)
3558
141
    if (LoadSDNode *L = dyn_cast<LoadSDNode>(*U))
3559
22
      if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(L->getBasePtr()))
3560
18
        if (FI->getIndex() < 0) {
3561
18
          int64_t InFirstByte = MFI.getObjectOffset(FI->getIndex());
3562
18
          int64_t InLastByte = InFirstByte;
3563
18
          InLastByte += MFI.getObjectSize(FI->getIndex()) - 1;
3564
18
3565
18
          if ((InFirstByte <= FirstByte && FirstByte <= InLastByte) ||
3566
18
              (FirstByte <= InFirstByte && InFirstByte <= LastByte))
3567
9
            ArgChains.push_back(SDValue(L, 1));
3568
18
        }
3569
13
3570
13
  // Build a tokenfactor for all the chains.
3571
13
  return DAG.getNode(ISD::TokenFactor, SDLoc(Chain), MVT::Other, ArgChains);
3572
13
}
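The loop above reduces to a closed-interval overlap test between the clobbered slot and each incoming stack-argument load. A standalone sketch of that predicate (the helper name is invented for illustration):

static bool byteRangesOverlap(int64_t FirstByte, int64_t LastByte,
                              int64_t InFirstByte, int64_t InLastByte) {
  // Two closed byte ranges overlap iff the start of one lies inside the other.
  return (InFirstByte <= FirstByte && FirstByte <= InLastByte) ||
         (FirstByte <= InFirstByte && InFirstByte <= LastByte);
}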
3573
3574
bool AArch64TargetLowering::DoesCalleeRestoreStack(CallingConv::ID CallCC,
3575
233k
                                                   bool TailCallOpt) const {
3576
233k
  return CallCC == CallingConv::Fast && TailCallOpt;
3577
233k
}
3578
3579
/// LowerCall - Lower a call to a callseq_start + CALL + callseq_end chain,
3580
/// and add input and output parameter nodes.
3581
SDValue
3582
AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
3583
248k
                                 SmallVectorImpl<SDValue> &InVals) const {
3584
248k
  SelectionDAG &DAG = CLI.DAG;
3585
248k
  SDLoc &DL = CLI.DL;
3586
248k
  SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
3587
248k
  SmallVector<SDValue, 32> &OutVals = CLI.OutVals;
3588
248k
  SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins;
3589
248k
  SDValue Chain = CLI.Chain;
3590
248k
  SDValue Callee = CLI.Callee;
3591
248k
  bool &IsTailCall = CLI.IsTailCall;
3592
248k
  CallingConv::ID CallConv = CLI.CallConv;
3593
248k
  bool IsVarArg = CLI.IsVarArg;
3594
248k
3595
248k
  MachineFunction &MF = DAG.getMachineFunction();
3596
248k
  bool IsThisReturn = false;
3597
248k
3598
248k
  AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
3599
248k
  bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt;
3600
248k
  bool IsSibCall = false;
3601
248k
3602
248k
  if (IsTailCall) {
3603
53.5k
    // Check if it's really possible to do a tail call.
3604
53.5k
    IsTailCall = isEligibleForTailCallOptimization(
3605
53.5k
        Callee, CallConv, IsVarArg, Outs, OutVals, Ins, DAG);
3606
53.5k
    if (!IsTailCall && CLI.CS && CLI.CS.isMustTailCall())
3607
0
      report_fatal_error("failed to perform tail call elimination on a call "
3608
0
                         "site marked musttail");
3609
53.5k
3610
53.5k
    // A sibling call is one where we're under the usual C ABI and not planning
3611
53.5k
    // to change that but can still do a tail call:
3612
53.5k
    if (!TailCallOpt && IsTailCall)
3613
52.8k
      IsSibCall = true;
3614
53.5k
3615
53.5k
    if (IsTailCall)
3616
52.8k
      ++NumTailCalls;
3617
53.5k
  }
3618
248k
3619
248k
  // Analyze operands of the call, assigning locations to each operand.
3620
248k
  SmallVector<CCValAssign, 16> ArgLocs;
3621
248k
  CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), ArgLocs,
3622
248k
                 *DAG.getContext());
3623
248k
3624
248k
  if (IsVarArg) {
3625
9.39k
    // Handle fixed and variable vector arguments differently.
3626
9.39k
    // Variable vector arguments always go into memory.
3627
9.39k
    unsigned NumArgs = Outs.size();
3628
9.39k
3629
42.2k
    for (unsigned i = 0; i != NumArgs; ++i) {
3630
32.8k
      MVT ArgVT = Outs[i].VT;
3631
32.8k
      ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
3632
32.8k
      CCAssignFn *AssignFn = CCAssignFnForCall(CallConv,
3633
32.8k
                                               /*IsVarArg=*/ !Outs[i].IsFixed);
3634
32.8k
      bool Res = AssignFn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo);
3635
32.8k
      assert(!Res && "Call operand has unhandled type");
3636
32.8k
      (void)Res;
3637
32.8k
    }
3638
239k
  } else {
3639
239k
    // At this point, Outs[].VT may already be promoted to i32. To correctly
3640
239k
    // handle passing i8 as i8 instead of i32 on stack, we pass in both i32 and
3641
239k
    // i8 to CC_AArch64_AAPCS with i32 being ValVT and i8 being LocVT.
3642
239k
    // Since AnalyzeCallOperands uses Ins[].VT for both ValVT and LocVT, here
3643
239k
    // we use a special version of AnalyzeCallOperands to pass in ValVT and
3644
239k
    // LocVT.
3645
239k
    unsigned NumArgs = Outs.size();
3646
789k
    for (unsigned i = 0; i != NumArgs; ++i) {
3647
549k
      MVT ValVT = Outs[i].VT;
3648
549k
      // Get type of the original argument.
3649
549k
      EVT ActualVT = getValueType(DAG.getDataLayout(),
3650
549k
                                  CLI.getArgs()[Outs[i].OrigArgIndex].Ty,
3651
549k
                                  /*AllowUnknown*/ true);
3652
549k
      MVT ActualMVT = ActualVT.isSimple() ? ActualVT.getSimpleVT() : ValVT;
3653
549k
      ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
3654
549k
      // If ActualMVT is i1/i8/i16, we should set LocVT to i8/i8/i16.
3655
549k
      if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8)
3656
5.14k
        ValVT = MVT::i8;
3657
544k
      else if (ActualMVT == MVT::i16)
3658
1.67k
        ValVT = MVT::i16;
3659
549k
3660
549k
      CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, /*IsVarArg=*/false);
3661
549k
      bool Res = AssignFn(i, ValVT, ValVT, CCValAssign::Full, ArgFlags, CCInfo);
3662
549k
      assert(!Res && "Call operand has unhandled type");
3663
549k
      (void)Res;
3664
549k
    }
3665
239k
  }
3666
248k
3667
248k
  // Get a count of how many bytes are to be pushed on the stack.
3668
248k
  unsigned NumBytes = CCInfo.getNextStackOffset();
3669
248k
3670
248k
  if (IsSibCall) {
3671
52.8k
    // Since we're not changing the ABI to make this a tail call, the memory
3672
52.8k
    // operands are already available in the caller's incoming argument space.
3673
52.8k
    NumBytes = 0;
3674
52.8k
  }
3675
248k
3676
248k
  // FPDiff is the byte offset of the call's argument area from the callee's.
3677
248k
  // Stores to callee stack arguments will be placed in FixedStackSlots offset
3678
248k
  // by this amount for a tail call. In a sibling call it must be 0 because the
3679
248k
  // caller will deallocate the entire stack and the callee still expects its
3680
248k
  // arguments to begin at SP+0. Completely unused for non-tail calls.
3681
248k
  int FPDiff = 0;
3682
248k
3683
248k
  if (IsTailCall && !IsSibCall) {
3684
7
    unsigned NumReusableBytes = FuncInfo->getBytesInStackArgArea();
3685
7
3686
7
    // Since callee will pop argument stack as a tail call, we must keep the
3687
7
    // popped size 16-byte aligned.
3688
7
    NumBytes = alignTo(NumBytes, 16);
3689
7
3690
7
    // FPDiff will be negative if this tail call requires more space than we
3691
7
    // would automatically have in our incoming argument space. Positive if we
3692
7
    // can actually shrink the stack.
3693
7
    FPDiff = NumReusableBytes - NumBytes;
3694
7
3695
7
    // The stack pointer must be 16-byte aligned at all times it's used for a
3696
7
    // memory operation, which in practice means at *all* times and in
3697
7
    // particular across call boundaries. Therefore our own arguments started at
3698
7
    // a 16-byte aligned SP and the delta applied for the tail call should
3699
7
    // satisfy the same constraint.
3700
7
    assert(FPDiff % 16 == 0 && "unaligned stack on tail call");
3701
7
  }
3702
248k
3703
248k
  // Adjust the stack pointer for the new arguments...
3704
248k
  // These operations are automatically eliminated by the prolog/epilog pass
3705
248k
  if (!IsSibCall)
3706
196k
    Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, DL);
3707
248k
3708
248k
  SDValue StackPtr = DAG.getCopyFromReg(Chain, DL, AArch64::SP,
3709
248k
                                        getPointerTy(DAG.getDataLayout()));
3710
248k
3711
248k
  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
3712
248k
  SmallVector<SDValue, 8> MemOpChains;
3713
248k
  auto PtrVT = getPointerTy(DAG.getDataLayout());
3714
248k
3715
248k
  if (IsVarArg && CLI.CS && CLI.CS.isMustTailCall()) {
3716
2
    const auto &Forwards = FuncInfo->getForwardedMustTailRegParms();
3717
32
    for (const auto &F : Forwards) {
3718
32
      SDValue Val = DAG.getCopyFromReg(Chain, DL, F.VReg, F.VT);
3719
32
       RegsToPass.push_back(std::make_pair(unsigned(F.PReg), Val));
3720
32
    }
3721
2
  }
3722
248k
3723
248k
  // Walk the register/memloc assignments, inserting copies/loads.
3724
831k
  for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size(); i != e;
3725
582k
       ++i, ++realArgIdx) {
3726
582k
    CCValAssign &VA = ArgLocs[i];
3727
582k
    SDValue Arg = OutVals[realArgIdx];
3728
582k
    ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
3729
582k
3730
582k
    // Promote the value if needed.
3731
582k
    switch (VA.getLocInfo()) {
3732
582k
    default:
3733
0
      llvm_unreachable("Unknown loc info!");
3734
582k
    case CCValAssign::Full:
3735
569k
      break;
3736
582k
    case CCValAssign::SExt:
3737
386
      Arg = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Arg);
3738
386
      break;
3739
582k
    case CCValAssign::ZExt:
3740
6.25k
      Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg);
3741
6.25k
      break;
3742
582k
    case CCValAssign::AExt:
3743
5.74k
      if (Outs[realArgIdx].ArgVT == MVT::i1) {
3744
2
        // AAPCS requires i1 to be zero-extended to 8-bits by the caller.
3745
2
        Arg = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Arg);
3746
2
        Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i8, Arg);
3747
2
      }
3748
5.74k
      Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg);
3749
5.74k
      break;
3750
582k
    case CCValAssign::BCvt:
3751
230
      Arg = DAG.getNode(ISD::BITCAST, DL, VA.getLocVT(), Arg);
3752
230
      break;
3753
582k
    case CCValAssign::FPExt:
3754
0
      Arg = DAG.getNode(ISD::FP_EXTEND, DL, VA.getLocVT(), Arg);
3755
0
      break;
3756
582k
    }
3757
582k
3758
582k
    if (VA.isRegLoc()) {
3759
560k
      if (realArgIdx == 0 && Flags.isReturned() && !Flags.isSwiftSelf() &&
3760
560k
          Outs[0].VT == MVT::i64) {
3761
3.62k
        assert(VA.getLocVT() == MVT::i64 &&
3762
3.62k
               "unexpected calling convention register assignment");
3763
3.62k
        assert(!Ins.empty() && Ins[0].VT == MVT::i64 &&
3764
3.62k
               "unexpected use of 'returned'");
3765
3.62k
        IsThisReturn = true;
3766
3.62k
      }
3767
560k
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
3768
560k
    } else {
3769
21.4k
      assert(VA.isMemLoc());
3770
21.4k
3771
21.4k
      SDValue DstAddr;
3772
21.4k
      MachinePointerInfo DstInfo;
3773
21.4k
3774
21.4k
      // FIXME: This works on big-endian for composite byvals, which are the
3775
21.4k
      // common case. It should also work for fundamental types too.
3776
21.4k
      uint32_t BEAlign = 0;
3777
21.4k
      unsigned OpSize = Flags.isByVal() ? Flags.getByValSize() * 8
3778
21.4k
                                        : VA.getValVT().getSizeInBits();
3779
21.4k
      OpSize = (OpSize + 7) / 8;
3780
21.4k
      if (!Subtarget->isLittleEndian() && !Flags.isByVal() &&
3781
21.4k
          !Flags.isInConsecutiveRegs()) {
3782
8
        if (OpSize < 8)
3783
6
          BEAlign = 8 - OpSize;
3784
8
      }
3785
21.4k
      unsigned LocMemOffset = VA.getLocMemOffset();
3786
21.4k
      int32_t Offset = LocMemOffset + BEAlign;
3787
21.4k
      SDValue PtrOff = DAG.getIntPtrConstant(Offset, DL);
3788
21.4k
      PtrOff = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff);
3789
21.4k
3790
21.4k
      if (IsTailCall) {
3791
13
        Offset = Offset + FPDiff;
3792
13
        int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
3793
13
3794
13
        DstAddr = DAG.getFrameIndex(FI, PtrVT);
3795
13
        DstInfo =
3796
13
            MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
3797
13
3798
13
        // Make sure any stack arguments overlapping with where we're storing
3799
13
        // are loaded before this eventual operation. Otherwise they'll be
3800
13
        // clobbered.
3801
13
        Chain = addTokenForArgument(Chain, DAG, MF.getFrameInfo(), FI);
3802
21.4k
      } else {
3803
21.4k
        SDValue PtrOff = DAG.getIntPtrConstant(Offset, DL);
3804
21.4k
3805
21.4k
        DstAddr = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff);
3806
21.4k
        DstInfo = MachinePointerInfo::getStack(DAG.getMachineFunction(),
3807
21.4k
                                               LocMemOffset);
3808
21.4k
      }
3809
21.4k
3810
21.4k
      if (Outs[i].Flags.isByVal()) {
3811
9
        SDValue SizeNode =
3812
9
            DAG.getConstant(Outs[i].Flags.getByValSize(), DL, MVT::i64);
3813
9
        SDValue Cpy = DAG.getMemcpy(
3814
9
            Chain, DL, DstAddr, Arg, SizeNode, Outs[i].Flags.getByValAlign(),
3815
9
            /*isVol = */ false, /*AlwaysInline = */ false,
3816
9
            /*isTailCall = */ false,
3817
9
            DstInfo, MachinePointerInfo());
3818
9
3819
9
        MemOpChains.push_back(Cpy);
3820
21.4k
      } else {
3821
21.4k
        // Since we pass i1/i8/i16 as i1/i8/i16 on stack and Arg is already
3822
21.4k
        // promoted to a legal register type i32, we should truncate Arg back to
3823
21.4k
        // i1/i8/i16.
3824
21.4k
        if (VA.getValVT() == MVT::i1 || VA.getValVT() == MVT::i8 ||
3825
21.4k
            VA.getValVT() == MVT::i16)
3826
27
          Arg = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Arg);
3827
21.4k
3828
21.4k
        SDValue Store = DAG.getStore(Chain, DL, Arg, DstAddr, DstInfo);
3829
21.4k
        MemOpChains.push_back(Store);
3830
21.4k
      }
3831
21.4k
    }
3832
582k
  }
3833
248k
3834
248k
  if (!MemOpChains.empty())
3835
9.10k
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
3836
248k
3837
248k
  // Build a sequence of copy-to-reg nodes chained together with token chain
3838
248k
  // and flag operands which copy the outgoing args into the appropriate regs.
3839
248k
  SDValue InFlag;
3840
560k
  for (auto &RegToPass : RegsToPass) {
3841
560k
    Chain = DAG.getCopyToReg(Chain, DL, RegToPass.first,
3842
560k
                             RegToPass.second, InFlag);
3843
560k
    InFlag = Chain.getValue(1);
3844
560k
  }
3845
248k
3846
248k
  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
3847
248k
  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
3848
248k
  // node so that legalize doesn't hack it.
3849
248k
  if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
3850
240k
    auto GV = G->getGlobal();
3851
240k
    if (Subtarget->classifyGlobalFunctionReference(GV, getTargetMachine()) ==
3852
240k
        AArch64II::MO_GOT) {
3853
2
      Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_GOT);
3854
2
      Callee = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, Callee);
3855
240k
    } else if (Subtarget->isTargetCOFF() && GV->hasDLLImportStorageClass()) {
3856
7
      assert(Subtarget->isTargetWindows() &&
3857
7
             "Windows is the only supported COFF target");
3858
7
      Callee = getGOT(G, DAG, AArch64II::MO_DLLIMPORT);
3859
240k
    } else {
3860
240k
      const GlobalValue *GV = G->getGlobal();
3861
240k
      Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, 0);
3862
240k
    }
3863
240k
  } else if (auto *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
3864
6.20k
    if (getTargetMachine().getCodeModel() == CodeModel::Large &&
3865
6.20k
        Subtarget->isTargetMachO()) {
3866
0
      const char *Sym = S->getSymbol();
3867
0
      Callee = DAG.getTargetExternalSymbol(Sym, PtrVT, AArch64II::MO_GOT);
3868
0
      Callee = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, Callee);
3869
6.20k
    } else {
3870
6.20k
      const char *Sym = S->getSymbol();
3871
6.20k
      Callee = DAG.getTargetExternalSymbol(Sym, PtrVT, 0);
3872
6.20k
    }
3873
6.20k
  }
3874
248k
3875
248k
  // We don't usually want to end the call-sequence here because we would tidy
3876
248k
  // the frame up *after* the call, however in the ABI-changing tail-call case
3877
248k
  // we've carefully laid out the parameters so that when sp is reset they'll be
3878
248k
  // in the correct location.
3879
248k
  if (IsTailCall && !IsSibCall) {
3880
7
    Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, DL, true),
3881
7
                               DAG.getIntPtrConstant(0, DL, true), InFlag, DL);
3882
7
    InFlag = Chain.getValue(1);
3883
7
  }
3884
248k
3885
248k
  std::vector<SDValue> Ops;
3886
248k
  Ops.push_back(Chain);
3887
248k
  Ops.push_back(Callee);
3888
248k
3889
248k
  if (IsTailCall) {
3890
52.8k
    // Each tail call may have to adjust the stack by a different amount, so
3891
52.8k
    // this information must travel along with the operation for eventual
3892
52.8k
    // consumption by emitEpilogue.
3893
52.8k
    Ops.push_back(DAG.getTargetConstant(FPDiff, DL, MVT::i32));
3894
52.8k
  }
3895
248k
3896
248k
  // Add argument registers to the end of the list so that they are known live
3897
248k
  // into the call.
3898
248k
  for (auto &RegToPass : RegsToPass)
3899
560k
    Ops.push_back(DAG.getRegister(RegToPass.first,
3900
560k
                                  RegToPass.second.getValueType()));
3901
248k
3902
248k
  // Add a register mask operand representing the call-preserved registers.
3903
248k
  const uint32_t *Mask;
3904
248k
  const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
3905
248k
  if (IsThisReturn) {
3906
3.62k
    // For 'this' returns, use the X0-preserving mask if applicable
3907
3.62k
    Mask = TRI->getThisReturnPreservedMask(MF, CallConv);
3908
3.62k
    if (!Mask) {
3909
0
      IsThisReturn = false;
3910
0
      Mask = TRI->getCallPreservedMask(MF, CallConv);
3911
0
    }
3912
3.62k
  } else
3913
245k
    Mask = TRI->getCallPreservedMask(MF, CallConv);
3914
248k
3915
248k
  if (Subtarget->hasCustomCallingConv())
3916
10
    TRI->UpdateCustomCallPreservedMask(MF, &Mask);
3917
248k
3918
248k
  if (TRI->isAnyArgRegReserved(MF))
3919
2
    TRI->emitReservedArgRegCallError(MF);
3920
248k
3921
248k
  assert(Mask && "Missing call preserved mask for calling convention");
3922
248k
  Ops.push_back(DAG.getRegisterMask(Mask));
3923
248k
3924
248k
  if (InFlag.getNode())
3925
214k
    Ops.push_back(InFlag);
3926
248k
3927
248k
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
3928
248k
3929
248k
  // If we're doing a tail call, use a TC_RETURN here rather than an
3930
248k
  // actual call instruction.
3931
248k
  if (IsTailCall) {
3932
52.8k
    MF.getFrameInfo().setHasTailCall();
3933
52.8k
    return DAG.getNode(AArch64ISD::TC_RETURN, DL, NodeTys, Ops);
3934
52.8k
  }
3935
196k
3936
196k
  // Returns a chain and a flag for retval copy to use.
3937
196k
  Chain = DAG.getNode(AArch64ISD::CALL, DL, NodeTys, Ops);
3938
196k
  InFlag = Chain.getValue(1);
3939
196k
3940
196k
  uint64_t CalleePopBytes =
3941
196k
      DoesCalleeRestoreStack(CallConv, TailCallOpt) ? alignTo(NumBytes, 16) : 0;
3942
196k
3943
196k
  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, DL, true),
3944
196k
                             DAG.getIntPtrConstant(CalleePopBytes, DL, true),
3945
196k
                             InFlag, DL);
3946
196k
  if (!Ins.empty())
3947
110k
    InFlag = Chain.getValue(1);
3948
196k
3949
196k
  // Handle result values, copying them out of physregs into vregs that we
3950
196k
  // return.
3951
196k
  return LowerCallResult(Chain, InFlag, CallConv, IsVarArg, Ins, DL, DAG,
3952
196k
                         InVals, IsThisReturn,
3953
196k
                         IsThisReturn ? OutVals[0] : SDValue());
3954
196k
}
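The FPDiff bookkeeping in LowerCall can be hard to follow from the listing alone; here is a worked sketch with invented numbers (it only applies on the GuaranteedTailCallOpt path, where the call is a true tail call rather than a sibcall, and assumes llvm::alignTo from llvm/Support/MathExtras.h):

static int exampleFPDiff() {
  unsigned NumReusableBytes = 16;            // caller's incoming stack-arg area
  unsigned NumBytes = llvm::alignTo(24, 16); // callee needs 24 bytes, padded to 32
  // Result is -16: each outgoing fixed stack object is created at Offset + FPDiff
  // so it lands correctly once SP has been adjusted for the tail call.
  return NumReusableBytes - NumBytes;
}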
3955
3956
bool AArch64TargetLowering::CanLowerReturn(
3957
    CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg,
3958
287k
    const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
3959
287k
  CCAssignFn *RetCC = CallConv == CallingConv::WebKit_JS
3960
287k
                          ? RetCC_AArch64_WebKit_JS
3961
287k
                          : RetCC_AArch64_AAPCS;
3962
287k
  SmallVector<CCValAssign, 16> RVLocs;
3963
287k
  CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
3964
287k
  return CCInfo.CheckReturn(Outs, RetCC);
3965
287k
}
3966
3967
SDValue
3968
AArch64TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
3969
                                   bool isVarArg,
3970
                                   const SmallVectorImpl<ISD::OutputArg> &Outs,
3971
                                   const SmallVectorImpl<SDValue> &OutVals,
3972
36.9k
                                   const SDLoc &DL, SelectionDAG &DAG) const {
3973
36.9k
  auto &MF = DAG.getMachineFunction();
3974
36.9k
  auto *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
3975
36.9k
3976
36.9k
  CCAssignFn *RetCC = CallConv == CallingConv::WebKit_JS
3977
36.9k
                          ? RetCC_AArch64_WebKit_JS
3978
36.9k
                          : RetCC_AArch64_AAPCS;
3979
36.9k
  SmallVector<CCValAssign, 16> RVLocs;
3980
36.9k
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
3981
36.9k
                 *DAG.getContext());
3982
36.9k
  CCInfo.AnalyzeReturn(Outs, RetCC);
3983
36.9k
3984
36.9k
  // Copy the result values into the output registers.
3985
36.9k
  SDValue Flag;
3986
36.9k
  SmallVector<SDValue, 4> RetOps(1, Chain);
3987
62.1k
  for (unsigned i = 0, realRVLocIdx = 0; i != RVLocs.size();
3988
36.9k
       ++i, ++realRVLocIdx) {
3989
25.2k
    CCValAssign &VA = RVLocs[i];
3990
25.2k
    assert(VA.isRegLoc() && "Can only return in registers!");
3991
25.2k
    SDValue Arg = OutVals[realRVLocIdx];
3992
25.2k
3993
25.2k
    switch (VA.getLocInfo()) {
3994
25.2k
    default:
3995
0
      llvm_unreachable("Unknown loc info!");
3996
25.2k
    case CCValAssign::Full:
3997
23.9k
      if (Outs[i].ArgVT == MVT::i1) {
3998
305
        // AAPCS requires i1 to be zero-extended to i8 by the producer of the
3999
305
        // value. This is strictly redundant on Darwin (which uses "zeroext
4000
305
        // i1"), but will be optimised out before ISel.
4001
305
        Arg = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Arg);
4002
305
        Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg);
4003
305
      }
4004
23.9k
      break;
4005
25.2k
    case CCValAssign::BCvt:
4006
1.23k
      Arg = DAG.getNode(ISD::BITCAST, DL, VA.getLocVT(), Arg);
4007
1.23k
      break;
4008
25.2k
    }
4009
25.2k
4010
25.2k
    Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Arg, Flag);
4011
25.2k
    Flag = Chain.getValue(1);
4012
25.2k
    RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
4013
25.2k
  }
4014
36.9k
4015
36.9k
  // Windows AArch64 ABIs require that for returning structs by value we copy
4016
36.9k
  // the sret argument into X0 for the return.
4017
36.9k
  // We saved the argument into a virtual register in the entry block,
4018
36.9k
  // so now we copy the value out and into X0.
4019
36.9k
  if (unsigned SRetReg = FuncInfo->getSRetReturnReg()) {
4020
3
    SDValue Val = DAG.getCopyFromReg(RetOps[0], DL, SRetReg,
4021
3
                                     getPointerTy(MF.getDataLayout()));
4022
3
4023
3
    unsigned RetValReg = AArch64::X0;
4024
3
    Chain = DAG.getCopyToReg(Chain, DL, RetValReg, Val, Flag);
4025
3
    Flag = Chain.getValue(1);
4026
3
4027
3
    RetOps.push_back(
4028
3
      DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout())));
4029
3
  }
4030
36.9k
4031
36.9k
  const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
4032
36.9k
  const MCPhysReg *I =
4033
36.9k
      TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
4034
36.9k
  if (I) {
4035
456
    for (; *I; ++I) {
4036
448
      if (AArch64::GPR64RegClass.contains(*I))
4037
192
        RetOps.push_back(DAG.getRegister(*I, MVT::i64));
4038
256
      else if (AArch64::FPR64RegClass.contains(*I))
4039
256
        RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64)));
4040
256
      else
4041
256
        llvm_unreachable("Unexpected register class in CSRsViaCopy!");
4042
448
    }
4043
8
  }
4044
36.9k
4045
36.9k
  RetOps[0] = Chain; // Update chain.
4046
36.9k
4047
36.9k
  // Add the flag if we have it.
4048
36.9k
  if (Flag.getNode())
4049
23.8k
    RetOps.push_back(Flag);
4050
36.9k
4051
36.9k
  return DAG.getNode(AArch64ISD::RET_FLAG, DL, MVT::Other, RetOps);
4052
36.9k
}
4053
4054
//===----------------------------------------------------------------------===//
4055
//  Other Lowering Code
4056
//===----------------------------------------------------------------------===//
4057
4058
SDValue AArch64TargetLowering::getTargetNode(GlobalAddressSDNode *N, EVT Ty,
4059
                                             SelectionDAG &DAG,
4060
405k
                                             unsigned Flag) const {
4061
405k
  return DAG.getTargetGlobalAddress(N->getGlobal(), SDLoc(N), Ty,
4062
405k
                                    N->getOffset(), Flag);
4063
405k
}
4064
4065
SDValue AArch64TargetLowering::getTargetNode(JumpTableSDNode *N, EVT Ty,
4066
                                             SelectionDAG &DAG,
4067
3.32k
                                             unsigned Flag) const {
4068
3.32k
  return DAG.getTargetJumpTable(N->getIndex(), Ty, Flag);
4069
3.32k
}
4070
4071
SDValue AArch64TargetLowering::getTargetNode(ConstantPoolSDNode *N, EVT Ty,
4072
                                             SelectionDAG &DAG,
4073
61.1k
                                             unsigned Flag) const {
4074
61.1k
  return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlignment(),
4075
61.1k
                                   N->getOffset(), Flag);
4076
61.1k
}
4077
4078
SDValue AArch64TargetLowering::getTargetNode(BlockAddressSDNode* N, EVT Ty,
4079
                                             SelectionDAG &DAG,
4080
17
                                             unsigned Flag) const {
4081
17
  return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, 0, Flag);
4082
17
}
4083
4084
// (loadGOT sym)
4085
template <class NodeTy>
4086
SDValue AArch64TargetLowering::getGOT(NodeTy *N, SelectionDAG &DAG,
4087
48.0k
                                      unsigned Flags) const {
4088
48.0k
  LLVM_DEBUG(dbgs() << "AArch64TargetLowering::getGOT\n");
4089
48.0k
  SDLoc DL(N);
4090
48.0k
  EVT Ty = getPointerTy(DAG.getDataLayout());
4091
48.0k
  SDValue GotAddr = getTargetNode(N, Ty, DAG, AArch64II::MO_GOT | Flags);
4092
48.0k
  // FIXME: Once remat is capable of dealing with instructions with register
4093
48.0k
  // operands, expand this into two nodes instead of using a wrapper node.
4094
48.0k
  return DAG.getNode(AArch64ISD::LOADgot, DL, Ty, GotAddr);
4095
48.0k
}
llvm::SDValue llvm::AArch64TargetLowering::getGOT<llvm::GlobalAddressSDNode>(llvm::GlobalAddressSDNode*, llvm::SelectionDAG&, unsigned int) const
Line
Count
Source
4087
48.0k
                                      unsigned Flags) const {
4088
48.0k
  LLVM_DEBUG(dbgs() << "AArch64TargetLowering::getGOT\n");
4089
48.0k
  SDLoc DL(N);
4090
48.0k
  EVT Ty = getPointerTy(DAG.getDataLayout());
4091
48.0k
  SDValue GotAddr = getTargetNode(N, Ty, DAG, AArch64II::MO_GOT | Flags);
4092
48.0k
  // FIXME: Once remat is capable of dealing with instructions with register
4093
48.0k
  // operands, expand this into two nodes instead of using a wrapper node.
4094
48.0k
  return DAG.getNode(AArch64ISD::LOADgot, DL, Ty, GotAddr);
4095
48.0k
}
Unexecuted instantiation: llvm::SDValue llvm::AArch64TargetLowering::getGOT<llvm::ConstantPoolSDNode>(llvm::ConstantPoolSDNode*, llvm::SelectionDAG&, unsigned int) const
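As a hedged source-level illustration of when getGOT() above is reached (an editorial sketch; it assumes the symbol is classified MO_GOT, e.g. a preemptible extern global in a position-independent ELF build): the LOADgot wrapper is ultimately selected to an adrp of :got:g followed by an ldr from [reg, :got_lo12:g].

extern int g;                       // ends up behind a GOT entry in this scenario
int *addr_of_g_via_got() { return &g; }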
4096
4097
// (wrapper %highest(sym), %higher(sym), %hi(sym), %lo(sym))
4098
template <class NodeTy>
4099
SDValue AArch64TargetLowering::getAddrLarge(NodeTy *N, SelectionDAG &DAG,
4100
28
                                            unsigned Flags) const {
4101
28
  LLVM_DEBUG(dbgs() << "AArch64TargetLowering::getAddrLarge\n");
4102
28
  SDLoc DL(N);
4103
28
  EVT Ty = getPointerTy(DAG.getDataLayout());
4104
28
  const unsigned char MO_NC = AArch64II::MO_NC;
4105
28
  return DAG.getNode(
4106
28
      AArch64ISD::WrapperLarge, DL, Ty,
4107
28
      getTargetNode(N, Ty, DAG, AArch64II::MO_G3 | Flags),
4108
28
      getTargetNode(N, Ty, DAG, AArch64II::MO_G2 | MO_NC | Flags),
4109
28
      getTargetNode(N, Ty, DAG, AArch64II::MO_G1 | MO_NC | Flags),
4110
28
      getTargetNode(N, Ty, DAG, AArch64II::MO_G0 | MO_NC | Flags));
4111
28
}
llvm::SDValue llvm::AArch64TargetLowering::getAddrLarge<llvm::GlobalAddressSDNode>(llvm::GlobalAddressSDNode*, llvm::SelectionDAG&, unsigned int) const
Line
Count
Source
4100
22
                                            unsigned Flags) const {
4101
22
  LLVM_DEBUG(dbgs() << "AArch64TargetLowering::getAddrLarge\n");
4102
22
  SDLoc DL(N);
4103
22
  EVT Ty = getPointerTy(DAG.getDataLayout());
4104
22
  const unsigned char MO_NC = AArch64II::MO_NC;
4105
22
  return DAG.getNode(
4106
22
      AArch64ISD::WrapperLarge, DL, Ty,
4107
22
      getTargetNode(N, Ty, DAG, AArch64II::MO_G3 | Flags),
4108
22
      getTargetNode(N, Ty, DAG, AArch64II::MO_G2 | MO_NC | Flags),
4109
22
      getTargetNode(N, Ty, DAG, AArch64II::MO_G1 | MO_NC | Flags),
4110
22
      getTargetNode(N, Ty, DAG, AArch64II::MO_G0 | MO_NC | Flags));
4111
22
}
llvm::SDValue llvm::AArch64TargetLowering::getAddrLarge<llvm::JumpTableSDNode>(llvm::JumpTableSDNode*, llvm::SelectionDAG&, unsigned int) const
Line
Count
Source
4100
2
                                            unsigned Flags) const {
4101
2
  LLVM_DEBUG(dbgs() << "AArch64TargetLowering::getAddrLarge\n");
4102
2
  SDLoc DL(N);
4103
2
  EVT Ty = getPointerTy(DAG.getDataLayout());
4104
2
  const unsigned char MO_NC = AArch64II::MO_NC;
4105
2
  return DAG.getNode(
4106
2
      AArch64ISD::WrapperLarge, DL, Ty,
4107
2
      getTargetNode(N, Ty, DAG, AArch64II::MO_G3 | Flags),
4108
2
      getTargetNode(N, Ty, DAG, AArch64II::MO_G2 | MO_NC | Flags),
4109
2
      getTargetNode(N, Ty, DAG, AArch64II::MO_G1 | MO_NC | Flags),
4110
2
      getTargetNode(N, Ty, DAG, AArch64II::MO_G0 | MO_NC | Flags));
4111
2
}
llvm::SDValue llvm::AArch64TargetLowering::getAddrLarge<llvm::ConstantPoolSDNode>(llvm::ConstantPoolSDNode*, llvm::SelectionDAG&, unsigned int) const
Line
Count
Source
4100
2
                                            unsigned Flags) const {
4101
2
  LLVM_DEBUG(dbgs() << "AArch64TargetLowering::getAddrLarge\n");
4102
2
  SDLoc DL(N);
4103
2
  EVT Ty = getPointerTy(DAG.getDataLayout());
4104
2
  const unsigned char MO_NC = AArch64II::MO_NC;
4105
2
  return DAG.getNode(
4106
2
      AArch64ISD::WrapperLarge, DL, Ty,
4107
2
      getTargetNode(N, Ty, DAG, AArch64II::MO_G3 | Flags),
4108
2
      getTargetNode(N, Ty, DAG, AArch64II::MO_G2 | MO_NC | Flags),
4109
2
      getTargetNode(N, Ty, DAG, AArch64II::MO_G1 | MO_NC | Flags),
4110
2
      getTargetNode(N, Ty, DAG, AArch64II::MO_G0 | MO_NC | Flags));
4111
2
}
llvm::SDValue llvm::AArch64TargetLowering::getAddrLarge<llvm::BlockAddressSDNode>(llvm::BlockAddressSDNode*, llvm::SelectionDAG&, unsigned int) const
Line
Count
Source
4100
2
                                            unsigned Flags) const {
4101
2
  LLVM_DEBUG(dbgs() << "AArch64TargetLowering::getAddrLarge\n");
4102
2
  SDLoc DL(N);
4103
2
  EVT Ty = getPointerTy(DAG.getDataLayout());
4104
2
  const unsigned char MO_NC = AArch64II::MO_NC;
4105
2
  return DAG.getNode(
4106
2
      AArch64ISD::WrapperLarge, DL, Ty,
4107
2
      getTargetNode(N, Ty, DAG, AArch64II::MO_G3 | Flags),
4108
2
      getTargetNode(N, Ty, DAG, AArch64II::MO_G2 | MO_NC | Flags),
4109
2
      getTargetNode(N, Ty, DAG, AArch64II::MO_G1 | MO_NC | Flags),
4110
2
      getTargetNode(N, Ty, DAG, AArch64II::MO_G0 | MO_NC | Flags));
4111
2
}
4112
4113
// (addlow (adrp %hi(sym)) %lo(sym))
4114
template <class NodeTy>
4115
SDValue AArch64TargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
4116
210k
                                       unsigned Flags) const {
4117
210k
  LLVM_DEBUG(dbgs() << "AArch64TargetLowering::getAddr\n");
4118
210k
  SDLoc DL(N);
4119
210k
  EVT Ty = getPointerTy(DAG.getDataLayout());
4120
210k
  SDValue Hi = getTargetNode(N, Ty, DAG, AArch64II::MO_PAGE | Flags);
4121
210k
  SDValue Lo = getTargetNode(N, Ty, DAG,
4122
210k
                             AArch64II::MO_PAGEOFF | AArch64II::MO_NC | Flags);
4123
210k
  SDValue ADRP = DAG.getNode(AArch64ISD::ADRP, DL, Ty, Hi);
4124
210k
  return DAG.getNode(AArch64ISD::ADDlow, DL, Ty, ADRP, Lo);
4125
210k
}
llvm::SDValue llvm::AArch64TargetLowering::getAddr<llvm::GlobalAddressSDNode>(llvm::GlobalAddressSDNode*, llvm::SelectionDAG&, unsigned int) const
Line
Count
Source
4116
178k
                                       unsigned Flags) const {
4117
178k
  LLVM_DEBUG(dbgs() << "AArch64TargetLowering::getAddr\n");
4118
178k
  SDLoc DL(N);
4119
178k
  EVT Ty = getPointerTy(DAG.getDataLayout());
4120
178k
  SDValue Hi = getTargetNode(N, Ty, DAG, AArch64II::MO_PAGE | Flags);
4121
178k
  SDValue Lo = getTargetNode(N, Ty, DAG,
4122
178k
                             AArch64II::MO_PAGEOFF | AArch64II::MO_NC | Flags);
4123
178k
  SDValue ADRP = DAG.getNode(AArch64ISD::ADRP, DL, Ty, Hi);
4124
178k
  return DAG.getNode(AArch64ISD::ADDlow, DL, Ty, ADRP, Lo);
4125
178k
}
llvm::SDValue llvm::AArch64TargetLowering::getAddr<llvm::JumpTableSDNode>(llvm::JumpTableSDNode*, llvm::SelectionDAG&, unsigned int) const
Line
Count
Source
4116
1.65k
                                       unsigned Flags) const {
4117
1.65k
  LLVM_DEBUG(dbgs() << "AArch64TargetLowering::getAddr\n");
4118
1.65k
  SDLoc DL(N);
4119
1.65k
  EVT Ty = getPointerTy(DAG.getDataLayout());
4120
1.65k
  SDValue Hi = getTargetNode(N, Ty, DAG, AArch64II::MO_PAGE | Flags);
4121
1.65k
  SDValue Lo = getTargetNode(N, Ty, DAG,
4122
1.65k
                             AArch64II::MO_PAGEOFF | AArch64II::MO_NC | Flags);
4123
1.65k
  SDValue ADRP = DAG.getNode(AArch64ISD::ADRP, DL, Ty, Hi);
4124
1.65k
  return DAG.getNode(AArch64ISD::ADDlow, DL, Ty, ADRP, Lo);
4125
1.65k
}
llvm::SDValue llvm::AArch64TargetLowering::getAddr<llvm::ConstantPoolSDNode>(llvm::ConstantPoolSDNode*, llvm::SelectionDAG&, unsigned int) const
Line
Count
Source
4116
30.5k
                                       unsigned Flags) const {
4117
30.5k
  LLVM_DEBUG(dbgs() << "AArch64TargetLowering::getAddr\n");
4118
30.5k
  SDLoc DL(N);
4119
30.5k
  EVT Ty = getPointerTy(DAG.getDataLayout());
4120
30.5k
  SDValue Hi = getTargetNode(N, Ty, DAG, AArch64II::MO_PAGE | Flags);
4121
30.5k
  SDValue Lo = getTargetNode(N, Ty, DAG,
4122
30.5k
                             AArch64II::MO_PAGEOFF | AArch64II::MO_NC | Flags);
4123
30.5k
  SDValue ADRP = DAG.getNode(AArch64ISD::ADRP, DL, Ty, Hi);
4124
30.5k
  return DAG.getNode(AArch64ISD::ADDlow, DL, Ty, ADRP, Lo);
4125
30.5k
}
llvm::SDValue llvm::AArch64TargetLowering::getAddr<llvm::BlockAddressSDNode>(llvm::BlockAddressSDNode*, llvm::SelectionDAG&, unsigned int) const
Line
Count
Source
4116
4
                                       unsigned Flags) const {
4117
4
  LLVM_DEBUG(dbgs() << "AArch64TargetLowering::getAddr\n");
4118
4
  SDLoc DL(N);
4119
4
  EVT Ty = getPointerTy(DAG.getDataLayout());
4120
4
  SDValue Hi = getTargetNode(N, Ty, DAG, AArch64II::MO_PAGE | Flags);
4121
4
  SDValue Lo = getTargetNode(N, Ty, DAG,
4122
4
                             AArch64II::MO_PAGEOFF | AArch64II::MO_NC | Flags);
4123
4
  SDValue ADRP = DAG.getNode(AArch64ISD::ADRP, DL, Ty, Hi);
4124
4
  return DAG.getNode(AArch64ISD::ADDlow, DL, Ty, ADRP, Lo);
4125
4
}
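For contrast with the GOT path, a hedged source-level example of the plain ADRP + ADDlow wrapper built by getAddr() above (an editorial sketch; it assumes the small code model and a symbol with no GOT flags): the pair is selected to adrp reg, g followed by add reg, reg, :lo12:g.

static int g_local;                 // definition-local, so no GOT indirection needed
int *addr_of_g_direct() { return &g_local; }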
4126
4127
// (adr sym)
4128
template <class NodeTy>
4129
SDValue AArch64TargetLowering::getAddrTiny(NodeTy *N, SelectionDAG &DAG,
4130
93
                                           unsigned Flags) const {
4131
93
  LLVM_DEBUG(dbgs() << "AArch64TargetLowering::getAddrTiny\n");
4132
93
  SDLoc DL(N);
4133
93
  EVT Ty = getPointerTy(DAG.getDataLayout());
4134
93
  SDValue Sym = getTargetNode(N, Ty, DAG, Flags);
4135
93
  return DAG.getNode(AArch64ISD::ADR, DL, Ty, Sym);
4136
93
}
llvm::SDValue llvm::AArch64TargetLowering::getAddrTiny<llvm::GlobalAddressSDNode>(llvm::GlobalAddressSDNode*, llvm::SelectionDAG&, unsigned int) const
Line
Count
Source
4130
88
                                           unsigned Flags) const {
4131
88
  LLVM_DEBUG(dbgs() << "AArch64TargetLowering::getAddrTiny\n");
4132
88
  SDLoc DL(N);
4133
88
  EVT Ty = getPointerTy(DAG.getDataLayout());
4134
88
  SDValue Sym = getTargetNode(N, Ty, DAG, Flags);
4135
88
  return DAG.getNode(AArch64ISD::ADR, DL, Ty, Sym);
4136
88
}
llvm::SDValue llvm::AArch64TargetLowering::getAddrTiny<llvm::JumpTableSDNode>(llvm::JumpTableSDNode*, llvm::SelectionDAG&, unsigned int) const
Line
Count
Source
4130
2
                                           unsigned Flags) const {
4131
2
  LLVM_DEBUG(dbgs() << "AArch64TargetLowering::getAddrTiny\n");
4132
2
  SDLoc DL(N);
4133
2
  EVT Ty = getPointerTy(DAG.getDataLayout());
4134
2
  SDValue Sym = getTargetNode(N, Ty, DAG, Flags);
4135
2
  return DAG.getNode(AArch64ISD::ADR, DL, Ty, Sym);
4136
2
}
llvm::SDValue llvm::AArch64TargetLowering::getAddrTiny<llvm::ConstantPoolSDNode>(llvm::ConstantPoolSDNode*, llvm::SelectionDAG&, unsigned int) const
Line
Count
Source
4130
2
                                           unsigned Flags) const {
4131
2
  LLVM_DEBUG(dbgs() << "AArch64TargetLowering::getAddrTiny\n");
4132
2
  SDLoc DL(N);
4133
2
  EVT Ty = getPointerTy(DAG.getDataLayout());
4134
2
  SDValue Sym = getTargetNode(N, Ty, DAG, Flags);
4135
2
  return DAG.getNode(AArch64ISD::ADR, DL, Ty, Sym);
4136
2
}
llvm::SDValue llvm::AArch64TargetLowering::getAddrTiny<llvm::BlockAddressSDNode>(llvm::BlockAddressSDNode*, llvm::SelectionDAG&, unsigned int) const
Line
Count
Source
4130
1
                                           unsigned Flags) const {
4131
1
  LLVM_DEBUG(dbgs() << "AArch64TargetLowering::getAddrTiny\n");
4132
1
  SDLoc DL(N);
4133
1
  EVT Ty = getPointerTy(DAG.getDataLayout());
4134
1
  SDValue Sym = getTargetNode(N, Ty, DAG, Flags);
4135
1
  return DAG.getNode(AArch64ISD::ADR, DL, Ty, Sym);
4136
1
}
4137
4138
SDValue AArch64TargetLowering::LowerGlobalAddress(SDValue Op,
4139
226k
                                                  SelectionDAG &DAG) const {
4140
226k
  GlobalAddressSDNode *GN = cast<GlobalAddressSDNode>(Op);
4141
226k
  const GlobalValue *GV = GN->getGlobal();
4142
226k
  unsigned char OpFlags =
4143
226k
      Subtarget->ClassifyGlobalReference(GV, getTargetMachine());
4144
226k
4145
226k
  if (OpFlags != AArch64II::MO_NO_FLAG)
4146
226k
    assert(cast<GlobalAddressSDNode>(Op)->getOffset() == 0 &&
4147
226k
           "unexpected offset in global node");
4148
226k
4149
226k
  // This also catches the large code model case for Darwin, and tiny code
4150
226k
  // model with got relocations.
4151
226k
  if ((OpFlags & AArch64II::MO_GOT) != 0) {
4152
48.0k
    return getGOT(GN, DAG, OpFlags);
4153
48.0k
  }
4154
178k
4155
178k
  SDValue Result;
4156
178k
  if (getTargetMachine().getCodeModel() == CodeModel::Large) {
4157
22
    Result = getAddrLarge(GN, DAG, OpFlags);
4158
178k
  } else if (getTargetMachine().getCodeModel() == CodeModel::Tiny) {
4159
88
    Result = getAddrTiny(GN, DAG, OpFlags);
4160
178k
  } else {
4161
178k
    Result = getAddr(GN, DAG, OpFlags);
4162
178k
  }
4163
178k
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
4164
178k
  SDLoc DL(GN);
4165
178k
  if (OpFlags & (AArch64II::MO_DLLIMPORT | AArch64II::MO_COFFSTUB))
4166
0
    Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
4167
0
                         MachinePointerInfo::getGOT(DAG.getMachineFunction()));
4168
178k
  return Result;
4169
178k
}
4170
4171
/// Convert a TLS address reference into the correct sequence of loads
4172
/// and calls to compute the variable's address (for Darwin, currently) and
4173
/// return an SDValue containing the final node.
4174
4175
/// Darwin only has one TLS scheme which must be capable of dealing with the
4176
/// fully general situation, in the worst case. This means:
4177
///     + "extern __thread" declaration.
4178
///     + Defined in a possibly unknown dynamic library.
4179
///
4180
/// The general system is that each __thread variable has a [3 x i64] descriptor
4181
/// which contains information used by the runtime to calculate the address. The
4182
/// only part of this the compiler needs to know about is the first xword, which
4183
/// contains a function pointer that must be called with the address of the
4184
/// entire descriptor in "x0".
4185
///
4186
/// Since this descriptor may be in a different unit, in general even the
4187
/// descriptor must be accessed via an indirect load. The "ideal" code sequence
4188
/// is:
4189
///     adrp x0, _var@TLVPPAGE
4190
///     ldr x0, [x0, _var@TLVPPAGEOFF]   ; x0 now contains address of descriptor
4191
///     ldr x1, [x0]                     ; x1 contains 1st entry of descriptor,
4192
///                                      ; the function pointer
4193
///     blr x1                           ; Uses descriptor address in x0
4194
///     ; Address of _var is now in x0.
4195
///
4196
/// If the address of _var's descriptor *is* known to the linker, then it can
4197
/// change the first "ldr" instruction to an appropriate "add x0, x0, #imm" for
4198
/// a slight efficiency gain.
4199
SDValue
4200
AArch64TargetLowering::LowerDarwinGlobalTLSAddress(SDValue Op,
4201
36
                                                   SelectionDAG &DAG) const {
4202
36
  assert(Subtarget->isTargetDarwin() &&
4203
36
         "This function expects a Darwin target");
4204
36
4205
36
  SDLoc DL(Op);
4206
36
  MVT PtrVT = getPointerTy(DAG.getDataLayout());
4207
36
  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
4208
36
4209
36
  SDValue TLVPAddr =
4210
36
      DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS);
4211
36
  SDValue DescAddr = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, TLVPAddr);
4212
36
4213
36
  // The first entry in the descriptor is a function pointer that we must call
4214
36
  // to obtain the address of the variable.
4215
36
  SDValue Chain = DAG.getEntryNode();
4216
36
  SDValue FuncTLVGet = DAG.getLoad(
4217
36
      MVT::i64, DL, Chain, DescAddr,
4218
36
      MachinePointerInfo::getGOT(DAG.getMachineFunction()),
4219
36
      /* Alignment = */ 8,
4220
36
      MachineMemOperand::MONonTemporal | MachineMemOperand::MOInvariant |
4221
36
          MachineMemOperand::MODereferenceable);
4222
36
  Chain = FuncTLVGet.getValue(1);
4223
36
4224
36
  MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
4225
36
  MFI.setAdjustsStack(true);
4226
36
4227
36
  // TLS calls preserve all registers except those that absolutely must be
4228
36
  // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
4229
36
  // silly).
4230
36
  const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
4231
36
  const uint32_t *Mask = TRI->getTLSCallPreservedMask();
4232
36
  if (Subtarget->hasCustomCallingConv())
4233
0
    TRI->UpdateCustomCallPreservedMask(DAG.getMachineFunction(), &Mask);
4234
36
4235
36
  // Finally, we can make the call. This is just a degenerate version of a
4236
36
  // normal AArch64 call node: x0 takes the address of the descriptor, and
4237
36
  // returns the address of the variable in this thread.
4238
36
  Chain = DAG.getCopyToReg(Chain, DL, AArch64::X0, DescAddr, SDValue());
4239
36
  Chain =
4240
36
      DAG.getNode(AArch64ISD::CALL, DL, DAG.getVTList(MVT::Other, MVT::Glue),
4241
36
                  Chain, FuncTLVGet, DAG.getRegister(AArch64::X0, MVT::i64),
4242
36
                  DAG.getRegisterMask(Mask), Chain.getValue(1));
4243
36
  return DAG.getCopyFromReg(Chain, DL, AArch64::X0, PtrVT, Chain.getValue(1));
4244
36
}
4245
4246
/// When accessing thread-local variables under either the general-dynamic or
4247
/// local-dynamic system, we make a "TLS-descriptor" call. The variable will
4248
/// have a descriptor, accessible via a PC-relative ADRP, and whose first entry
4249
/// is a function pointer to carry out the resolution.
4250
///
4251
/// The sequence is:
4252
///    adrp  x0, :tlsdesc:var
4253
///    ldr   x1, [x0, #:tlsdesc_lo12:var]
4254
///    add   x0, x0, #:tlsdesc_lo12:var
4255
///    .tlsdesccall var
4256
///    blr   x1
4257
///    (TPIDR_EL0 offset now in x0)
4258
///
4259
///  The above sequence must be produced unscheduled, to enable the linker to
4260
///  optimize/relax this sequence.
4261
///  Therefore, a pseudo-instruction (TLSDESC_CALLSEQ) is used to represent the
4262
///  above sequence, and expanded really late in the compilation flow, to ensure
4263
///  the sequence is produced as per above.
4264
SDValue AArch64TargetLowering::LowerELFTLSDescCallSeq(SDValue SymAddr,
4265
                                                      const SDLoc &DL,
4266
32
                                                      SelectionDAG &DAG) const {
4267
32
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
4268
32
4269
32
  SDValue Chain = DAG.getEntryNode();
4270
32
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
4271
32
4272
32
  Chain =
4273
32
      DAG.getNode(AArch64ISD::TLSDESC_CALLSEQ, DL, NodeTys, {Chain, SymAddr});
4274
32
  SDValue Glue = Chain.getValue(1);
4275
32
4276
32
  return DAG.getCopyFromReg(Chain, DL, AArch64::X0, PtrVT, Glue);
4277
32
}
4278
4279
SDValue
4280
AArch64TargetLowering::LowerELFGlobalTLSAddress(SDValue Op,
4281
51
                                                SelectionDAG &DAG) const {
4282
51
  assert(Subtarget->isTargetELF() && "This function expects an ELF target");
4283
51
  if (getTargetMachine().getCodeModel() == CodeModel::Large)
4284
2
    report_fatal_error("ELF TLS only supported in small memory model");
4285
49
  // Different choices can be made for the maximum size of the TLS area for a
4286
49
  // module. For the small address model, the default TLS size is 16MiB and the
4287
49
  // maximum TLS size is 4GiB.
4288
49
  // FIXME: add -mtls-size command line option and make it control the 16MiB
4289
49
  // vs. 4GiB code sequence generation.
4290
49
  // FIXME: add tiny codemodel support. We currently generate the same code as
4291
49
  // small, which may be larger than needed.
4292
49
  const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
4293
49
4294
49
  TLSModel::Model Model = getTargetMachine().getTLSModel(GA->getGlobal());
4295
49
4296
49
  if (!EnableAArch64ELFLocalDynamicTLSGeneration) {
4297
31
    if (Model == TLSModel::LocalDynamic)
4298
8
      Model = TLSModel::GeneralDynamic;
4299
31
  }
4300
49
4301
49
  SDValue TPOff;
4302
49
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
4303
49
  SDLoc DL(Op);
4304
49
  const GlobalValue *GV = GA->getGlobal();
4305
49
4306
49
  SDValue ThreadBase = DAG.getNode(AArch64ISD::THREAD_POINTER, DL, PtrVT);
4307
49
4308
49
  if (Model == TLSModel::LocalExec) {
4309
9
    SDValue HiVar = DAG.getTargetGlobalAddress(
4310
9
        GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_HI12);
4311
9
    SDValue LoVar = DAG.getTargetGlobalAddress(
4312
9
        GV, DL, PtrVT, 0,
4313
9
        AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4314
9
4315
9
    SDValue TPWithOff_lo =
4316
9
        SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, ThreadBase,
4317
9
                                   HiVar,
4318
9
                                   DAG.getTargetConstant(0, DL, MVT::i32)),
4319
9
                0);
4320
9
    SDValue TPWithOff =
4321
9
        SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TPWithOff_lo,
4322
9
                                   LoVar,
4323
9
                                   DAG.getTargetConstant(0, DL, MVT::i32)),
4324
9
                0);
4325
9
    return TPWithOff;
4326
40
  } else if (Model == TLSModel::InitialExec) {
4327
8
    TPOff = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS);
4328
8
    TPOff = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, TPOff);
4329
32
  } else if (Model == TLSModel::LocalDynamic) {
4330
12
    // Local-dynamic accesses proceed in two phases. A general-dynamic TLS
4331
12
    // descriptor call against the special symbol _TLS_MODULE_BASE_ to calculate
4332
12
    // the beginning of the module's TLS region, followed by a DTPREL offset
4333
12
    // calculation.
4334
12
4335
12
    // These accesses will need deduplicating if there's more than one.
4336
12
    AArch64FunctionInfo *MFI =
4337
12
        DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();
4338
12
    MFI->incNumLocalDynamicTLSAccesses();
4339
12
4340
12
    // The call needs a relocation too for linker relaxation. It doesn't make
4341
12
    // sense to call it MO_PAGE or MO_PAGEOFF though so we need another copy of
4342
12
    // the address.
4343
12
    SDValue SymAddr = DAG.getTargetExternalSymbol("_TLS_MODULE_BASE_", PtrVT,
4344
12
                                                  AArch64II::MO_TLS);
4345
12
4346
12
    // Now we can calculate the offset from TPIDR_EL0 to this module's
4347
12
    // thread-local area.
4348
12
    TPOff = LowerELFTLSDescCallSeq(SymAddr, DL, DAG);
4349
12
4350
12
    // Now use :dtprel_whatever: operations to calculate this variable's offset
4351
12
    // in its thread-storage area.
4352
12
    SDValue HiVar = DAG.getTargetGlobalAddress(
4353
12
        GV, DL, MVT::i64, 0, AArch64II::MO_TLS | AArch64II::MO_HI12);
4354
12
    SDValue LoVar = DAG.getTargetGlobalAddress(
4355
12
        GV, DL, MVT::i64, 0,
4356
12
        AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4357
12
4358
12
    TPOff = SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TPOff, HiVar,
4359
12
                                       DAG.getTargetConstant(0, DL, MVT::i32)),
4360
12
                    0);
4361
12
    TPOff = SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TPOff, LoVar,
4362
12
                                       DAG.getTargetConstant(0, DL, MVT::i32)),
4363
12
                    0);
4364
20
  } else if (Model == TLSModel::GeneralDynamic) {
4365
20
    // The call needs a relocation too for linker relaxation. It doesn't make
4366
20
    // sense to call it MO_PAGE or MO_PAGEOFF though so we need another copy of
4367
20
    // the address.
4368
20
    SDValue SymAddr =
4369
20
        DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS);
4370
20
4371
20
    // Finally we can make a call to calculate the offset from tpidr_el0.
4372
20
    TPOff = LowerELFTLSDescCallSeq(SymAddr, DL, DAG);
4373
20
  } else
4374
20
    llvm_unreachable("Unsupported ELF TLS access model");
4375
49
4376
49
  return DAG.getNode(ISD::ADD, DL, PtrVT, ThreadBase, TPOff);
4377
49
}
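A hedged sketch of the LocalExec path handled above: a thread-local variable defined in the current module is addressed directly off TPIDR_EL0 through the two ADDXri nodes (MO_HI12 then MO_PAGEOFF | MO_NC), roughly "mrs x0, TPIDR_EL0; add x0, x0, :tprel_hi12:tls_var; add x0, x0, :tprel_lo12_nc:tls_var". At the source level (assuming the compiler selects the local-exec model for it, as is typical when building an executable):

static __thread int tls_var;        // local definition, typically local-exec
int *addr_of_tls_var() { return &tls_var; }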
4378
4379
SDValue
4380
AArch64TargetLowering::LowerWindowsGlobalTLSAddress(SDValue Op,
4381
5
                                                    SelectionDAG &DAG) const {
4382
5
  assert(Subtarget->isTargetWindows() && "Windows specific TLS lowering");
4383
5
4384
5
  SDValue Chain = DAG.getEntryNode();
4385
5
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
4386
5
  SDLoc DL(Op);
4387
5
4388
5
  SDValue TEB = DAG.getRegister(AArch64::X18, MVT::i64);
4389
5
4390
5
  // Load the ThreadLocalStoragePointer from the TEB
4391
5
  // A pointer to the TLS array is located at offset 0x58 from the TEB.
4392
5
  SDValue TLSArray =
4393
5
      DAG.getNode(ISD::ADD, DL, PtrVT, TEB, DAG.getIntPtrConstant(0x58, DL));
4394
5
  TLSArray = DAG.getLoad(PtrVT, DL, Chain, TLSArray, MachinePointerInfo());
4395
5
  Chain = TLSArray.getValue(1);
4396
5
4397
5
  // Load the TLS index from the C runtime.
4398
5
  // This does the same as getAddr(), but without having a GlobalAddressSDNode.
4399
5
  // This also does the same as LOADgot, but using a generic i32 load,
4400
5
  // while LOADgot only loads i64.
4401
5
  SDValue TLSIndexHi =
4402
5
      DAG.getTargetExternalSymbol("_tls_index", PtrVT, AArch64II::MO_PAGE);
4403
5
  SDValue TLSIndexLo = DAG.getTargetExternalSymbol(
4404
5
      "_tls_index", PtrVT, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4405
5
  SDValue ADRP = DAG.getNode(AArch64ISD::ADRP, DL, PtrVT, TLSIndexHi);
4406
5
  SDValue TLSIndex =
4407
5
      DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, ADRP, TLSIndexLo);
4408
5
  TLSIndex = DAG.getLoad(MVT::i32, DL, Chain, TLSIndex, MachinePointerInfo());
4409
5
  Chain = TLSIndex.getValue(1);
4410
5
4411
5
  // The pointer to the thread's TLS data area is at the TLS Index scaled by 8
4412
5
  // offset into the TLSArray.
4413
5
  TLSIndex = DAG.getNode(ISD::ZERO_EXTEND, DL, PtrVT, TLSIndex);
4414
5
  SDValue Slot = DAG.getNode(ISD::SHL, DL, PtrVT, TLSIndex,
4415
5
                             DAG.getConstant(3, DL, PtrVT));
4416
5
  SDValue TLS = DAG.getLoad(PtrVT, DL, Chain,
4417
5
                            DAG.getNode(ISD::ADD, DL, PtrVT, TLSArray, Slot),
4418
5
                            MachinePointerInfo());
4419
5
  Chain = TLS.getValue(1);
4420
5
4421
5
  const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
4422
5
  const GlobalValue *GV = GA->getGlobal();
4423
5
  SDValue TGAHi = DAG.getTargetGlobalAddress(
4424
5
      GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_HI12);
4425
5
  SDValue TGALo = DAG.getTargetGlobalAddress(
4426
5
      GV, DL, PtrVT, 0,
4427
5
      AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4428
5
4429
5
  // Add the offset from the start of the .tls section (section base).
4430
5
  SDValue Addr =
4431
5
      SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TLS, TGAHi,
4432
5
                                 DAG.getTargetConstant(0, DL, MVT::i32)),
4433
5
              0);
4434
5
  Addr = DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, Addr, TGALo);
4435
5
  return Addr;
4436
5
}
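The Windows lowering above builds an address chain that can be summarised as pseudo-C (an editorial sketch under the assumptions the code itself documents: the TEB pointer lives in x18, the TLS array sits at TEB + 0x58, and _tls_index comes from the CRT; the helper name is invented):

extern "C" unsigned _tls_index;
static inline char *windowsTLSAddress(char *TEB, unsigned long SectionOffset) {
  char **TLSArray = *reinterpret_cast<char ***>(TEB + 0x58); // ThreadLocalStoragePointer
  char *TLSBase = TLSArray[_tls_index];                      // this module's TLS block
  return TLSBase + SectionOffset;                            // variable's offset in .tls
}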

SDValue AArch64TargetLowering::LowerGlobalTLSAddress(SDValue Op,
                                                     SelectionDAG &DAG) const {
  const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
  if (DAG.getTarget().useEmulatedTLS())
    return LowerToTLSEmulatedModel(GA, DAG);

  if (Subtarget->isTargetDarwin())
    return LowerDarwinGlobalTLSAddress(Op, DAG);
  if (Subtarget->isTargetELF())
    return LowerELFGlobalTLSAddress(Op, DAG);
  if (Subtarget->isTargetWindows())
    return LowerWindowsGlobalTLSAddress(Op, DAG);

  llvm_unreachable("Unexpected platform trying to use TLS");
}

SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
  SDValue Chain = Op.getOperand(0);
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
  SDValue LHS = Op.getOperand(2);
  SDValue RHS = Op.getOperand(3);
  SDValue Dest = Op.getOperand(4);
  SDLoc dl(Op);

  MachineFunction &MF = DAG.getMachineFunction();
  // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z instructions
  // will not be produced, as they are conditional branch instructions that do
  // not set flags.
  bool ProduceNonFlagSettingCondBr =
      !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);

  // Handle f128 first, since lowering it will result in comparing the return
  // value of a libcall against zero, which is just what the rest of LowerBR_CC
  // is expecting to deal with.
  if (LHS.getValueType() == MVT::f128) {
    softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl);

    // If softenSetCCOperands returned a scalar, we need to compare the result
    // against zero to select between true and false values.
    if (!RHS.getNode()) {
      RHS = DAG.getConstant(0, dl, LHS.getValueType());
      CC = ISD::SETNE;
    }
  }

  // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a branch
  // instruction.
  if (isOverflowIntrOpRes(LHS) && isOneConstant(RHS) &&
      (CC == ISD::SETEQ || CC == ISD::SETNE)) {
    // Only lower legal XALUO ops.
    if (!DAG.getTargetLoweringInfo().isTypeLegal(LHS->getValueType(0)))
      return SDValue();

    // The actual operation with overflow check.
    AArch64CC::CondCode OFCC;
    SDValue Value, Overflow;
    std::tie(Value, Overflow) = getAArch64XALUOOp(OFCC, LHS.getValue(0), DAG);

    if (CC == ISD::SETNE)
      OFCC = getInvertedCondCode(OFCC);
    SDValue CCVal = DAG.getConstant(OFCC, dl, MVT::i32);

    return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal,
                       Overflow);
  }

  if (LHS.getValueType().isInteger()) {
    assert((LHS.getValueType() == RHS.getValueType()) &&
           (LHS.getValueType() == MVT::i32 || LHS.getValueType() == MVT::i64));

    // If the RHS of the comparison is zero, we can potentially fold this
    // to a specialized branch.
    const ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS);
    if (RHSC && RHSC->getZExtValue() == 0 && ProduceNonFlagSettingCondBr) {
      if (CC == ISD::SETEQ) {
        // See if we can use a TBZ to fold in an AND as well.
        // TBZ has a smaller branch displacement than CBZ.  If the offset is
        // out of bounds, a late MI-layer pass rewrites branches.
        // 403.gcc is an example that hits this case.
        if (LHS.getOpcode() == ISD::AND &&
            isa<ConstantSDNode>(LHS.getOperand(1)) &&
            isPowerOf2_64(LHS.getConstantOperandVal(1))) {
          SDValue Test = LHS.getOperand(0);
          uint64_t Mask = LHS.getConstantOperandVal(1);
          return DAG.getNode(AArch64ISD::TBZ, dl, MVT::Other, Chain, Test,
                             DAG.getConstant(Log2_64(Mask), dl, MVT::i64),
                             Dest);
        }

        return DAG.getNode(AArch64ISD::CBZ, dl, MVT::Other, Chain, LHS, Dest);
      } else if (CC == ISD::SETNE) {
        // See if we can use a TBZ to fold in an AND as well.
        // TBZ has a smaller branch displacement than CBZ.  If the offset is
        // out of bounds, a late MI-layer pass rewrites branches.
        // 403.gcc is an example that hits this case.
        if (LHS.getOpcode() == ISD::AND &&
            isa<ConstantSDNode>(LHS.getOperand(1)) &&
            isPowerOf2_64(LHS.getConstantOperandVal(1))) {
          SDValue Test = LHS.getOperand(0);
          uint64_t Mask = LHS.getConstantOperandVal(1);
          return DAG.getNode(AArch64ISD::TBNZ, dl, MVT::Other, Chain, Test,
                             DAG.getConstant(Log2_64(Mask), dl, MVT::i64),
                             Dest);
        }

        return DAG.getNode(AArch64ISD::CBNZ, dl, MVT::Other, Chain, LHS, Dest);
      } else if (CC == ISD::SETLT && LHS.getOpcode() != ISD::AND) {
        // Don't combine AND since emitComparison converts the AND to an ANDS
        // (a.k.a. TST) and the test in the test bit and branch instruction
        // becomes redundant.  This would also increase register pressure.
        uint64_t Mask = LHS.getValueSizeInBits() - 1;
        return DAG.getNode(AArch64ISD::TBNZ, dl, MVT::Other, Chain, LHS,
                           DAG.getConstant(Mask, dl, MVT::i64), Dest);
      }
    }
    if (RHSC && RHSC->getSExtValue() == -1 && CC == ISD::SETGT &&
        LHS.getOpcode() != ISD::AND && ProduceNonFlagSettingCondBr) {
      // Don't combine AND since emitComparison converts the AND to an ANDS
      // (a.k.a. TST) and the test in the test bit and branch instruction
      // becomes redundant.  This would also increase register pressure.
      uint64_t Mask = LHS.getValueSizeInBits() - 1;
      return DAG.getNode(AArch64ISD::TBZ, dl, MVT::Other, Chain, LHS,
                         DAG.getConstant(Mask, dl, MVT::i64), Dest);
    }

    SDValue CCVal;
    SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl);
    return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal,
                       Cmp);
  }

  assert(LHS.getValueType() == MVT::f16 || LHS.getValueType() == MVT::f32 ||
         LHS.getValueType() == MVT::f64);

  // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't totally
  // clean.  Some of them require two branches to implement.
  SDValue Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
  AArch64CC::CondCode CC1, CC2;
  changeFPCCToAArch64CC(CC, CC1, CC2);
  SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
  SDValue BR1 =
      DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CC1Val, Cmp);
  if (CC2 != AArch64CC::AL) {
    SDValue CC2Val = DAG.getConstant(CC2, dl, MVT::i32);
    return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, BR1, Dest, CC2Val,
                       Cmp);
  }

  return BR1;
}
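
The TB(N)Z folds above all rest on simple bit identities: a comparison of (x & power-of-two) against zero tests a single bit, and signed comparisons against 0 or -1 reduce to a test of the sign bit (bit width-1). A small self-check of those identities in plain C++ (illustrative only, not LLVM code):

#include <cassert>
#include <cstdint>

// Scalar sanity check of the identities behind the TBZ/TBNZ folds.
static void tbzIdentities(int64_t x, unsigned k) {
  assert(k < 64);
  uint64_t Mask = uint64_t(1) << k;  // the isPowerOf2_64(Mask) case
  // (x & Mask) == 0  <=>  bit k of x is clear  (TBZ on bit Log2_64(Mask))
  assert(((uint64_t(x) & Mask) == 0) == (((uint64_t(x) >> k) & 1) == 0));
  // x < 0   <=>  the sign bit (bit 63) is set  (TBNZ on bit width-1)
  assert((x < 0) == (((uint64_t(x) >> 63) & 1) == 1));
  // x > -1  <=>  the sign bit is clear         (TBZ on bit width-1)
  assert((x > -1) == (((uint64_t(x) >> 63) & 1) == 0));
}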

SDValue AArch64TargetLowering::LowerFCOPYSIGN(SDValue Op,
                                              SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();
  SDLoc DL(Op);

  SDValue In1 = Op.getOperand(0);
  SDValue In2 = Op.getOperand(1);
  EVT SrcVT = In2.getValueType();

  if (SrcVT.bitsLT(VT))
    In2 = DAG.getNode(ISD::FP_EXTEND, DL, VT, In2);
  else if (SrcVT.bitsGT(VT))
    In2 = DAG.getNode(ISD::FP_ROUND, DL, VT, In2, DAG.getIntPtrConstant(0, DL));

  EVT VecVT;
  uint64_t EltMask;
  SDValue VecVal1, VecVal2;

  auto setVecVal = [&] (int Idx) {
    if (!VT.isVector()) {
      VecVal1 = DAG.getTargetInsertSubreg(Idx, DL, VecVT,
                                          DAG.getUNDEF(VecVT), In1);
      VecVal2 = DAG.getTargetInsertSubreg(Idx, DL, VecVT,
                                          DAG.getUNDEF(VecVT), In2);
    } else {
      VecVal1 = DAG.getNode(ISD::BITCAST, DL, VecVT, In1);
      VecVal2 = DAG.getNode(ISD::BITCAST, DL, VecVT, In2);
    }
  };

  if (VT == MVT::f32 || VT == MVT::v2f32 || VT == MVT::v4f32) {
    VecVT = (VT == MVT::v2f32 ? MVT::v2i32 : MVT::v4i32);
    EltMask = 0x80000000ULL;
    setVecVal(AArch64::ssub);
  } else if (VT == MVT::f64 || VT == MVT::v2f64) {
    VecVT = MVT::v2i64;

    // We want to materialize a mask with the high bit set, but the AdvSIMD
    // immediate moves cannot materialize that in a single instruction for
    // 64-bit elements. Instead, materialize zero and then negate it.
    EltMask = 0;

    setVecVal(AArch64::dsub);
  } else if (VT == MVT::f16 || VT == MVT::v4f16 || VT == MVT::v8f16) {
    VecVT = (VT == MVT::v4f16 ? MVT::v4i16 : MVT::v8i16);
    EltMask = 0x8000ULL;
    setVecVal(AArch64::hsub);
  } else {
    llvm_unreachable("Invalid type for copysign!");
  }

  SDValue BuildVec = DAG.getConstant(EltMask, DL, VecVT);

  // If we couldn't materialize the mask above, then the mask vector will be
  // the zero vector, and we need to negate it here.
  if (VT == MVT::f64 || VT == MVT::v2f64) {
    BuildVec = DAG.getNode(ISD::BITCAST, DL, MVT::v2f64, BuildVec);
    BuildVec = DAG.getNode(ISD::FNEG, DL, MVT::v2f64, BuildVec);
    BuildVec = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, BuildVec);
  }

  SDValue Sel =
      DAG.getNode(AArch64ISD::BIT, DL, VecVT, VecVal1, VecVal2, BuildVec);

  if (VT == MVT::f16)
    return DAG.getTargetExtractSubreg(AArch64::hsub, DL, VT, Sel);
  if (VT == MVT::f32)
    return DAG.getTargetExtractSubreg(AArch64::ssub, DL, VT, Sel);
  else if (VT == MVT::f64)
    return DAG.getTargetExtractSubreg(AArch64::dsub, DL, VT, Sel);
  else
    return DAG.getNode(ISD::BITCAST, DL, VT, Sel);
}
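
AArch64ISD::BIT is a bitwise "insert if true": lanes of the first operand keep their bits where the mask is clear and take the second operand's bits where the mask is set. With a mask containing only the sign bit (built directly for f16/f32, or by negating a zero vector for f64, since the 64-bit element immediate cannot be encoded), that selection is exactly copysign. A scalar model of the f64 case, illustrative only and not LLVM code:

#include <cstdint>
#include <cstring>

// Scalar model of BIT-based copysign: keep the magnitude bits of `mag`,
// insert the sign bit of `sgn`.
static double copysignViaBit(double mag, double sgn) {
  uint64_t m, s;
  std::memcpy(&m, &mag, sizeof m);
  std::memcpy(&s, &sgn, sizeof s);
  const uint64_t Mask = uint64_t(1) << 63;  // only the sign bit is set
  uint64_t r = (m & ~Mask) | (s & Mask);    // the BIT selection
  double Result;
  std::memcpy(&Result, &r, sizeof Result);
  return Result;                            // copysignViaBit(1.5, -0.0) == -1.5
}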

SDValue AArch64TargetLowering::LowerCTPOP(SDValue Op, SelectionDAG &DAG) const {
  if (DAG.getMachineFunction().getFunction().hasFnAttribute(
          Attribute::NoImplicitFloat))
    return SDValue();

  if (!Subtarget->hasNEON())
    return SDValue();

  // While there is no integer popcount instruction, it can
  // be more efficiently lowered to the following sequence that uses
  // AdvSIMD registers/instructions as long as the copies to/from
  // the AdvSIMD registers are cheap.
  //  FMOV    D0, X0        // copy 64-bit int to vector, high bits zero'd
  //  CNT     V0.8B, V0.8B  // 8xbyte pop-counts
  //  ADDV    B0, V0.8B     // sum 8xbyte pop-counts
  //  UMOV    X0, V0.B[0]   // copy byte result back to integer reg
  SDValue Val = Op.getOperand(0);
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  if (VT == MVT::i32 || VT == MVT::i64) {
    if (VT == MVT::i32)
      Val = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Val);
    Val = DAG.getNode(ISD::BITCAST, DL, MVT::v8i8, Val);

    SDValue CtPop = DAG.getNode(ISD::CTPOP, DL, MVT::v8i8, Val);
    SDValue UaddLV = DAG.getNode(
        ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
        DAG.getConstant(Intrinsic::aarch64_neon_uaddlv, DL, MVT::i32), CtPop);

    if (VT == MVT::i64)
      UaddLV = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, UaddLV);
    return UaddLV;
  }

  assert((VT == MVT::v1i64 || VT == MVT::v2i64 || VT == MVT::v2i32 ||
          VT == MVT::v4i32 || VT == MVT::v4i16 || VT == MVT::v8i16) &&
         "Unexpected type for custom ctpop lowering");

  EVT VT8Bit = VT.is64BitVector() ? MVT::v8i8 : MVT::v16i8;
  Val = DAG.getBitcast(VT8Bit, Val);
  Val = DAG.getNode(ISD::CTPOP, DL, VT8Bit, Val);

  // Widen v8i8/v16i8 CTPOP result to VT by repeatedly widening pairwise adds.
  unsigned EltSize = 8;
  unsigned NumElts = VT.is64BitVector() ? 8 : 16;
  while (EltSize != VT.getScalarSizeInBits()) {
    EltSize *= 2;
    NumElts /= 2;
    MVT WidenVT = MVT::getVectorVT(MVT::getIntegerVT(EltSize), NumElts);
    Val = DAG.getNode(
        ISD::INTRINSIC_WO_CHAIN, DL, WidenVT,
        DAG.getConstant(Intrinsic::aarch64_neon_uaddlp, DL, MVT::i32), Val);
  }

  return Val;
}
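
The scalar i32/i64 path works because a popcount can be computed one byte at a time and the eight partial counts summed, which is what CNT followed by ADDV/UADDLV does; the vector path does the same and then re-widens the per-byte counts with pairwise UADDLP adds. A plain C++ model of the bytewise idea, illustrative only:

#include <cstdint>

// Scalar model of CNT + ADDV: per-byte popcounts, then sum them.
static unsigned popcountViaBytes(uint64_t x) {
  unsigned Sum = 0;
  for (unsigned Byte = 0; Byte < 8; ++Byte) {   // CNT: popcount of each byte
    uint8_t b = uint8_t(x >> (8 * Byte));
    while (b) {
      Sum += b & 1;
      b >>= 1;
    }
  }
  return Sum;                                   // ADDV/UADDLV: sum the lanes
}

// popcountViaBytes(0xF0F0F0F0F0F0F0F0ULL) == 32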

SDValue AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {

  if (Op.getValueType().isVector())
    return LowerVSETCC(Op, DAG);

  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
  SDLoc dl(Op);

  // We chose ZeroOrOneBooleanContents, so use zero and one.
  EVT VT = Op.getValueType();
  SDValue TVal = DAG.getConstant(1, dl, VT);
  SDValue FVal = DAG.getConstant(0, dl, VT);

  // Handle f128 first, since one possible outcome is a normal integer
  // comparison which gets picked up by the next if statement.
  if (LHS.getValueType() == MVT::f128) {
    softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl);

    // If softenSetCCOperands returned a scalar, use it.
    if (!RHS.getNode()) {
      assert(LHS.getValueType() == Op.getValueType() &&
             "Unexpected setcc expansion!");
      return LHS;
    }
  }

  if (LHS.getValueType().isInteger()) {
    SDValue CCVal;
    SDValue Cmp =
        getAArch64Cmp(LHS, RHS, ISD::getSetCCInverse(CC, true), CCVal, DAG, dl);

    // Note that we inverted the condition above, so we reverse the order of
    // the true and false operands here.  This will allow the setcc to be
    // matched to a single CSINC instruction.
    return DAG.getNode(AArch64ISD::CSEL, dl, VT, FVal, TVal, CCVal, Cmp);
  }

  // Now we know we're dealing with FP values.
  assert(LHS.getValueType() == MVT::f16 || LHS.getValueType() == MVT::f32 ||
         LHS.getValueType() == MVT::f64);

  // If that fails, we'll need to perform an FCMP + CSEL sequence.  Go ahead
  // and do the comparison.
  SDValue Cmp = emitComparison(LHS, RHS, CC, dl, DAG);

  AArch64CC::CondCode CC1, CC2;
  changeFPCCToAArch64CC(CC, CC1, CC2);
  if (CC2 == AArch64CC::AL) {
    changeFPCCToAArch64CC(ISD::getSetCCInverse(CC, false), CC1, CC2);
    SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);

    // Note that we inverted the condition above, so we reverse the order of
    // the true and false operands here.  This will allow the setcc to be
    // matched to a single CSINC instruction.
    return DAG.getNode(AArch64ISD::CSEL, dl, VT, FVal, TVal, CC1Val, Cmp);
  } else {
    // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't
    // totally clean.  Some of them require two CSELs to implement.  As is in
    // this case, we emit the first CSEL and then emit a second using the output
    // of the first as the RHS.  We're effectively OR'ing the two CC's together.

    // FIXME: It would be nice if we could match the two CSELs to two CSINCs.
    SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
    SDValue CS1 =
        DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, FVal, CC1Val, Cmp);

    SDValue CC2Val = DAG.getConstant(CC2, dl, MVT::i32);
    return DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, CS1, CC2Val, Cmp);
  }
}
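
The operand swap plus condition inversion in the integer and single-condition FP paths exists so that the (0, 1) select can later be matched to a single CSINC against WZR. A rough scalar model of that encoding trick follows; it is illustrative only, and csinc below merely mimics the instruction's "cond ? Rn : Rm + 1" semantics rather than calling anything in LLVM.

#include <cassert>
#include <cstdint>

// Model of CSINC Rd, Rn, Rm, cond.
static uint32_t csinc(uint32_t Rn, uint32_t Rm, bool cond) {
  return cond ? Rn : Rm + 1;
}

// setcc(a, b, lt) produces 1 when a < b; with the condition inverted and the
// 0/1 operands swapped, CSINC Wd, WZR, WZR, !lt computes the same value.
static void setccAsCsinc(int32_t a, int32_t b) {
  bool lt = a < b;
  assert(uint32_t(lt) == csinc(/*FVal=*/0, /*TVal=*/0, /*cond=*/!lt));
}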

SDValue AArch64TargetLowering::LowerSELECT_CC(ISD::CondCode CC, SDValue LHS,
                                              SDValue RHS, SDValue TVal,
                                              SDValue FVal, const SDLoc &dl,
                                              SelectionDAG &DAG) const {
  // Handle f128 first, because it will result in a comparison of some RTLIB
  // call result against zero.
  if (LHS.getValueType() == MVT::f128) {
    softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl);

    // If softenSetCCOperands returned a scalar, we need to compare the result
    // against zero to select between true and false values.
    if (!RHS.getNode()) {
      RHS = DAG.getConstant(0, dl, LHS.getValueType());
      CC = ISD::SETNE;
    }
  }

  // Also handle f16, for which we need to do a f32 comparison.
  if (LHS.getValueType() == MVT::f16 && !Subtarget->hasFullFP16()) {
    LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, LHS);
    RHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, RHS);
  }

  // Next, handle integers.
  if (LHS.getValueType().isInteger()) {
    assert((LHS.getValueType() == RHS.getValueType()) &&
           (LHS.getValueType() == MVT::i32 || LHS.getValueType() == MVT::i64));

    unsigned Opcode = AArch64ISD::CSEL;

    // If both the TVal and the FVal are constants, see if we can swap them in
    // order to form a CSINV or CSINC out of them.
    ConstantSDNode *CFVal = dyn_cast<ConstantSDNode>(FVal);
    ConstantSDNode *CTVal = dyn_cast<ConstantSDNode>(TVal);

    if (CTVal && CFVal && CTVal->isAllOnesValue() && CFVal->isNullValue()) {
      std::swap(TVal, FVal);
      std::swap(CTVal, CFVal);
      CC = ISD::getSetCCInverse(CC, true);
    } else if (CTVal && CFVal && CTVal->isOne() && CFVal->isNullValue()) {
      std::swap(TVal, FVal);
4835