Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Target/X86/X86ISelLowering.cpp
Line | Count | Source
1
//===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This file defines the interfaces that X86 uses to lower LLVM code into a
10
// selection DAG.
11
//
12
//===----------------------------------------------------------------------===//
13
14
#include "X86ISelLowering.h"
15
#include "Utils/X86ShuffleDecode.h"
16
#include "X86CallingConv.h"
17
#include "X86FrameLowering.h"
18
#include "X86InstrBuilder.h"
19
#include "X86IntrinsicsInfo.h"
20
#include "X86MachineFunctionInfo.h"
21
#include "X86TargetMachine.h"
22
#include "X86TargetObjectFile.h"
23
#include "llvm/ADT/SmallBitVector.h"
24
#include "llvm/ADT/SmallSet.h"
25
#include "llvm/ADT/Statistic.h"
26
#include "llvm/ADT/StringExtras.h"
27
#include "llvm/ADT/StringSwitch.h"
28
#include "llvm/Analysis/EHPersonalities.h"
29
#include "llvm/CodeGen/IntrinsicLowering.h"
30
#include "llvm/CodeGen/MachineFrameInfo.h"
31
#include "llvm/CodeGen/MachineFunction.h"
32
#include "llvm/CodeGen/MachineInstrBuilder.h"
33
#include "llvm/CodeGen/MachineJumpTableInfo.h"
34
#include "llvm/CodeGen/MachineModuleInfo.h"
35
#include "llvm/CodeGen/MachineRegisterInfo.h"
36
#include "llvm/CodeGen/TargetLowering.h"
37
#include "llvm/CodeGen/WinEHFuncInfo.h"
38
#include "llvm/IR/CallSite.h"
39
#include "llvm/IR/CallingConv.h"
40
#include "llvm/IR/Constants.h"
41
#include "llvm/IR/DerivedTypes.h"
42
#include "llvm/IR/DiagnosticInfo.h"
43
#include "llvm/IR/Function.h"
44
#include "llvm/IR/GlobalAlias.h"
45
#include "llvm/IR/GlobalVariable.h"
46
#include "llvm/IR/Instructions.h"
47
#include "llvm/IR/Intrinsics.h"
48
#include "llvm/MC/MCAsmInfo.h"
49
#include "llvm/MC/MCContext.h"
50
#include "llvm/MC/MCExpr.h"
51
#include "llvm/MC/MCSymbol.h"
52
#include "llvm/Support/CommandLine.h"
53
#include "llvm/Support/Debug.h"
54
#include "llvm/Support/ErrorHandling.h"
55
#include "llvm/Support/KnownBits.h"
56
#include "llvm/Support/MathExtras.h"
57
#include "llvm/Target/TargetOptions.h"
58
#include <algorithm>
59
#include <bitset>
60
#include <cctype>
61
#include <numeric>
62
using namespace llvm;
63
64
#define DEBUG_TYPE "x86-isel"
65
66
STATISTIC(NumTailCalls, "Number of tail calls");
67
68
static cl::opt<bool> ExperimentalVectorWideningLegalization(
69
    "x86-experimental-vector-widening-legalization", cl::init(false),
70
    cl::desc("Enable an experimental vector type legalization through widening "
71
             "rather than promotion."),
72
    cl::Hidden);
73
74
static cl::opt<int> ExperimentalPrefLoopAlignment(
75
    "x86-experimental-pref-loop-alignment", cl::init(4),
76
    cl::desc("Sets the preferable loop alignment for experiments "
77
             "(the last x86-experimental-pref-loop-alignment bits"
78
             " of the loop header PC will be 0)."),
79
    cl::Hidden);
80
81
static cl::opt<bool> MulConstantOptimization(
82
    "mul-constant-optimization", cl::init(true),
83
    cl::desc("Replace 'mul x, Const' with more effective instructions like "
84
             "SHIFT, LEA, etc."),
85
    cl::Hidden);
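Note: for readers unfamiliar with the option machinery used above, the following is a minimal, hypothetical sketch of how such a flag is declared and consumed. The flag name "example-widen-vectors" and the helper shouldWiden are illustrative and not part of this file.

#include "llvm/Support/CommandLine.h"
using namespace llvm;

// Hypothetical flag, mirroring the style of the options above.
static cl::opt<bool> ExampleWidenVectors(
    "example-widen-vectors", cl::init(false),
    cl::desc("Illustrative only: prefer widening over promotion."),
    cl::Hidden);

static bool shouldWiden() {
  // After cl::ParseCommandLineOptions runs, the option converts to bool.
  return ExampleWidenVectors;
}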
86
87
/// Call this when the user attempts to do something unsupported, like
88
/// returning a double without SSE2 enabled on x86_64. This is not fatal, unlike
89
/// report_fatal_error, so calling code should attempt to recover without
90
/// crashing.
91
static void errorUnsupported(SelectionDAG &DAG, const SDLoc &dl,
92
10
                             const char *Msg) {
93
10
  MachineFunction &MF = DAG.getMachineFunction();
94
10
  DAG.getContext()->diagnose(
95
10
      DiagnosticInfoUnsupported(MF.getFunction(), Msg, dl.getDebugLoc()));
96
10
}
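Note: a hedged sketch of how a helper like errorUnsupported is typically invoked during lowering, assuming the includes already present at the top of this file; the condition and message are illustrative, not a verbatim call site from this function.

// Illustrative only: emit a non-fatal diagnostic and continue with some
// recoverable lowering instead of crashing the compiler.
static void lowerExample(SelectionDAG &DAG, const SDLoc &dl, bool HasSSE2) {
  if (!HasSSE2)
    errorUnsupported(DAG, dl, "example: SSE2 is required for this lowering");
  // ...fall back to a degraded but valid lowering...
}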
97
98
X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
99
                                     const X86Subtarget &STI)
100
15.2k
    : TargetLowering(TM), Subtarget(STI) {
101
15.2k
  bool UseX87 = !Subtarget.useSoftFloat() && Subtarget.hasX87();
102
15.2k
  X86ScalarSSEf64 = Subtarget.hasSSE2();
103
15.2k
  X86ScalarSSEf32 = Subtarget.hasSSE1();
104
15.2k
  MVT PtrVT = MVT::getIntegerVT(TM.getPointerSizeInBits(0));
105
15.2k
106
15.2k
  // Set up the TargetLowering object.
107
15.2k
108
15.2k
  // X86 is weird. It always uses i8 for shift amounts and setcc results.
109
15.2k
  setBooleanContents(ZeroOrOneBooleanContent);
110
15.2k
  // X86-SSE is even stranger. It uses -1 or 0 for vector masks.
111
15.2k
  setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
112
15.2k
113
15.2k
  // For 64-bit, since we have so many registers, use the ILP scheduler.
114
15.2k
  // For 32-bit, use the register pressure specific scheduling.
115
15.2k
  // For Atom, always use ILP scheduling.
116
15.2k
  if (Subtarget.isAtom())
117
35
    setSchedulingPreference(Sched::ILP);
118
15.2k
  else if (Subtarget.is64Bit())
119
12.0k
    setSchedulingPreference(Sched::ILP);
120
3.14k
  else
121
3.14k
    setSchedulingPreference(Sched::RegPressure);
122
15.2k
  const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
123
15.2k
  setStackPointerRegisterToSaveRestore(RegInfo->getStackRegister());
124
15.2k
125
15.2k
  // Bypass expensive divides and use cheaper ones.
126
15.2k
  if (TM.getOptLevel() >= CodeGenOpt::Default) {
127
12.5k
    if (Subtarget.hasSlowDivide32())
128
33
      addBypassSlowDiv(32, 8);
129
12.5k
    if (Subtarget.hasSlowDivide64() && Subtarget.is64Bit())
130
1.22k
      addBypassSlowDiv(64, 32);
131
12.5k
  }
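Note: conceptually, addBypassSlowDiv(32, 8) asks the slow-division bypass transform to guard full-width divides with a cheap width check at run time. A rough source-level illustration (not the actual generated IR):

#include <cstdint>

// Roughly what addBypassSlowDiv(32, 8) buys: when both operands fit in the
// narrow type, a much cheaper 8-bit divide is used.
uint32_t bypassDiv(uint32_t a, uint32_t b) {
  if (((a | b) & ~0xFFu) == 0)
    return uint32_t(uint8_t(a) / uint8_t(b)); // fast path: narrow divide
  return a / b;                               // slow path: full 32-bit divide
}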
132
15.2k
133
15.2k
  if (Subtarget.isTargetWindowsMSVC() ||
134
15.2k
      Subtarget.isTargetWindowsItanium()) {
135
676
    // Setup Windows compiler runtime calls.
136
676
    setLibcallName(RTLIB::SDIV_I64, "_alldiv");
137
676
    setLibcallName(RTLIB::UDIV_I64, "_aulldiv");
138
676
    setLibcallName(RTLIB::SREM_I64, "_allrem");
139
676
    setLibcallName(RTLIB::UREM_I64, "_aullrem");
140
676
    setLibcallName(RTLIB::MUL_I64, "_allmul");
141
676
    setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::X86_StdCall);
142
676
    setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::X86_StdCall);
143
676
    setLibcallCallingConv(RTLIB::SREM_I64, CallingConv::X86_StdCall);
144
676
    setLibcallCallingConv(RTLIB::UREM_I64, CallingConv::X86_StdCall);
145
676
    setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::X86_StdCall);
146
676
  }
147
15.2k
148
15.2k
  if (Subtarget.isTargetDarwin()) {
149
6.50k
    // Darwin should use _setjmp/_longjmp instead of setjmp/longjmp.
150
6.50k
    setUseUnderscoreSetJmp(false);
151
6.50k
    setUseUnderscoreLongJmp(false);
152
8.76k
  } else if (Subtarget.isTargetWindowsGNU()) {
153
97
    // MS runtime is weird: it exports _setjmp, but longjmp!
154
97
    setUseUnderscoreSetJmp(true);
155
97
    setUseUnderscoreLongJmp(false);
156
8.66k
  } else {
157
8.66k
    setUseUnderscoreSetJmp(true);
158
8.66k
    setUseUnderscoreLongJmp(true);
159
8.66k
  }
160
15.2k
161
15.2k
  // If we don't have cmpxchg8b (meaning this is a 386/486), limit atomic size to
162
15.2k
  // 32 bits so the AtomicExpandPass will expand it so we don't need cmpxchg8b.
163
15.2k
  // FIXME: Should we be limiting the atomic size on other configs? Default is
164
15.2k
  // 1024.
165
15.2k
  if (!Subtarget.hasCmpxchg8b())
166
49
    setMaxAtomicSizeInBitsSupported(32);
167
15.2k
168
15.2k
  // Set up the register classes.
169
15.2k
  addRegisterClass(MVT::i8, &X86::GR8RegClass);
170
15.2k
  addRegisterClass(MVT::i16, &X86::GR16RegClass);
171
15.2k
  addRegisterClass(MVT::i32, &X86::GR32RegClass);
172
15.2k
  if (Subtarget.is64Bit())
173
12.0k
    addRegisterClass(MVT::i64, &X86::GR64RegClass);
174
15.2k
175
15.2k
  for (MVT VT : MVT::integer_valuetypes())
176
91.5k
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
177
15.2k
178
15.2k
  // We don't accept any truncstore of integer registers.
179
15.2k
  setTruncStoreAction(MVT::i64, MVT::i32, Expand);
180
15.2k
  setTruncStoreAction(MVT::i64, MVT::i16, Expand);
181
15.2k
  setTruncStoreAction(MVT::i64, MVT::i8 , Expand);
182
15.2k
  setTruncStoreAction(MVT::i32, MVT::i16, Expand);
183
15.2k
  setTruncStoreAction(MVT::i32, MVT::i8 , Expand);
184
15.2k
  setTruncStoreAction(MVT::i16, MVT::i8,  Expand);
185
15.2k
186
15.2k
  setTruncStoreAction(MVT::f64, MVT::f32, Expand);
187
15.2k
188
15.2k
  // SETOEQ and SETUNE require checking two conditions.
189
15.2k
  setCondCodeAction(ISD::SETOEQ, MVT::f32, Expand);
190
15.2k
  setCondCodeAction(ISD::SETOEQ, MVT::f64, Expand);
191
15.2k
  setCondCodeAction(ISD::SETOEQ, MVT::f80, Expand);
192
15.2k
  setCondCodeAction(ISD::SETUNE, MVT::f32, Expand);
193
15.2k
  setCondCodeAction(ISD::SETUNE, MVT::f64, Expand);
194
15.2k
  setCondCodeAction(ISD::SETUNE, MVT::f80, Expand);
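Note: the "two conditions" mentioned above come from x86's FP compare semantics: UCOMISS/UCOMISD report an unordered result through PF, so an ordered-equal test needs both an equality check and a not-NaN check. In scalar C++ terms, roughly:

#include <cmath>

// SETOEQ(a, b): true only when neither operand is NaN and they compare equal,
// which is why the node is Expanded into two checks above.
bool orderedEqual(double a, double b) {
  return !std::isnan(a) && !std::isnan(b) && a == b;
}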
195
15.2k
196
15.2k
  // Integer absolute.
197
15.2k
  if (Subtarget.hasCMov()) {
198
13.6k
    setOperationAction(ISD::ABS            , MVT::i16  , Custom);
199
13.6k
    setOperationAction(ISD::ABS            , MVT::i32  , Custom); 
200
13.6k
  }
201
15.2k
  setOperationAction(ISD::ABS              , MVT::i64  , Custom);
202
15.2k
203
15.2k
  // Funnel shifts.
204
30.5k
  for (auto ShiftOp : {ISD::FSHL, ISD::FSHR}) {
205
30.5k
    setOperationAction(ShiftOp             , MVT::i16  , Custom);
206
30.5k
    setOperationAction(ShiftOp             , MVT::i32  , Custom);
207
30.5k
    if (Subtarget.is64Bit())
208
24.2k
      setOperationAction(ShiftOp           , MVT::i64  , Custom);
209
30.5k
  }
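Note: for reference, ISD::FSHL/FSHR are the funnel shifts: the two operands are conceptually concatenated and shifted as one double-wide value. A small sketch of the 32-bit left form, with the shift amount taken modulo the bit width as in the LLVM intrinsic:

#include <cstdint>

uint32_t fshl32(uint32_t hi, uint32_t lo, uint32_t amt) {
  amt &= 31;                    // shift amount is taken modulo the bit width
  if (amt == 0)
    return hi;                  // avoid a shift by 32, undefined in C++
  return (hi << amt) | (lo >> (32 - amt));
}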
210
15.2k
211
15.2k
  // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
212
15.2k
  // operation.
213
15.2k
  setOperationAction(ISD::UINT_TO_FP       , MVT::i1   , Promote);
214
15.2k
  setOperationAction(ISD::UINT_TO_FP       , MVT::i8   , Promote);
215
15.2k
  setOperationAction(ISD::UINT_TO_FP       , MVT::i16  , Promote);
216
15.2k
217
15.2k
  if (Subtarget.is64Bit()) {
218
12.1k
    if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512())
219
1.48k
      // f32/f64 are legal, f80 is custom.
220
1.48k
      setOperationAction(ISD::UINT_TO_FP   , MVT::i32  , Custom);
221
10.6k
    else
222
10.6k
      setOperationAction(ISD::UINT_TO_FP   , MVT::i32  , Promote);
223
12.1k
    setOperationAction(ISD::UINT_TO_FP     , MVT::i64  , Custom);
224
12.1k
  } else if (!Subtarget.useSoftFloat()) {
225
3.15k
    // We have an algorithm for SSE2->double, and we turn this into a
226
3.15k
    // 64-bit FILD followed by conditional FADD for other targets.
227
3.15k
    setOperationAction(ISD::UINT_TO_FP     , MVT::i64  , Custom);
228
3.15k
    // We have an algorithm for SSE2, and we turn this into a 64-bit
229
3.15k
    // FILD or VCVTUSI2SS/SD for other targets.
230
3.15k
    setOperationAction(ISD::UINT_TO_FP     , MVT::i32  , Custom);
231
3.15k
  } else {
232
9
    setOperationAction(ISD::UINT_TO_FP     , MVT::i32  , Expand);
233
9
  }
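Note: the Promote actions above rely on a simple fact: a zero-extended i1/i8/i16 value always fits in a signed i32, so the unsigned conversion can reuse the signed one. A minimal illustration at the C++ level:

#include <cstdint>

// Same result as a direct unsigned conversion, because every uint16_t value
// is representable as a non-negative int32_t.
float u16ToFloat(uint16_t v) {
  return static_cast<float>(static_cast<int32_t>(v));
}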
234
15.2k
235
15.2k
  // Promote i1/i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
236
15.2k
  // this operation.
237
15.2k
  setOperationAction(ISD::SINT_TO_FP       , MVT::i1   , Promote);
238
15.2k
  setOperationAction(ISD::SINT_TO_FP       , MVT::i8   , Promote);
239
15.2k
240
15.2k
  if (!Subtarget.useSoftFloat()) {
241
15.2k
    // SSE has no i16 to fp conversion, only i32.
242
15.2k
    if (X86ScalarSSEf32) {
243
13.6k
      setOperationAction(ISD::SINT_TO_FP     , MVT::i16  , Promote);
244
13.6k
      // f32 and f64 cases are Legal, f80 case is not
245
13.6k
      setOperationAction(ISD::SINT_TO_FP     , MVT::i32  , Custom);
246
13.6k
    } else {
247
1.63k
      setOperationAction(ISD::SINT_TO_FP     , MVT::i16  , Custom);
248
1.63k
      setOperationAction(ISD::SINT_TO_FP     , MVT::i32  , Custom);
249
1.63k
    }
250
15.2k
  } else {
251
20
    setOperationAction(ISD::SINT_TO_FP     , MVT::i16  , Promote);
252
20
    setOperationAction(ISD::SINT_TO_FP     , MVT::i32  , Expand);
253
20
  }
254
15.2k
255
15.2k
  // Promote i1/i8 FP_TO_SINT to larger FP_TO_SINTS's, as X86 doesn't have
256
15.2k
  // this operation.
257
15.2k
  setOperationAction(ISD::FP_TO_SINT       , MVT::i1   , Promote);
258
15.2k
  setOperationAction(ISD::FP_TO_SINT       , MVT::i8   , Promote);
259
15.2k
260
15.2k
  if (!Subtarget.useSoftFloat()) {
261
15.2k
    // In 32-bit mode these are custom lowered.  In 64-bit mode F32 and F64
262
15.2k
    // are Legal, f80 is custom lowered.
263
15.2k
    setOperationAction(ISD::FP_TO_SINT     , MVT::i64  , Custom);
264
15.2k
    setOperationAction(ISD::SINT_TO_FP     , MVT::i64  , Custom);
265
15.2k
266
15.2k
    setOperationAction(ISD::FP_TO_SINT     , MVT::i16  , Custom);
267
15.2k
    setOperationAction(ISD::FP_TO_SINT     , MVT::i32  , Custom);
268
15.2k
  } else {
269
23
    setOperationAction(ISD::FP_TO_SINT     , MVT::i16  , Promote);
270
23
    setOperationAction(ISD::FP_TO_SINT     , MVT::i32  , Expand);
271
23
    setOperationAction(ISD::FP_TO_SINT     , MVT::i64  , Expand);
272
23
  }
273
15.2k
274
15.2k
  // Handle FP_TO_UINT by promoting the destination to a larger signed
275
15.2k
  // conversion.
276
15.2k
  setOperationAction(ISD::FP_TO_UINT       , MVT::i1   , Promote);
277
15.2k
  setOperationAction(ISD::FP_TO_UINT       , MVT::i8   , Promote);
278
15.2k
  setOperationAction(ISD::FP_TO_UINT       , MVT::i16  , Promote);
279
15.2k
280
15.2k
  if (Subtarget.is64Bit()) {
281
12.0k
    if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) {
282
1.48k
      // FP_TO_UINT-i32/i64 is legal for f32/f64, but custom for f80.
283
1.48k
      setOperationAction(ISD::FP_TO_UINT   , MVT::i32  , Custom);
284
1.48k
      setOperationAction(ISD::FP_TO_UINT   , MVT::i64  , Custom);
285
10.6k
    } else {
286
10.6k
      setOperationAction(ISD::FP_TO_UINT   , MVT::i32  , Promote);
287
10.6k
      setOperationAction(ISD::FP_TO_UINT   , MVT::i64  , Expand);
288
10.6k
    }
289
12.0k
  } else if (!Subtarget.useSoftFloat()) {
290
3.15k
    // Since AVX is a superset of SSE3, only check for SSE here.
291
3.15k
    if (Subtarget.hasSSE1() && !Subtarget.hasSSE3())
292
361
      // Expand FP_TO_UINT into a select.
293
361
      // FIXME: We would like to use a Custom expander here eventually to do
294
361
      // the optimal thing for SSE vs. the default expansion in the legalizer.
295
361
      setOperationAction(ISD::FP_TO_UINT   , MVT::i32  , Expand);
296
2.79k
    else
297
2.79k
      // With AVX512 we can use vcvts[ds]2usi for f32/f64->i32, f80 is custom.
298
2.79k
      // With SSE3 we can use fisttpll to convert to a signed i64; without
299
2.79k
      // SSE, we're stuck with a fistpll.
300
2.79k
      setOperationAction(ISD::FP_TO_UINT   , MVT::i32  , Custom);
301
3.15k
302
3.15k
    setOperationAction(ISD::FP_TO_UINT     , MVT::i64  , Custom);
303
3.15k
  }
304
15.2k
305
15.2k
  // TODO: when we have SSE, these could be more efficient, by using movd/movq.
306
15.2k
  if (!X86ScalarSSEf64) {
307
1.71k
    setOperationAction(ISD::BITCAST        , MVT::f32  , Expand);
308
1.71k
    setOperationAction(ISD::BITCAST        , MVT::i32  , Expand);
309
1.71k
    if (Subtarget.is64Bit()) {
310
33
      setOperationAction(ISD::BITCAST      , MVT::f64  , Expand);
311
33
      // Without SSE, i64->f64 goes through memory.
312
33
      setOperationAction(ISD::BITCAST      , MVT::i64  , Expand);
313
33
    }
314
13.5k
  } else if (!Subtarget.is64Bit())
315
1.47k
    setOperationAction(ISD::BITCAST      , MVT::i64  , Custom);
316
15.2k
317
15.2k
  // Scalar integer divide and remainder are lowered to use operations that
318
15.2k
  // produce two results, to match the available instructions. This exposes
319
15.2k
  // the two-result form to trivial CSE, which is able to combine x/y and x%y
320
15.2k
  // into a single instruction.
321
15.2k
  //
322
15.2k
  // Scalar integer multiply-high is also lowered to use two-result
323
15.2k
  // operations, to match the available instructions. However, plain multiply
324
15.2k
  // (low) operations are left as Legal, as there are single-result
325
15.2k
  // instructions for this in x86. Using the two-result multiply instructions
326
15.2k
  // when both high and low results are needed must be arranged by dagcombine.
327
61.0k
  for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
328
61.0k
    setOperationAction(ISD::MULHS, VT, Expand);
329
61.0k
    setOperationAction(ISD::MULHU, VT, Expand);
330
61.0k
    setOperationAction(ISD::SDIV, VT, Expand);
331
61.0k
    setOperationAction(ISD::UDIV, VT, Expand);
332
61.0k
    setOperationAction(ISD::SREM, VT, Expand);
333
61.0k
    setOperationAction(ISD::UREM, VT, Expand);
334
61.0k
  }
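Note: the payoff of the two-result form described in the comment above is that x86's DIV/IDIV produce quotient and remainder in one instruction, so x/y and x%y of the same operands can share it once CSE has merged them. A compiler-agnostic illustration of that pairing:

#include <cstdio>
#include <cstdlib>

int main() {
  int x = 1234, y = 7;
  std::div_t qr = std::div(x, y); // one divide yields both results
  std::printf("%d / %d = %d remainder %d\n", x, y, qr.quot, qr.rem);
  return 0;
}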
335
15.2k
336
15.2k
  setOperationAction(ISD::BR_JT            , MVT::Other, Expand);
337
15.2k
  setOperationAction(ISD::BRCOND           , MVT::Other, Custom);
338
15.2k
  for (auto VT : { MVT::f32, MVT::f64, MVT::f80, MVT::f128,
339
122k
                   MVT::i8,  MVT::i16, MVT::i32, MVT::i64 }) {
340
122k
    setOperationAction(ISD::BR_CC,     VT, Expand);
341
122k
    setOperationAction(ISD::SELECT_CC, VT, Expand);
342
122k
  }
343
15.2k
  if (Subtarget.is64Bit())
344
12.0k
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
345
15.2k
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16  , Legal);
346
15.2k
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8   , Legal);
347
15.2k
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1   , Expand);
348
15.2k
  setOperationAction(ISD::FP_ROUND_INREG   , MVT::f32  , Expand);
349
15.2k
350
15.2k
  setOperationAction(ISD::FREM             , MVT::f32  , Expand);
351
15.2k
  setOperationAction(ISD::FREM             , MVT::f64  , Expand);
352
15.2k
  setOperationAction(ISD::FREM             , MVT::f80  , Expand);
353
15.2k
  setOperationAction(ISD::FLT_ROUNDS_      , MVT::i32  , Custom);
354
15.2k
355
15.2k
  // Promote the i8 variants and force them on up to i32 which has a shorter
356
15.2k
  // encoding.
357
15.2k
  setOperationPromotedToType(ISD::CTTZ           , MVT::i8   , MVT::i32);
358
15.2k
  setOperationPromotedToType(ISD::CTTZ_ZERO_UNDEF, MVT::i8   , MVT::i32);
359
15.2k
  if (!Subtarget.hasBMI()) {
360
13.6k
    setOperationAction(ISD::CTTZ           , MVT::i16  , Custom);
361
13.6k
    setOperationAction(ISD::CTTZ           , MVT::i32  , Custom);
362
13.6k
    setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16  , Legal);
363
13.6k
    setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32  , Legal);
364
13.6k
    if (Subtarget.is64Bit()) {
365
10.6k
      setOperationAction(ISD::CTTZ         , MVT::i64  , Custom);
366
10.6k
      setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Legal);
367
10.6k
    }
368
13.6k
  }
369
15.2k
370
15.2k
  if (Subtarget.hasLZCNT()) {
371
1.55k
    // When promoting the i8 variants, force them to i32 for a shorter
372
1.55k
    // encoding.
373
1.55k
    setOperationPromotedToType(ISD::CTLZ           , MVT::i8   , MVT::i32);
374
1.55k
    setOperationPromotedToType(ISD::CTLZ_ZERO_UNDEF, MVT::i8   , MVT::i32);
375
13.7k
  } else {
376
13.7k
    setOperationAction(ISD::CTLZ           , MVT::i8   , Custom);
377
13.7k
    setOperationAction(ISD::CTLZ           , MVT::i16  , Custom);
378
13.7k
    setOperationAction(ISD::CTLZ           , MVT::i32  , Custom);
379
13.7k
    setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i8   , Custom);
380
13.7k
    setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16  , Custom);
381
13.7k
    setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32  , Custom);
382
13.7k
    if (Subtarget.is64Bit()) {
383
10.6k
      setOperationAction(ISD::CTLZ         , MVT::i64  , Custom);
384
10.6k
      setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Custom);
385
10.6k
    }
386
13.7k
  }
387
15.2k
388
15.2k
  // Special handling for half-precision floating point conversions.
389
15.2k
  // If we don't have F16C support, then lower half float conversions
390
15.2k
  // into library calls.
391
15.2k
  if (Subtarget.useSoftFloat() || !Subtarget.hasF16C()) {
392
12.3k
    setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
393
12.3k
    setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
394
12.3k
  }
395
15.2k
396
15.2k
  // There's never any support for operations beyond MVT::f32.
397
15.2k
  setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
398
15.2k
  setOperationAction(ISD::FP16_TO_FP, MVT::f80, Expand);
399
15.2k
  setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
400
15.2k
  setOperationAction(ISD::FP_TO_FP16, MVT::f80, Expand);
401
15.2k
402
15.2k
  setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
403
15.2k
  setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
404
15.2k
  setLoadExtAction(ISD::EXTLOAD, MVT::f80, MVT::f16, Expand);
405
15.2k
  setTruncStoreAction(MVT::f32, MVT::f16, Expand);
406
15.2k
  setTruncStoreAction(MVT::f64, MVT::f16, Expand);
407
15.2k
  setTruncStoreAction(MVT::f80, MVT::f16, Expand);
408
15.2k
409
15.2k
  if (Subtarget.hasPOPCNT()) {
410
2.14k
    setOperationPromotedToType(ISD::CTPOP, MVT::i8, MVT::i32);
411
13.1k
  } else {
412
13.1k
    setOperationAction(ISD::CTPOP          , MVT::i8   , Expand);
413
13.1k
    setOperationAction(ISD::CTPOP          , MVT::i16  , Expand);
414
13.1k
    setOperationAction(ISD::CTPOP          , MVT::i32  , Expand);
415
13.1k
    if (Subtarget.is64Bit())
416
10.1k
      setOperationAction(ISD::CTPOP        , MVT::i64  , Expand);
417
2.99k
    else
418
2.99k
      setOperationAction(ISD::CTPOP        , MVT::i64  , Custom);
419
13.1k
  }
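Note: when POPCNT is unavailable the Expand action falls back to generic lowering; the classic bit-twiddling expansion it amounts to for 32 bits looks roughly like this (a sketch, not the exact DAG produced):

#include <cstdint>

uint32_t popcount32(uint32_t x) {
  x = x - ((x >> 1) & 0x55555555u);                 // 2-bit partial sums
  x = (x & 0x33333333u) + ((x >> 2) & 0x33333333u); // 4-bit partial sums
  x = (x + (x >> 4)) & 0x0F0F0F0Fu;                 // 8-bit partial sums
  return (x * 0x01010101u) >> 24;                   // fold into the top byte
}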
420
15.2k
421
15.2k
  setOperationAction(ISD::READCYCLECOUNTER , MVT::i64  , Custom);
422
15.2k
423
15.2k
  if (!Subtarget.hasMOVBE())
424
13.6k
    setOperationAction(ISD::BSWAP          , MVT::i16  , Expand);
425
15.2k
426
15.2k
  // These should be promoted to a larger select which is supported.
427
15.2k
  setOperationAction(ISD::SELECT          , MVT::i1   , Promote);
428
15.2k
  // X86 wants to expand cmov itself.
429
61.0k
  for (auto VT : { MVT::f32, MVT::f64, MVT::f80, MVT::f128 }) {
430
61.0k
    setOperationAction(ISD::SELECT, VT, Custom);
431
61.0k
    setOperationAction(ISD::SETCC, VT, Custom);
432
61.0k
  }
433
61.0k
  for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
434
61.0k
    if (VT == MVT::i64 && !Subtarget.is64Bit())
435
3.16k
      continue;
436
57.8k
    setOperationAction(ISD::SELECT, VT, Custom);
437
57.8k
    setOperationAction(ISD::SETCC,  VT, Custom);
438
57.8k
  }
439
15.2k
440
15.2k
  // Custom action for SELECT MMX and expand action for SELECT_CC MMX
441
15.2k
  setOperationAction(ISD::SELECT, MVT::x86mmx, Custom);
442
15.2k
  setOperationAction(ISD::SELECT_CC, MVT::x86mmx, Expand);
443
15.2k
444
15.2k
  setOperationAction(ISD::EH_RETURN       , MVT::Other, Custom);
445
15.2k
  // NOTE: EH_SJLJ_SETJMP/_LONGJMP are not recommended, since
446
15.2k
  // LLVM/Clang supports zero-cost DWARF and SEH exception handling.
447
15.2k
  setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
448
15.2k
  setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
449
15.2k
  setOperationAction(ISD::EH_SJLJ_SETUP_DISPATCH, MVT::Other, Custom);
450
15.2k
  if (TM.Options.ExceptionModel == ExceptionHandling::SjLj)
451
4
    setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
452
15.2k
453
15.2k
  // Darwin ABI issue.
454
30.5k
  for (auto VT : { MVT::i32, MVT::i64 }) {
455
30.5k
    if (VT == MVT::i64 && !Subtarget.is64Bit())
456
3.16k
      continue;
457
27.3k
    setOperationAction(ISD::ConstantPool    , VT, Custom);
458
27.3k
    setOperationAction(ISD::JumpTable       , VT, Custom);
459
27.3k
    setOperationAction(ISD::GlobalAddress   , VT, Custom);
460
27.3k
    setOperationAction(ISD::GlobalTLSAddress, VT, Custom);
461
27.3k
    setOperationAction(ISD::ExternalSymbol  , VT, Custom);
462
27.3k
    setOperationAction(ISD::BlockAddress    , VT, Custom);
463
27.3k
  }
464
15.2k
465
15.2k
  // 64-bit shl, sra, srl (iff 32-bit x86)
466
30.5k
  for (auto VT : { MVT::i32, MVT::i64 }) {
467
30.5k
    if (VT == MVT::i64 && !Subtarget.is64Bit())
468
3.16k
      continue;
469
27.3k
    setOperationAction(ISD::SHL_PARTS, VT, Custom);
470
27.3k
    setOperationAction(ISD::SRA_PARTS, VT, Custom);
471
27.3k
    setOperationAction(ISD::SRL_PARTS, VT, Custom);
472
27.3k
  }
473
15.2k
474
15.2k
  if (Subtarget.hasSSEPrefetch() || Subtarget.has3DNow())
475
13.6k
    setOperationAction(ISD::PREFETCH      , MVT::Other, Legal);
476
15.2k
477
15.2k
  setOperationAction(ISD::ATOMIC_FENCE  , MVT::Other, Custom);
478
15.2k
479
15.2k
  // Expand certain atomics
480
61.0k
  for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
481
61.0k
    setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Custom);
482
61.0k
    setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom);
483
61.0k
    setOperationAction(ISD::ATOMIC_LOAD_ADD, VT, Custom);
484
61.0k
    setOperationAction(ISD::ATOMIC_LOAD_OR, VT, Custom);
485
61.0k
    setOperationAction(ISD::ATOMIC_LOAD_XOR, VT, Custom);
486
61.0k
    setOperationAction(ISD::ATOMIC_LOAD_AND, VT, Custom);
487
61.0k
    setOperationAction(ISD::ATOMIC_STORE, VT, Custom);
488
61.0k
  }
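Note: the atomic RMW operations marked Custom above ultimately map onto LOCK-prefixed instructions or a compare-exchange loop. In portable C++ terms the CAS-loop shape is roughly:

#include <atomic>
#include <cstdint>

int32_t atomicSubFetch(std::atomic<int32_t> &v, int32_t d) {
  int32_t old = v.load();
  while (!v.compare_exchange_weak(old, old - d)) {
    // 'old' is reloaded on failure; retry until the exchange succeeds.
  }
  return old - d;
}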
489
15.2k
490
15.2k
  if (!Subtarget.is64Bit())
491
3.16k
    setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Custom);
492
15.2k
493
15.2k
  if (Subtarget.hasCmpxchg16b()) {
494
4.04k
    setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i128, Custom);
495
4.04k
  }
496
15.2k
497
15.2k
  // FIXME - use subtarget debug flags
498
15.2k
  if (!Subtarget.isTargetDarwin() && !Subtarget.isTargetELF() &&
499
15.2k
      !Subtarget.isTargetCygMing() && !Subtarget.isTargetWin64() &&
500
15.2k
      TM.Options.ExceptionModel != ExceptionHandling::SjLj) {
501
281
    setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
502
281
  }
503
15.2k
504
15.2k
  setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i32, Custom);
505
15.2k
  setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i64, Custom);
506
15.2k
507
15.2k
  setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
508
15.2k
  setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
509
15.2k
510
15.2k
  setOperationAction(ISD::TRAP, MVT::Other, Legal);
511
15.2k
  setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
512
15.2k
513
15.2k
  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
514
15.2k
  setOperationAction(ISD::VASTART           , MVT::Other, Custom);
515
15.2k
  setOperationAction(ISD::VAEND             , MVT::Other, Expand);
516
15.2k
  bool Is64Bit = Subtarget.is64Bit();
517
15.2k
  setOperationAction(ISD::VAARG,  MVT::Other, Is64Bit ? Custom : Expand);
518
15.2k
  setOperationAction(ISD::VACOPY, MVT::Other, Is64Bit ? Custom : Expand);
519
15.2k
520
15.2k
  setOperationAction(ISD::STACKSAVE,          MVT::Other, Expand);
521
15.2k
  setOperationAction(ISD::STACKRESTORE,       MVT::Other, Expand);
522
15.2k
523
15.2k
  setOperationAction(ISD::DYNAMIC_STACKALLOC, PtrVT, Custom);
524
15.2k
525
15.2k
  // GC_TRANSITION_START and GC_TRANSITION_END need custom lowering.
526
15.2k
  setOperationAction(ISD::GC_TRANSITION_START, MVT::Other, Custom);
527
15.2k
  setOperationAction(ISD::GC_TRANSITION_END, MVT::Other, Custom);
528
15.2k
529
15.2k
  if (!Subtarget.useSoftFloat() && X86ScalarSSEf64) {
530
13.5k
    // f32 and f64 use SSE.
531
13.5k
    // Set up the FP register classes.
532
13.5k
    addRegisterClass(MVT::f32, Subtarget.hasAVX512() ? &X86::FR32XRegClass
533
13.5k
                                                     : &X86::FR32RegClass);
534
13.5k
    addRegisterClass(MVT::f64, Subtarget.hasAVX512() ? &X86::FR64XRegClass
535
13.5k
                                                     : &X86::FR64RegClass);
536
13.5k
537
13.5k
    // Disable f32->f64 extload as we can only generate this in one instruction
538
13.5k
    // under optsize. So it's easier to pattern match (fpext (load)) for that
539
13.5k
    // case instead of needing to emit 2 instructions for extload in the
540
13.5k
    // non-optsize case.
541
13.5k
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
542
13.5k
543
27.0k
    for (auto VT : { MVT::f32, MVT::f64 }) {
544
27.0k
      // Use ANDPD to simulate FABS.
545
27.0k
      setOperationAction(ISD::FABS, VT, Custom);
546
27.0k
547
27.0k
      // Use XORP to simulate FNEG.
548
27.0k
      setOperationAction(ISD::FNEG, VT, Custom);
549
27.0k
550
27.0k
      // Use ANDPD and ORPD to simulate FCOPYSIGN.
551
27.0k
      setOperationAction(ISD::FCOPYSIGN, VT, Custom);
552
27.0k
553
27.0k
      // These might be better off as horizontal vector ops.
554
27.0k
      setOperationAction(ISD::FADD, VT, Custom);
555
27.0k
      setOperationAction(ISD::FSUB, VT, Custom);
556
27.0k
557
27.0k
      // We don't support sin/cos/fmod
558
27.0k
      setOperationAction(ISD::FSIN   , VT, Expand);
559
27.0k
      setOperationAction(ISD::FCOS   , VT, Expand);
560
27.0k
      setOperationAction(ISD::FSINCOS, VT, Expand);
561
27.0k
    }
562
13.5k
563
13.5k
    // Lower this to MOVMSK plus an AND.
564
13.5k
    setOperationAction(ISD::FGETSIGN, MVT::i64, Custom);
565
13.5k
    setOperationAction(ISD::FGETSIGN, MVT::i32, Custom);
566
13.5k
567
13.5k
  } else if (!useSoftFloat() && X86ScalarSSEf32 && (UseX87 || Is64Bit)) {
568
70
    // Use SSE for f32, x87 for f64.
569
70
    // Set up the FP register classes.
570
70
    addRegisterClass(MVT::f32, &X86::FR32RegClass);
571
70
    if (UseX87)
572
68
      addRegisterClass(MVT::f64, &X86::RFP64RegClass);
573
70
574
70
    // Use ANDPS to simulate FABS.
575
70
    setOperationAction(ISD::FABS , MVT::f32, Custom);
576
70
577
70
    // Use XORP to simulate FNEG.
578
70
    setOperationAction(ISD::FNEG , MVT::f32, Custom);
579
70
580
70
    if (UseX87)
581
68
      setOperationAction(ISD::UNDEF, MVT::f64, Expand);
582
70
583
70
    // Use ANDPS and ORPS to simulate FCOPYSIGN.
584
70
    if (UseX87)
585
68
      setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
586
70
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
587
70
588
70
    // We don't support sin/cos/fmod
589
70
    setOperationAction(ISD::FSIN   , MVT::f32, Expand);
590
70
    setOperationAction(ISD::FCOS   , MVT::f32, Expand);
591
70
    setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
592
70
593
70
    if (UseX87) {
594
68
      // Always expand sin/cos functions even though x87 has an instruction.
595
68
      setOperationAction(ISD::FSIN, MVT::f64, Expand);
596
68
      setOperationAction(ISD::FCOS, MVT::f64, Expand);
597
68
      setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
598
68
    }
599
1.66k
  } else if (UseX87) {
600
1.63k
    // f32 and f64 in x87.
601
1.63k
    // Set up the FP register classes.
602
1.63k
    addRegisterClass(MVT::f64, &X86::RFP64RegClass);
603
1.63k
    addRegisterClass(MVT::f32, &X86::RFP32RegClass);
604
1.63k
605
3.26k
    for (auto VT : { MVT::f32, MVT::f64 }) {
606
3.26k
      setOperationAction(ISD::UNDEF,     VT, Expand);
607
3.26k
      setOperationAction(ISD::FCOPYSIGN, VT, Expand);
608
3.26k
609
3.26k
      // Always expand sin/cos functions even though x87 has an instruction.
610
3.26k
      setOperationAction(ISD::FSIN   , VT, Expand);
611
3.26k
      setOperationAction(ISD::FCOS   , VT, Expand);
612
3.26k
      setOperationAction(ISD::FSINCOS, VT, Expand);
613
3.26k
    }
614
1.63k
  }
615
15.2k
616
15.2k
  // Expand FP32 immediates into loads from the stack, save special cases.
617
15.2k
  if (isTypeLegal(MVT::f32)) {
618
15.2k
    if (UseX87 && (getRegClassFor(MVT::f32) == &X86::RFP32RegClass)) {
619
1.63k
      addLegalFPImmediate(APFloat(+0.0f)); // FLD0
620
1.63k
      addLegalFPImmediate(APFloat(+1.0f)); // FLD1
621
1.63k
      addLegalFPImmediate(APFloat(-0.0f)); // FLD0/FCHS
622
1.63k
      addLegalFPImmediate(APFloat(-1.0f)); // FLD1/FCHS
623
1.63k
    } else // SSE immediates.
624
13.6k
      addLegalFPImmediate(APFloat(+0.0f)); // xorps
625
15.2k
  }
626
15.2k
  // Expand FP64 immediates into loads from the stack, save special cases.
627
15.2k
  if (isTypeLegal(MVT::f64)) {
628
15.2k
    if (UseX87 && getRegClassFor(MVT::f64) == &X86::RFP64RegClass) {
629
1.70k
      addLegalFPImmediate(APFloat(+0.0)); // FLD0
630
1.70k
      addLegalFPImmediate(APFloat(+1.0)); // FLD1
631
1.70k
      addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
632
1.70k
      addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
633
1.70k
    } else // SSE immediates.
634
13.5k
      addLegalFPImmediate(APFloat(+0.0)); // xorpd
635
15.2k
  }
636
15.2k
637
15.2k
  // We don't support FMA.
638
15.2k
  setOperationAction(ISD::FMA, MVT::f64, Expand);
639
15.2k
  setOperationAction(ISD::FMA, MVT::f32, Expand);
640
15.2k
641
15.2k
  // Long double always uses X87, except f128 in MMX.
642
15.2k
  if (UseX87) {
643
15.2k
    if (Subtarget.is64Bit() && Subtarget.hasMMX()) {
644
4.78k
      addRegisterClass(MVT::f128, Subtarget.hasVLX() ? &X86::VR128XRegClass
645
4.78k
                                                     : &X86::VR128RegClass);
646
4.78k
      ValueTypeActions.setTypeAction(MVT::f128, TypeSoftenFloat);
647
4.78k
      setOperationAction(ISD::FABS , MVT::f128, Custom);
648
4.78k
      setOperationAction(ISD::FNEG , MVT::f128, Custom);
649
4.78k
      setOperationAction(ISD::FCOPYSIGN, MVT::f128, Custom);
650
4.78k
    }
651
15.2k
652
15.2k
    addRegisterClass(MVT::f80, &X86::RFP80RegClass);
653
15.2k
    setOperationAction(ISD::UNDEF,     MVT::f80, Expand);
654
15.2k
    setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand);
655
15.2k
    {
656
15.2k
      APFloat TmpFlt = APFloat::getZero(APFloat::x87DoubleExtended());
657
15.2k
      addLegalFPImmediate(TmpFlt);  // FLD0
658
15.2k
      TmpFlt.changeSign();
659
15.2k
      addLegalFPImmediate(TmpFlt);  // FLD0/FCHS
660
15.2k
661
15.2k
      bool ignored;
662
15.2k
      APFloat TmpFlt2(+1.0);
663
15.2k
      TmpFlt2.convert(APFloat::x87DoubleExtended(), APFloat::rmNearestTiesToEven,
664
15.2k
                      &ignored);
665
15.2k
      addLegalFPImmediate(TmpFlt2);  // FLD1
666
15.2k
      TmpFlt2.changeSign();
667
15.2k
      addLegalFPImmediate(TmpFlt2);  // FLD1/FCHS
668
15.2k
    }
669
15.2k
670
15.2k
    // Always expand sin/cos functions even though x87 has an instruction.
671
15.2k
    setOperationAction(ISD::FSIN   , MVT::f80, Expand);
672
15.2k
    setOperationAction(ISD::FCOS   , MVT::f80, Expand);
673
15.2k
    setOperationAction(ISD::FSINCOS, MVT::f80, Expand);
674
15.2k
675
15.2k
    setOperationAction(ISD::FFLOOR, MVT::f80, Expand);
676
15.2k
    setOperationAction(ISD::FCEIL,  MVT::f80, Expand);
677
15.2k
    setOperationAction(ISD::FTRUNC, MVT::f80, Expand);
678
15.2k
    setOperationAction(ISD::FRINT,  MVT::f80, Expand);
679
15.2k
    setOperationAction(ISD::FNEARBYINT, MVT::f80, Expand);
680
15.2k
    setOperationAction(ISD::FMA, MVT::f80, Expand);
681
15.2k
    setOperationAction(ISD::LROUND, MVT::f80, Expand);
682
15.2k
    setOperationAction(ISD::LLROUND, MVT::f80, Expand);
683
15.2k
    setOperationAction(ISD::LRINT, MVT::f80, Expand);
684
15.2k
    setOperationAction(ISD::LLRINT, MVT::f80, Expand);
685
15.2k
  }
686
15.2k
687
15.2k
  // Always use a library call for pow.
688
15.2k
  setOperationAction(ISD::FPOW             , MVT::f32  , Expand);
689
15.2k
  setOperationAction(ISD::FPOW             , MVT::f64  , Expand);
690
15.2k
  setOperationAction(ISD::FPOW             , MVT::f80  , Expand);
691
15.2k
692
15.2k
  setOperationAction(ISD::FLOG, MVT::f80, Expand);
693
15.2k
  setOperationAction(ISD::FLOG2, MVT::f80, Expand);
694
15.2k
  setOperationAction(ISD::FLOG10, MVT::f80, Expand);
695
15.2k
  setOperationAction(ISD::FEXP, MVT::f80, Expand);
696
15.2k
  setOperationAction(ISD::FEXP2, MVT::f80, Expand);
697
15.2k
  setOperationAction(ISD::FMINNUM, MVT::f80, Expand);
698
15.2k
  setOperationAction(ISD::FMAXNUM, MVT::f80, Expand);
699
15.2k
700
15.2k
  // Some FP actions are always expanded for vector types.
701
15.2k
  for (auto VT : { MVT::v4f32, MVT::v8f32, MVT::v16f32,
702
91.5k
                   MVT::v2f64, MVT::v4f64, MVT::v8f64 }) {
703
91.5k
    setOperationAction(ISD::FSIN,      VT, Expand);
704
91.5k
    setOperationAction(ISD::FSINCOS,   VT, Expand);
705
91.5k
    setOperationAction(ISD::FCOS,      VT, Expand);
706
91.5k
    setOperationAction(ISD::FREM,      VT, Expand);
707
91.5k
    setOperationAction(ISD::FCOPYSIGN, VT, Expand);
708
91.5k
    setOperationAction(ISD::FPOW,      VT, Expand);
709
91.5k
    setOperationAction(ISD::FLOG,      VT, Expand);
710
91.5k
    setOperationAction(ISD::FLOG2,     VT, Expand);
711
91.5k
    setOperationAction(ISD::FLOG10,    VT, Expand);
712
91.5k
    setOperationAction(ISD::FEXP,      VT, Expand);
713
91.5k
    setOperationAction(ISD::FEXP2,     VT, Expand);
714
91.5k
  }
715
15.2k
716
15.2k
  // First set operation action for all vector types to either promote
717
15.2k
  // (for widening) or expand (for scalarization). Then we will selectively
718
15.2k
  // turn on ones that can be effectively codegen'd.
719
1.69M
  for (MVT VT : MVT::vector_valuetypes()) {
720
1.69M
    setOperationAction(ISD::SDIV, VT, Expand);
721
1.69M
    setOperationAction(ISD::UDIV, VT, Expand);
722
1.69M
    setOperationAction(ISD::SREM, VT, Expand);
723
1.69M
    setOperationAction(ISD::UREM, VT, Expand);
724
1.69M
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT,Expand);
725
1.69M
    setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
726
1.69M
    setOperationAction(ISD::EXTRACT_SUBVECTOR, VT,Expand);
727
1.69M
    setOperationAction(ISD::INSERT_SUBVECTOR, VT,Expand);
728
1.69M
    setOperationAction(ISD::FMA,  VT, Expand);
729
1.69M
    setOperationAction(ISD::FFLOOR, VT, Expand);
730
1.69M
    setOperationAction(ISD::FCEIL, VT, Expand);
731
1.69M
    setOperationAction(ISD::FTRUNC, VT, Expand);
732
1.69M
    setOperationAction(ISD::FRINT, VT, Expand);
733
1.69M
    setOperationAction(ISD::FNEARBYINT, VT, Expand);
734
1.69M
    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
735
1.69M
    setOperationAction(ISD::MULHS, VT, Expand);
736
1.69M
    setOperationAction(ISD::UMUL_LOHI, VT, Expand);
737
1.69M
    setOperationAction(ISD::MULHU, VT, Expand);
738
1.69M
    setOperationAction(ISD::SDIVREM, VT, Expand);
739
1.69M
    setOperationAction(ISD::UDIVREM, VT, Expand);
740
1.69M
    setOperationAction(ISD::CTPOP, VT, Expand);
741
1.69M
    setOperationAction(ISD::CTTZ, VT, Expand);
742
1.69M
    setOperationAction(ISD::CTLZ, VT, Expand);
743
1.69M
    setOperationAction(ISD::ROTL, VT, Expand);
744
1.69M
    setOperationAction(ISD::ROTR, VT, Expand);
745
1.69M
    setOperationAction(ISD::BSWAP, VT, Expand);
746
1.69M
    setOperationAction(ISD::SETCC, VT, Expand);
747
1.69M
    setOperationAction(ISD::FP_TO_UINT, VT, Expand);
748
1.69M
    setOperationAction(ISD::FP_TO_SINT, VT, Expand);
749
1.69M
    setOperationAction(ISD::UINT_TO_FP, VT, Expand);
750
1.69M
    setOperationAction(ISD::SINT_TO_FP, VT, Expand);
751
1.69M
    setOperationAction(ISD::SIGN_EXTEND_INREG, VT,Expand);
752
1.69M
    setOperationAction(ISD::TRUNCATE, VT, Expand);
753
1.69M
    setOperationAction(ISD::SIGN_EXTEND, VT, Expand);
754
1.69M
    setOperationAction(ISD::ZERO_EXTEND, VT, Expand);
755
1.69M
    setOperationAction(ISD::ANY_EXTEND, VT, Expand);
756
1.69M
    setOperationAction(ISD::SELECT_CC, VT, Expand);
757
187M
    for (MVT InnerVT : MVT::vector_valuetypes()) {
758
187M
      setTruncStoreAction(InnerVT, VT, Expand);
759
187M
760
187M
      setLoadExtAction(ISD::SEXTLOAD, InnerVT, VT, Expand);
761
187M
      setLoadExtAction(ISD::ZEXTLOAD, InnerVT, VT, Expand);
762
187M
763
187M
      // N.b. ISD::EXTLOAD legality is basically ignored except for i1-like
764
187M
      // types, we have to deal with them whether we ask for Expansion or not.
765
187M
      // Setting Expand causes its own optimisation problems though, so leave
766
187M
      // them legal.
767
187M
      if (VT.getVectorElementType() == MVT::i1)
768
27.0M
        setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
769
187M
770
187M
      // EXTLOAD for MVT::f16 vectors is not legal because f16 vectors are
771
187M
      // split/scalarized right now.
772
187M
      if (VT.getVectorElementType() == MVT::f16)
773
10.1M
        setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
774
187M
    }
775
1.69M
  }
776
15.2k
777
15.2k
  // FIXME: In order to prevent SSE instructions being expanded to MMX ones
778
15.2k
  // with -msoft-float, disable use of MMX as well.
779
15.2k
  if (!Subtarget.useSoftFloat() && Subtarget.hasMMX()) {
780
5.66k
    addRegisterClass(MVT::x86mmx, &X86::VR64RegClass);
781
5.66k
    // No operations on x86mmx supported, everything uses intrinsics.
782
5.66k
  }
783
15.2k
784
15.2k
  if (!Subtarget.useSoftFloat() && Subtarget.hasSSE1()) {
785
13.6k
    addRegisterClass(MVT::v4f32, Subtarget.hasVLX() ? &X86::VR128XRegClass
786
13.6k
                                                    : &X86::VR128RegClass);
787
13.6k
788
13.6k
    setOperationAction(ISD::FNEG,               MVT::v4f32, Custom);
789
13.6k
    setOperationAction(ISD::FABS,               MVT::v4f32, Custom);
790
13.6k
    setOperationAction(ISD::FCOPYSIGN,          MVT::v4f32, Custom);
791
13.6k
    setOperationAction(ISD::BUILD_VECTOR,       MVT::v4f32, Custom);
792
13.6k
    setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v4f32, Custom);
793
13.6k
    setOperationAction(ISD::VSELECT,            MVT::v4f32, Custom);
794
13.6k
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
795
13.6k
    setOperationAction(ISD::SELECT,             MVT::v4f32, Custom);
796
13.6k
    setOperationAction(ISD::UINT_TO_FP,         MVT::v4i32, Custom);
797
13.6k
798
13.6k
    setOperationAction(ISD::LOAD,               MVT::v2f32, Custom);
799
13.6k
    setOperationAction(ISD::STORE,              MVT::v2f32, Custom);
800
13.6k
  }
801
15.2k
802
15.2k
  if (!Subtarget.useSoftFloat() && Subtarget.hasSSE2()) {
803
13.5k
    addRegisterClass(MVT::v2f64, Subtarget.hasVLX() ? &X86::VR128XRegClass
804
13.5k
                                                    : &X86::VR128RegClass);
805
13.5k
806
13.5k
    // FIXME: Unfortunately, -soft-float and -no-implicit-float mean XMM
807
13.5k
    // registers cannot be used even for integer operations.
808
13.5k
    addRegisterClass(MVT::v16i8, Subtarget.hasVLX() ? &X86::VR128XRegClass
809
13.5k
                                                    : &X86::VR128RegClass);
810
13.5k
    addRegisterClass(MVT::v8i16, Subtarget.hasVLX() ? &X86::VR128XRegClass
811
13.5k
                                                    : &X86::VR128RegClass);
812
13.5k
    addRegisterClass(MVT::v4i32, Subtarget.hasVLX() ? &X86::VR128XRegClass
813
13.5k
                                                    : &X86::VR128RegClass);
814
13.5k
    addRegisterClass(MVT::v2i64, Subtarget.hasVLX() ? &X86::VR128XRegClass
815
13.5k
                                                    : &X86::VR128RegClass);
816
13.5k
817
13.5k
    for (auto VT : { MVT::v2i8, MVT::v4i8, MVT::v8i8,
818
81.2k
                     MVT::v2i16, MVT::v4i16, MVT::v2i32 }) {
819
81.2k
      setOperationAction(ISD::SDIV, VT, Custom);
820
81.2k
      setOperationAction(ISD::SREM, VT, Custom);
821
81.2k
      setOperationAction(ISD::UDIV, VT, Custom);
822
81.2k
      setOperationAction(ISD::UREM, VT, Custom);
823
81.2k
    }
824
13.5k
825
13.5k
    setOperationAction(ISD::MUL,                MVT::v2i8,  Custom);
826
13.5k
    setOperationAction(ISD::MUL,                MVT::v2i16, Custom);
827
13.5k
    setOperationAction(ISD::MUL,                MVT::v2i32, Custom);
828
13.5k
    setOperationAction(ISD::MUL,                MVT::v4i8,  Custom);
829
13.5k
    setOperationAction(ISD::MUL,                MVT::v4i16, Custom);
830
13.5k
    setOperationAction(ISD::MUL,                MVT::v8i8,  Custom);
831
13.5k
832
13.5k
    setOperationAction(ISD::MUL,                MVT::v16i8, Custom);
833
13.5k
    setOperationAction(ISD::MUL,                MVT::v4i32, Custom);
834
13.5k
    setOperationAction(ISD::MUL,                MVT::v2i64, Custom);
835
13.5k
    setOperationAction(ISD::MULHU,              MVT::v4i32, Custom);
836
13.5k
    setOperationAction(ISD::MULHS,              MVT::v4i32, Custom);
837
13.5k
    setOperationAction(ISD::MULHU,              MVT::v16i8, Custom);
838
13.5k
    setOperationAction(ISD::MULHS,              MVT::v16i8, Custom);
839
13.5k
    setOperationAction(ISD::MULHU,              MVT::v8i16, Legal);
840
13.5k
    setOperationAction(ISD::MULHS,              MVT::v8i16, Legal);
841
13.5k
    setOperationAction(ISD::MUL,                MVT::v8i16, Legal);
842
13.5k
    setOperationAction(ISD::FNEG,               MVT::v2f64, Custom);
843
13.5k
    setOperationAction(ISD::FABS,               MVT::v2f64, Custom);
844
13.5k
    setOperationAction(ISD::FCOPYSIGN,          MVT::v2f64, Custom);
845
13.5k
846
54.1k
    for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
847
54.1k
      setOperationAction(ISD::SMAX, VT, VT == MVT::v8i16 ? Legal : Custom);
848
54.1k
      setOperationAction(ISD::SMIN, VT, VT == MVT::v8i16 ? Legal : Custom);
849
54.1k
      setOperationAction(ISD::UMAX, VT, VT == MVT::v16i8 ? Legal : Custom);
850
54.1k
      setOperationAction(ISD::UMIN, VT, VT == MVT::v16i8 ? Legal : Custom);
851
54.1k
    }
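Note: the Legal/Custom split above reflects which of these operations have a dedicated SSE2 instruction (e.g. PMAXSW for v8i16 SMAX, PMAXUB for v16i8 UMAX); the Custom cases are built from a compare plus a blend, conceptually:

#include <cstdint>

// Sketch of a Custom v4i32 signed max under plain SSE2: compare, then select.
void smaxV4i32(const int32_t a[4], const int32_t b[4], int32_t out[4]) {
  for (int i = 0; i < 4; ++i) {
    bool gt = a[i] > b[i];     // pcmpgtd produces the mask
    out[i] = gt ? a[i] : b[i]; // pand/pandn/por implement the blend
  }
}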
852
13.5k
853
13.5k
    setOperationAction(ISD::UADDSAT,            MVT::v16i8, Legal);
854
13.5k
    setOperationAction(ISD::SADDSAT,            MVT::v16i8, Legal);
855
13.5k
    setOperationAction(ISD::USUBSAT,            MVT::v16i8, Legal);
856
13.5k
    setOperationAction(ISD::SSUBSAT,            MVT::v16i8, Legal);
857
13.5k
    setOperationAction(ISD::UADDSAT,            MVT::v8i16, Legal);
858
13.5k
    setOperationAction(ISD::SADDSAT,            MVT::v8i16, Legal);
859
13.5k
    setOperationAction(ISD::USUBSAT,            MVT::v8i16, Legal);
860
13.5k
    setOperationAction(ISD::SSUBSAT,            MVT::v8i16, Legal);
861
13.5k
    setOperationAction(ISD::UADDSAT,            MVT::v4i32, Custom);
862
13.5k
    setOperationAction(ISD::USUBSAT,            MVT::v4i32, Custom);
863
13.5k
    setOperationAction(ISD::UADDSAT,            MVT::v2i64, Custom);
864
13.5k
    setOperationAction(ISD::USUBSAT,            MVT::v2i64, Custom);
865
13.5k
866
13.5k
    if (!ExperimentalVectorWideningLegalization) {
867
13.1k
      // Use widening instead of promotion.
868
13.1k
      for (auto VT : { MVT::v8i8, MVT::v4i8, MVT::v2i8,
869
65.9k
                       MVT::v4i16, MVT::v2i16 }) {
870
65.9k
        setOperationAction(ISD::UADDSAT, VT, Custom);
871
65.9k
        setOperationAction(ISD::SADDSAT, VT, Custom);
872
65.9k
        setOperationAction(ISD::USUBSAT, VT, Custom);
873
65.9k
        setOperationAction(ISD::SSUBSAT, VT, Custom);
874
65.9k
      }
875
13.1k
    }
876
13.5k
877
13.5k
    setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v8i16, Custom);
878
13.5k
    setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v4i32, Custom);
879
13.5k
    setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v4f32, Custom);
880
13.5k
881
13.5k
    // Provide custom widening for v2f32 setcc. This is really for VLX when
882
13.5k
    // setcc result type returns v2i1/v4i1 vector for v2f32/v4f32 leading to
883
13.5k
    // type legalization changing the result type to v4i1 during widening.
884
13.5k
    // It works fine for SSE2 and is probably faster so no need to qualify with
885
13.5k
    // VLX support.
886
13.5k
    setOperationAction(ISD::SETCC,               MVT::v2i32, Custom);
887
13.5k
888
54.1k
    for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
889
54.1k
      setOperationAction(ISD::SETCC,              VT, Custom);
890
54.1k
      setOperationAction(ISD::CTPOP,              VT, Custom);
891
54.1k
      setOperationAction(ISD::ABS,                VT, Custom);
892
54.1k
893
54.1k
      // The condition codes aren't legal in SSE/AVX and under AVX512 we use
894
54.1k
      // setcc all the way to isel and prefer SETGT in some isel patterns.
895
54.1k
      setCondCodeAction(ISD::SETLT, VT, Custom);
896
54.1k
      setCondCodeAction(ISD::SETLE, VT, Custom);
897
54.1k
    }
898
13.5k
899
40.6k
    for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
900
40.6k
      setOperationAction(ISD::SCALAR_TO_VECTOR,   VT, Custom);
901
40.6k
      setOperationAction(ISD::BUILD_VECTOR,       VT, Custom);
902
40.6k
      setOperationAction(ISD::VECTOR_SHUFFLE,     VT, Custom);
903
40.6k
      setOperationAction(ISD::VSELECT,            VT, Custom);
904
40.6k
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
905
40.6k
    }
906
13.5k
907
13.5k
    // We support custom legalizing of sext and anyext loads for specific
908
13.5k
    // memory vector types which we can load as a scalar (or sequence of
909
13.5k
    // scalars) and extend in-register to a legal 128-bit vector type. For sext
910
13.5k
    // loads these must work with a single scalar load.
911
1.05M
    for (MVT VT : MVT::integer_vector_valuetypes()) {
912
1.05M
      setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i8, Custom);
913
1.05M
      setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i16, Custom);
914
1.05M
      setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i32, Custom);
915
1.05M
      setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4i8, Custom);
916
1.05M
      setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4i16, Custom);
917
1.05M
      setLoadExtAction(ISD::EXTLOAD, VT, MVT::v8i8, Custom);
918
1.05M
    }
919
13.5k
920
27.0k
    for (auto VT : { MVT::v2f64, MVT::v2i64 }) {
921
27.0k
      setOperationAction(ISD::BUILD_VECTOR,       VT, Custom);
922
27.0k
      setOperationAction(ISD::VECTOR_SHUFFLE,     VT, Custom);
923
27.0k
      setOperationAction(ISD::VSELECT,            VT, Custom);
924
27.0k
925
27.0k
      if (VT == MVT::v2i64 && !Subtarget.is64Bit())
926
1.47k
        continue;
927
25.6k
928
25.6k
      setOperationAction(ISD::INSERT_VECTOR_ELT,  VT, Custom);
929
25.6k
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
930
25.6k
    }
931
13.5k
932
13.5k
    // Custom lower v2i64 and v2f64 selects.
933
13.5k
    setOperationAction(ISD::SELECT,             MVT::v2f64, Custom);
934
13.5k
    setOperationAction(ISD::SELECT,             MVT::v2i64, Custom);
935
13.5k
    setOperationAction(ISD::SELECT,             MVT::v4i32, Custom);
936
13.5k
    setOperationAction(ISD::SELECT,             MVT::v8i16, Custom);
937
13.5k
    setOperationAction(ISD::SELECT,             MVT::v16i8, Custom);
938
13.5k
939
13.5k
    setOperationAction(ISD::FP_TO_SINT,         MVT::v4i32, Legal);
940
13.5k
    setOperationAction(ISD::FP_TO_SINT,         MVT::v2i32, Custom);
941
13.5k
    setOperationAction(ISD::FP_TO_SINT,         MVT::v2i16, Custom);
942
13.5k
943
13.5k
    // Custom legalize these to avoid over promotion or custom promotion.
944
13.5k
    setOperationAction(ISD::FP_TO_SINT,         MVT::v2i8,  Custom);
945
13.5k
    setOperationAction(ISD::FP_TO_SINT,         MVT::v4i8,  Custom);
946
13.5k
    setOperationAction(ISD::FP_TO_SINT,         MVT::v8i8,  Custom);
947
13.5k
    setOperationAction(ISD::FP_TO_SINT,         MVT::v2i16, Custom);
948
13.5k
    setOperationAction(ISD::FP_TO_SINT,         MVT::v4i16, Custom);
949
13.5k
    setOperationAction(ISD::FP_TO_UINT,         MVT::v2i8,  Custom);
950
13.5k
    setOperationAction(ISD::FP_TO_UINT,         MVT::v4i8,  Custom);
951
13.5k
    setOperationAction(ISD::FP_TO_UINT,         MVT::v8i8,  Custom);
952
13.5k
    setOperationAction(ISD::FP_TO_UINT,         MVT::v2i16, Custom);
953
13.5k
    setOperationAction(ISD::FP_TO_UINT,         MVT::v4i16, Custom);
954
13.5k
955
13.5k
    // By marking FP_TO_SINT v8i16 as Custom, will trick type legalization into
956
13.5k
    // promoting v8i8 FP_TO_UINT into FP_TO_SINT. When the v8i16 FP_TO_SINT is
957
13.5k
    // split again based on the input type, this will cause an AssertSExt i16 to
958
13.5k
    // be emitted instead of an AssertZExt. This will allow packssdw followed by
959
13.5k
    // packuswb to be used to truncate to v8i8. This is necessary since packusdw
960
13.5k
    // isn't available until sse4.1.
961
13.5k
    setOperationAction(ISD::FP_TO_SINT,         MVT::v8i16, Custom);
962
13.5k
963
13.5k
    setOperationAction(ISD::SINT_TO_FP,         MVT::v4i32, Legal);
964
13.5k
    setOperationAction(ISD::SINT_TO_FP,         MVT::v2i32, Custom);
965
13.5k
966
13.5k
    setOperationAction(ISD::UINT_TO_FP,         MVT::v2i32, Custom);
967
13.5k
968
13.5k
    // Fast v2f32 UINT_TO_FP( v2i32 ) custom conversion.
969
13.5k
    setOperationAction(ISD::UINT_TO_FP,         MVT::v2f32, Custom);
970
13.5k
971
13.5k
    setOperationAction(ISD::FP_EXTEND,          MVT::v2f32, Custom);
972
13.5k
    setOperationAction(ISD::FP_ROUND,           MVT::v2f32, Custom);
973
13.5k
974
13.5k
    // We want to legalize this to an f64 load rather than an i64 load on
975
13.5k
    // 64-bit targets and two 32-bit loads on a 32-bit target. Similar for
976
13.5k
    // store.
977
13.5k
    setOperationAction(ISD::LOAD,               MVT::v2i32, Custom);
978
13.5k
    setOperationAction(ISD::LOAD,               MVT::v4i16, Custom);
979
13.5k
    setOperationAction(ISD::LOAD,               MVT::v8i8,  Custom);
980
13.5k
    setOperationAction(ISD::STORE,              MVT::v2i32, Custom);
981
13.5k
    setOperationAction(ISD::STORE,              MVT::v4i16, Custom);
982
13.5k
    setOperationAction(ISD::STORE,              MVT::v8i8,  Custom);
983
13.5k
984
13.5k
    setOperationAction(ISD::BITCAST,            MVT::v2i32, Custom);
985
13.5k
    setOperationAction(ISD::BITCAST,            MVT::v4i16, Custom);
986
13.5k
    setOperationAction(ISD::BITCAST,            MVT::v8i8,  Custom);
987
13.5k
    if (!Subtarget.hasAVX512())
988
11.8k
      setOperationAction(ISD::BITCAST, MVT::v16i1, Custom);
989
13.5k
990
13.5k
    setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v2i64, Custom);
991
13.5k
    setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v4i32, Custom);
992
13.5k
    setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v8i16, Custom);
993
13.5k
994
13.5k
    if (ExperimentalVectorWideningLegalization) {
995
360
      setOperationAction(ISD::SIGN_EXTEND, MVT::v4i64, Custom);
996
360
997
360
      setOperationAction(ISD::TRUNCATE,    MVT::v2i8,  Custom);
998
360
      setOperationAction(ISD::TRUNCATE,    MVT::v2i16, Custom);
999
360
      setOperationAction(ISD::TRUNCATE,    MVT::v2i32, Custom);
1000
360
      setOperationAction(ISD::TRUNCATE,    MVT::v4i8,  Custom);
1001
360
      setOperationAction(ISD::TRUNCATE,    MVT::v4i16, Custom);
1002
360
      setOperationAction(ISD::TRUNCATE,    MVT::v8i8,  Custom);
1003
13.1k
    } else {
1004
13.1k
      setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v4i64, Custom);
1005
13.1k
    }
1006
13.5k
1007
13.5k
    // In the customized shift lowering, the legal v4i32/v2i64 cases
1008
13.5k
    // in AVX2 will be recognized.
1009
54.1k
    for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
1010
54.1k
      setOperationAction(ISD::SRL,              VT, Custom);
1011
54.1k
      setOperationAction(ISD::SHL,              VT, Custom);
1012
54.1k
      setOperationAction(ISD::SRA,              VT, Custom);
1013
54.1k
    }
1014
13.5k
1015
13.5k
    setOperationAction(ISD::ROTL,               MVT::v4i32, Custom);
1016
13.5k
    setOperationAction(ISD::ROTL,               MVT::v8i16, Custom);
1017
13.5k
1018
13.5k
    // With AVX512, expanding (and promoting the shifts) is better.
1019
13.5k
    if (!Subtarget.hasAVX512())
1020
11.8k
      setOperationAction(ISD::ROTL,             MVT::v16i8, Custom);
1021
13.5k
  }
1022
15.2k
1023
15.2k
  if (!Subtarget.useSoftFloat() && Subtarget.hasSSSE3()) {
1024
7.63k
    setOperationAction(ISD::ABS,                MVT::v16i8, Legal);
1025
7.63k
    setOperationAction(ISD::ABS,                MVT::v8i16, Legal);
1026
7.63k
    setOperationAction(ISD::ABS,                MVT::v4i32, Legal);
1027
7.63k
    setOperationAction(ISD::BITREVERSE,         MVT::v16i8, Custom);
1028
7.63k
    setOperationAction(ISD::CTLZ,               MVT::v16i8, Custom);
1029
7.63k
    setOperationAction(ISD::CTLZ,               MVT::v8i16, Custom);
1030
7.63k
    setOperationAction(ISD::CTLZ,               MVT::v4i32, Custom);
1031
7.63k
    setOperationAction(ISD::CTLZ,               MVT::v2i64, Custom);
1032
7.63k
1033
7.63k
    // These might be better off as horizontal vector ops.
1034
7.63k
    setOperationAction(ISD::ADD,                MVT::i16, Custom);
1035
7.63k
    setOperationAction(ISD::ADD,                MVT::i32, Custom);
1036
7.63k
    setOperationAction(ISD::SUB,                MVT::i16, Custom);
1037
7.63k
    setOperationAction(ISD::SUB,                MVT::i32, Custom);
1038
7.63k
  }
1039
15.2k
1040
15.2k
  if (!Subtarget.useSoftFloat() && Subtarget.hasSSE41()) {
1041
22.5k
    for (MVT RoundedTy : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64}) {
1042
22.5k
      setOperationAction(ISD::FFLOOR,           RoundedTy,  Legal);
1043
22.5k
      setOperationAction(ISD::FCEIL,            RoundedTy,  Legal);
1044
22.5k
      setOperationAction(ISD::FTRUNC,           RoundedTy,  Legal);
1045
22.5k
      setOperationAction(ISD::FRINT,            RoundedTy,  Legal);
1046
22.5k
      setOperationAction(ISD::FNEARBYINT,       RoundedTy,  Legal);
1047
22.5k
    }
1048
5.62k
1049
5.62k
    setOperationAction(ISD::SMAX,               MVT::v16i8, Legal);
1050
5.62k
    setOperationAction(ISD::SMAX,               MVT::v4i32, Legal);
1051
5.62k
    setOperationAction(ISD::UMAX,               MVT::v8i16, Legal);
1052
5.62k
    setOperationAction(ISD::UMAX,               MVT::v4i32, Legal);
1053
5.62k
    setOperationAction(ISD::SMIN,               MVT::v16i8, Legal);
1054
5.62k
    setOperationAction(ISD::SMIN,               MVT::v4i32, Legal);
1055
5.62k
    setOperationAction(ISD::UMIN,               MVT::v8i16, Legal);
1056
5.62k
    setOperationAction(ISD::UMIN,               MVT::v4i32, Legal);
1057
5.62k
1058
5.62k
    // FIXME: Do we need to handle scalar-to-vector here?
1059
5.62k
    setOperationAction(ISD::MUL,                MVT::v4i32, Legal);
1060
5.62k
1061
5.62k
    // We directly match byte blends in the backend as they match the VSELECT
1062
5.62k
    // condition form.
1063
5.62k
    setOperationAction(ISD::VSELECT,            MVT::v16i8, Legal);
1064
5.62k
1065
5.62k
    // SSE41 brings specific instructions for doing vector sign extend even in
1066
5.62k
    // cases where we don't have SRA.
1067
16.8k
    for (auto VT : { MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
1068
16.8k
      setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Legal);
1069
16.8k
      setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Legal);
1070
16.8k
    }
1071
5.62k
1072
5.62k
    if (!ExperimentalVectorWideningLegalization) {
1073
5.34k
      // Avoid narrow result types when widening. The legal types are listed
1074
5.34k
      // in the next loop.
1075
416k
      for (MVT VT : MVT::integer_vector_valuetypes()) {
1076
416k
        setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i8, Custom);
1077
416k
        setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i16, Custom);
1078
416k
        setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i32, Custom);
1079
416k
      }
1080
5.34k
    }
1081
5.62k
1082
5.62k
    // SSE41 also has vector sign/zero extending loads, PMOV[SZ]X
1083
11.2k
    for (auto LoadExtOp : { ISD::SEXTLOAD, ISD::ZEXTLOAD }) {
1084
11.2k
      setLoadExtAction(LoadExtOp, MVT::v8i16, MVT::v8i8,  Legal);
1085
11.2k
      setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i8,  Legal);
1086
11.2k
      if (!ExperimentalVectorWideningLegalization)
1087
10.6k
        setLoadExtAction(LoadExtOp, MVT::v2i32, MVT::v2i8,  Legal);
1088
11.2k
      setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i8,  Legal);
1089
11.2k
      setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i16, Legal);
1090
11.2k
      setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i16, Legal);
1091
11.2k
      setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i32, Legal);
1092
11.2k
    }
1093
5.62k
1094
5.62k
    // i8 vectors are custom because the source register and source
1095
5.62k
    // memory operand types are not the same width.
1096
5.62k
    setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v16i8, Custom);
1097
5.62k
  }
1098
15.2k
1099
15.2k
  if (!Subtarget.useSoftFloat() && Subtarget.hasXOP()) {
1100
177
    for (auto VT : { MVT::v16i8, MVT::v8i16,  MVT::v4i32, MVT::v2i64,
1101
177
                     MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 })
1102
1.41k
      setOperationAction(ISD::ROTL, VT, Custom);
1103
177
1104
177
    // XOP can efficiently perform BITREVERSE with VPPERM.
1105
177
    for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 })
1106
708
      setOperationAction(ISD::BITREVERSE, VT, Custom);
1107
177
1108
177
    for (auto VT : { MVT::v16i8, MVT::v8i16,  MVT::v4i32, MVT::v2i64,
1109
177
                     MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 })
1110
1.41k
      setOperationAction(ISD::BITREVERSE, VT, Custom);
1111
177
  }
1112
15.2k
1113
15.2k
  if (!Subtarget.useSoftFloat() && Subtarget.hasAVX()) {
1114
4.53k
    bool HasInt256 = Subtarget.hasInt256();
1115
4.53k
1116
4.53k
    addRegisterClass(MVT::v32i8,  Subtarget.hasVLX() ? &X86::VR256XRegClass
1117
4.53k
                                                     : &X86::VR256RegClass);
1118
4.53k
    addRegisterClass(MVT::v16i16, Subtarget.hasVLX() ? &X86::VR256XRegClass
1119
4.53k
                                                     : &X86::VR256RegClass);
1120
4.53k
    addRegisterClass(MVT::v8i32,  Subtarget.hasVLX() ? &X86::VR256XRegClass
1121
4.53k
                                                     : &X86::VR256RegClass);
1122
4.53k
    addRegisterClass(MVT::v8f32,  Subtarget.hasVLX() ? &X86::VR256XRegClass
1123
4.53k
                                                     : &X86::VR256RegClass);
1124
4.53k
    addRegisterClass(MVT::v4i64,  Subtarget.hasVLX() ? &X86::VR256XRegClass
1125
4.53k
                                                     : &X86::VR256RegClass);
1126
4.53k
    addRegisterClass(MVT::v4f64,  Subtarget.hasVLX() ? &X86::VR256XRegClass
1127
4.53k
                                                     : &X86::VR256RegClass);
1128
4.53k
1129
9.06k
    for (auto VT : { MVT::v8f32, MVT::v4f64 }) {
1130
9.06k
      setOperationAction(ISD::FFLOOR,     VT, Legal);
1131
9.06k
      setOperationAction(ISD::FCEIL,      VT, Legal);
1132
9.06k
      setOperationAction(ISD::FTRUNC,     VT, Legal);
1133
9.06k
      setOperationAction(ISD::FRINT,      VT, Legal);
1134
9.06k
      setOperationAction(ISD::FNEARBYINT, VT, Legal);
1135
9.06k
      setOperationAction(ISD::FNEG,       VT, Custom);
1136
9.06k
      setOperationAction(ISD::FABS,       VT, Custom);
1137
9.06k
      setOperationAction(ISD::FCOPYSIGN,  VT, Custom);
1138
9.06k
    }
1139
4.53k
1140
4.53k
    // (fp_to_int:v8i16 (v8f32 ..)) requires the result type to be promoted
1141
4.53k
    // even though v8i16 is a legal type.
1142
4.53k
    setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v8i16, MVT::v8i32);
1143
4.53k
    setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v8i16, MVT::v8i32);
1144
4.53k
    setOperationAction(ISD::FP_TO_SINT,         MVT::v8i32, Legal);
1145
4.53k
1146
4.53k
    setOperationAction(ISD::SINT_TO_FP,         MVT::v8i32, Legal);
1147
4.53k
1148
4.53k
    if (!Subtarget.hasAVX512())
1149
2.86k
      setOperationAction(ISD::BITCAST, MVT::v32i1, Custom);
1150
4.53k
1151
4.53k
    // In the customized shift lowering, the legal v8i32/v4i64 cases
1152
4.53k
    // in AVX2 will be recognized.
1153
18.1k
    for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1154
18.1k
      setOperationAction(ISD::SRL, VT, Custom);
1155
18.1k
      setOperationAction(ISD::SHL, VT, Custom);
1156
18.1k
      setOperationAction(ISD::SRA, VT, Custom);
1157
18.1k
    }
1158
4.53k
1159
4.53k
    // These types need custom splitting if their input is a 128-bit vector.
1160
4.53k
    setOperationAction(ISD::SIGN_EXTEND,       MVT::v8i64,  Custom);
1161
4.53k
    setOperationAction(ISD::SIGN_EXTEND,       MVT::v16i32, Custom);
1162
4.53k
    setOperationAction(ISD::ZERO_EXTEND,       MVT::v8i64,  Custom);
1163
4.53k
    setOperationAction(ISD::ZERO_EXTEND,       MVT::v16i32, Custom);
1164
4.53k
1165
4.53k
    setOperationAction(ISD::ROTL,              MVT::v8i32,  Custom);
1166
4.53k
    setOperationAction(ISD::ROTL,              MVT::v16i16, Custom);
1167
4.53k
1168
4.53k
    // With BWI, expanding (and promoting the shifts) is better.
1169
4.53k
    if (!Subtarget.hasBWI())
1170
3.74k
      setOperationAction(ISD::ROTL,            MVT::v32i8,  Custom);
1171
4.53k
1172
4.53k
    setOperationAction(ISD::SELECT,            MVT::v4f64, Custom);
1173
4.53k
    setOperationAction(ISD::SELECT,            MVT::v4i64, Custom);
1174
4.53k
    setOperationAction(ISD::SELECT,            MVT::v8i32, Custom);
1175
4.53k
    setOperationAction(ISD::SELECT,            MVT::v16i16, Custom);
1176
4.53k
    setOperationAction(ISD::SELECT,            MVT::v32i8, Custom);
1177
4.53k
    setOperationAction(ISD::SELECT,            MVT::v8f32, Custom);
1178
4.53k
1179
13.5k
    for (auto VT : { MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1180
13.5k
      setOperationAction(ISD::SIGN_EXTEND,     VT, Custom);
1181
13.5k
      setOperationAction(ISD::ZERO_EXTEND,     VT, Custom);
1182
13.5k
      setOperationAction(ISD::ANY_EXTEND,      VT, Custom);
1183
13.5k
    }
1184
4.53k
1185
4.53k
    setOperationAction(ISD::TRUNCATE,          MVT::v16i8, Custom);
1186
4.53k
    setOperationAction(ISD::TRUNCATE,          MVT::v8i16, Custom);
1187
4.53k
    setOperationAction(ISD::TRUNCATE,          MVT::v4i32, Custom);
1188
4.53k
    setOperationAction(ISD::BITREVERSE,        MVT::v32i8, Custom);
1189
4.53k
1190
18.1k
    for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1191
18.1k
      setOperationAction(ISD::SETCC,           VT, Custom);
1192
18.1k
      setOperationAction(ISD::CTPOP,           VT, Custom);
1193
18.1k
      setOperationAction(ISD::CTLZ,            VT, Custom);
1194
18.1k
1195
18.1k
      // The condition codes aren't legal in SSE/AVX and under AVX512 we use
1196
18.1k
      // setcc all the way to isel and prefer SETGT in some isel patterns.
1197
18.1k
      setCondCodeAction(ISD::SETLT, VT, Custom);
1198
18.1k
      setCondCodeAction(ISD::SETLE, VT, Custom);
1199
18.1k
    }
1200
4.53k
1201
4.53k
    if (Subtarget.hasAnyFMA()) {
1202
3.02k
      for (auto VT : { MVT::f32, MVT::f64, MVT::v4f32, MVT::v8f32,
1203
3.02k
                       MVT::v2f64, MVT::v4f64 })
1204
18.1k
        setOperationAction(ISD::FMA, VT, Legal);
1205
3.02k
    }
1206
4.53k
1207
18.1k
    for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1208
18.1k
      setOperationAction(ISD::ADD, VT, HasInt256 ? Legal : Custom);
1209
18.1k
      setOperationAction(ISD::SUB, VT, HasInt256 ? Legal : Custom);
1210
18.1k
    }
1211
4.53k
1212
4.53k
    setOperationAction(ISD::MUL,       MVT::v4i64,  Custom);
1213
4.53k
    setOperationAction(ISD::MUL,       MVT::v8i32,  HasInt256 ? Legal : Custom);
1214
4.53k
    setOperationAction(ISD::MUL,       MVT::v16i16, HasInt256 ? Legal : Custom);
1215
4.53k
    setOperationAction(ISD::MUL,       MVT::v32i8,  Custom);
1216
4.53k
1217
4.53k
    setOperationAction(ISD::MULHU,     MVT::v8i32,  Custom);
1218
4.53k
    setOperationAction(ISD::MULHS,     MVT::v8i32,  Custom);
1219
4.53k
    setOperationAction(ISD::MULHU,     MVT::v16i16, HasInt256 ? Legal : Custom);
1220
4.53k
    setOperationAction(ISD::MULHS,     MVT::v16i16, HasInt256 ? Legal : Custom);
1221
4.53k
    setOperationAction(ISD::MULHU,     MVT::v32i8,  Custom);
1222
4.53k
    setOperationAction(ISD::MULHS,     MVT::v32i8,  Custom);
1223
4.53k
1224
4.53k
    setOperationAction(ISD::ABS,       MVT::v4i64,  Custom);
1225
4.53k
    setOperationAction(ISD::SMAX,      MVT::v4i64,  Custom);
1226
4.53k
    setOperationAction(ISD::UMAX,      MVT::v4i64,  Custom);
1227
4.53k
    setOperationAction(ISD::SMIN,      MVT::v4i64,  Custom);
1228
4.53k
    setOperationAction(ISD::UMIN,      MVT::v4i64,  Custom);
1229
4.53k
1230
4.53k
    setOperationAction(ISD::UADDSAT,   MVT::v32i8,  HasInt256 ? Legal : Custom);
1231
4.53k
    setOperationAction(ISD::SADDSAT,   MVT::v32i8,  HasInt256 ? Legal : Custom);
1232
4.53k
    setOperationAction(ISD::USUBSAT,   MVT::v32i8,  HasInt256 ? Legal : Custom);
1233
4.53k
    setOperationAction(ISD::SSUBSAT,   MVT::v32i8,  HasInt256 ? Legal : Custom);
1234
4.53k
    setOperationAction(ISD::UADDSAT,   MVT::v16i16, HasInt256 ? Legal : Custom);
1235
4.53k
    setOperationAction(ISD::SADDSAT,   MVT::v16i16, HasInt256 ? Legal : Custom);
1236
4.53k
    setOperationAction(ISD::USUBSAT,   MVT::v16i16, HasInt256 ? Legal : Custom);
1237
4.53k
    setOperationAction(ISD::SSUBSAT,   MVT::v16i16, HasInt256 ? Legal : Custom);
1238
4.53k
1239
13.5k
    for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32 }) {
1240
13.5k
      setOperationAction(ISD::ABS,  VT, HasInt256 ? Legal : Custom);
1241
13.5k
      setOperationAction(ISD::SMAX, VT, HasInt256 ? Legal : Custom);
1242
13.5k
      setOperationAction(ISD::UMAX, VT, HasInt256 ? Legal : Custom);
1243
13.5k
      setOperationAction(ISD::SMIN, VT, HasInt256 ? Legal : Custom);
1244
13.5k
      setOperationAction(ISD::UMIN, VT, HasInt256 ? Legal : Custom);
1245
13.5k
    }
1246
4.53k
1247
13.5k
    for (auto VT : {MVT::v16i16, MVT::v8i32, MVT::v4i64}) {
1248
13.5k
      setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Custom);
1249
13.5k
      setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom);
1250
13.5k
    }
1251
4.53k
1252
4.53k
    if (HasInt256) {
1253
3.40k
      // The custom lowering for UINT_TO_FP for v8i32 becomes interesting
1254
3.40k
      // when we have a 256bit-wide blend with immediate.
1255
3.40k
      setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Custom);
1256
3.40k
1257
3.40k
      // AVX2 also has wider vector sign/zero extending loads, VPMOV[SZ]X
1258
6.80k
      for (auto LoadExtOp : { ISD::SEXTLOAD, ISD::ZEXTLOAD }) {
1259
6.80k
        setLoadExtAction(LoadExtOp, MVT::v16i16, MVT::v16i8, Legal);
1260
6.80k
        setLoadExtAction(LoadExtOp, MVT::v8i32,  MVT::v8i8,  Legal);
1261
6.80k
        setLoadExtAction(LoadExtOp, MVT::v4i64,  MVT::v4i8,  Legal);
1262
6.80k
        setLoadExtAction(LoadExtOp, MVT::v8i32,  MVT::v8i16, Legal);
1263
6.80k
        setLoadExtAction(LoadExtOp, MVT::v4i64,  MVT::v4i16, Legal);
1264
6.80k
        setLoadExtAction(LoadExtOp, MVT::v4i64,  MVT::v4i32, Legal);
1265
6.80k
      }
1266
3.40k
    }
1267
4.53k
1268
4.53k
    for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1269
36.2k
                     MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 }) {
1270
36.2k
      setOperationAction(ISD::MLOAD,  VT, Subtarget.hasVLX() ? Legal : Custom);
1271
36.2k
      setOperationAction(ISD::MSTORE, VT, Legal);
1272
36.2k
    }
1273
4.53k
1274
4.53k
    // Extract subvector is special because the value type
1275
4.53k
    // (result) is 128-bit but the source is 256-bit wide.
1276
4.53k
    for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
1277
27.1k
                     MVT::v4f32, MVT::v2f64 }) {
1278
27.1k
      setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
1279
27.1k
    }
1280
4.53k
1281
4.53k
    // Custom lower several nodes for 256-bit types.
1282
4.53k
    for (MVT VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64,
1283
27.1k
                    MVT::v8f32, MVT::v4f64 }) {
1284
27.1k
      setOperationAction(ISD::BUILD_VECTOR,       VT, Custom);
1285
27.1k
      setOperationAction(ISD::VECTOR_SHUFFLE,     VT, Custom);
1286
27.1k
      setOperationAction(ISD::VSELECT,            VT, Custom);
1287
27.1k
      setOperationAction(ISD::INSERT_VECTOR_ELT,  VT, Custom);
1288
27.1k
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1289
27.1k
      setOperationAction(ISD::SCALAR_TO_VECTOR,   VT, Custom);
1290
27.1k
      setOperationAction(ISD::INSERT_SUBVECTOR,   VT, Legal);
1291
27.1k
      setOperationAction(ISD::CONCAT_VECTORS,     VT, Custom);
1292
27.1k
      setOperationAction(ISD::STORE,              VT, Custom);
1293
27.1k
    }
1294
4.53k
1295
4.53k
    if (HasInt256)
1296
3.40k
      setOperationAction(ISD::VSELECT,         MVT::v32i8, Legal);
1297
4.53k
1298
4.53k
    if (HasInt256) {
1299
3.40k
      // Custom legalize 2x32 to get a little better code.
1300
3.40k
      setOperationAction(ISD::MGATHER, MVT::v2f32, Custom);
1301
3.40k
      setOperationAction(ISD::MGATHER, MVT::v2i32, Custom);
1302
3.40k
1303
3.40k
      for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1304
3.40k
                       MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 })
1305
27.2k
        setOperationAction(ISD::MGATHER,  VT, Custom);
1306
3.40k
    }
1307
4.53k
  }
1308
15.2k
1309
15.2k
  // This block controls legalization of the mask vector sizes that are
1310
15.2k
  // available with AVX512. 512-bit vectors are in a separate block controlled
1311
15.2k
  // by useAVX512Regs.
1312
15.2k
  if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) {
1313
1.66k
    addRegisterClass(MVT::v1i1,   &X86::VK1RegClass);
1314
1.66k
    addRegisterClass(MVT::v2i1,   &X86::VK2RegClass);
1315
1.66k
    addRegisterClass(MVT::v4i1,   &X86::VK4RegClass);
1316
1.66k
    addRegisterClass(MVT::v8i1,   &X86::VK8RegClass);
1317
1.66k
    addRegisterClass(MVT::v16i1,  &X86::VK16RegClass);
1318
1.66k
1319
1.66k
    setOperationAction(ISD::SELECT,             MVT::v1i1, Custom);
1320
1.66k
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v1i1, Custom);
1321
1.66k
    setOperationAction(ISD::BUILD_VECTOR,       MVT::v1i1, Custom);
1322
1.66k
1323
1.66k
    setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v8i1,  MVT::v8i32);
1324
1.66k
    setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v8i1,  MVT::v8i32);
1325
1.66k
    setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v4i1,  MVT::v4i32);
1326
1.66k
    setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v4i1,  MVT::v4i32);
1327
1.66k
    setOperationAction(ISD::FP_TO_SINT,         MVT::v2i1,  Custom);
1328
1.66k
    setOperationAction(ISD::FP_TO_UINT,         MVT::v2i1,  Custom);
1329
1.66k
1330
1.66k
    // There is no byte sized k-register load or store without AVX512DQ.
1331
1.66k
    if (!Subtarget.hasDQI()) {
1332
1.21k
      setOperationAction(ISD::LOAD, MVT::v1i1, Custom);
1333
1.21k
      setOperationAction(ISD::LOAD, MVT::v2i1, Custom);
1334
1.21k
      setOperationAction(ISD::LOAD, MVT::v4i1, Custom);
1335
1.21k
      setOperationAction(ISD::LOAD, MVT::v8i1, Custom);
1336
1.21k
1337
1.21k
      setOperationAction(ISD::STORE, MVT::v1i1, Custom);
1338
1.21k
      setOperationAction(ISD::STORE, MVT::v2i1, Custom);
1339
1.21k
      setOperationAction(ISD::STORE, MVT::v4i1, Custom);
1340
1.21k
      setOperationAction(ISD::STORE, MVT::v8i1, Custom);
1341
1.21k
    }
1342
1.66k
1343
1.66k
    // Extends of v16i1/v8i1/v4i1/v2i1 to 128-bit vectors.
1344
6.67k
    for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
1345
6.67k
      setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
1346
6.67k
      setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
1347
6.67k
      setOperationAction(ISD::ANY_EXTEND,  VT, Custom);
1348
6.67k
    }
1349
1.66k
1350
6.67k
    for (auto VT : { MVT::v2i1, MVT::v4i1, MVT::v8i1, MVT::v16i1 }) {
1351
6.67k
      setOperationAction(ISD::ADD,              VT, Custom);
1352
6.67k
      setOperationAction(ISD::SUB,              VT, Custom);
1353
6.67k
      setOperationAction(ISD::MUL,              VT, Custom);
1354
6.67k
      setOperationAction(ISD::SETCC,            VT, Custom);
1355
6.67k
      setOperationAction(ISD::SELECT,           VT, Custom);
1356
6.67k
      setOperationAction(ISD::TRUNCATE,         VT, Custom);
1357
6.67k
      setOperationAction(ISD::UADDSAT,          VT, Custom);
1358
6.67k
      setOperationAction(ISD::SADDSAT,          VT, Custom);
1359
6.67k
      setOperationAction(ISD::USUBSAT,          VT, Custom);
1360
6.67k
      setOperationAction(ISD::SSUBSAT,          VT, Custom);
1361
6.67k
1362
6.67k
      setOperationAction(ISD::BUILD_VECTOR,     VT, Custom);
1363
6.67k
      setOperationAction(ISD::CONCAT_VECTORS,   VT, Custom);
1364
6.67k
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1365
6.67k
      setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
1366
6.67k
      setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1367
6.67k
      setOperationAction(ISD::VECTOR_SHUFFLE,   VT,  Custom);
1368
6.67k
      setOperationAction(ISD::VSELECT,          VT,  Expand);
1369
6.67k
    }
1370
1.66k
1371
1.66k
    for (auto VT : { MVT::v1i1, MVT::v2i1, MVT::v4i1, MVT::v8i1 })
1372
6.67k
      setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
1373
1.66k
  }
1374
15.2k
1375
15.2k
  // This block controls legalization for 512-bit operations with 32/64 bit
1376
15.2k
  // elements. 512-bits can be disabled based on prefer-vector-width and
1377
15.2k
  // required-vector-width function attributes.
1378
15.2k
  if (!Subtarget.useSoftFloat() && Subtarget.useAVX512Regs()) {
1379
1.66k
    addRegisterClass(MVT::v16i32, &X86::VR512RegClass);
1380
1.66k
    addRegisterClass(MVT::v16f32, &X86::VR512RegClass);
1381
1.66k
    addRegisterClass(MVT::v8i64,  &X86::VR512RegClass);
1382
1.66k
    addRegisterClass(MVT::v8f64,  &X86::VR512RegClass);
1383
1.66k
1384
3.32k
    for (auto ExtType : {ISD::ZEXTLOAD, ISD::SEXTLOAD}) {
1385
3.32k
      setLoadExtAction(ExtType, MVT::v16i32, MVT::v16i8,  Legal);
1386
3.32k
      setLoadExtAction(ExtType, MVT::v16i32, MVT::v16i16, Legal);
1387
3.32k
      setLoadExtAction(ExtType, MVT::v8i64,  MVT::v8i8,   Legal);
1388
3.32k
      setLoadExtAction(ExtType, MVT::v8i64,  MVT::v8i16,  Legal);
1389
3.32k
      setLoadExtAction(ExtType, MVT::v8i64,  MVT::v8i32,  Legal);
1390
3.32k
    }
1391
1.66k
1392
3.32k
    for (MVT VT : { MVT::v16f32, MVT::v8f64 }) {
1393
3.32k
      setOperationAction(ISD::FNEG,  VT, Custom);
1394
3.32k
      setOperationAction(ISD::FABS,  VT, Custom);
1395
3.32k
      setOperationAction(ISD::FMA,   VT, Legal);
1396
3.32k
      setOperationAction(ISD::FCOPYSIGN, VT, Custom);
1397
3.32k
    }
1398
1.66k
1399
1.66k
    setOperationAction(ISD::FP_TO_SINT,         MVT::v16i32, Legal);
1400
1.66k
    setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v16i16, MVT::v16i32);
1401
1.66k
    setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v16i8, MVT::v16i32);
1402
1.66k
    setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v16i1, MVT::v16i32);
1403
1.66k
    setOperationAction(ISD::FP_TO_UINT,         MVT::v16i32, Legal);
1404
1.66k
    setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v16i1, MVT::v16i32);
1405
1.66k
    setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v16i8, MVT::v16i32);
1406
1.66k
    setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v16i16, MVT::v16i32);
1407
1.66k
    setOperationAction(ISD::SINT_TO_FP,         MVT::v16i32, Legal);
1408
1.66k
    setOperationAction(ISD::UINT_TO_FP,         MVT::v16i32, Legal);
1409
1.66k
1410
1.66k
    setTruncStoreAction(MVT::v8i64,   MVT::v8i8,   Legal);
1411
1.66k
    setTruncStoreAction(MVT::v8i64,   MVT::v8i16,  Legal);
1412
1.66k
    setTruncStoreAction(MVT::v8i64,   MVT::v8i32,  Legal);
1413
1.66k
    setTruncStoreAction(MVT::v16i32,  MVT::v16i8,  Legal);
1414
1.66k
    setTruncStoreAction(MVT::v16i32,  MVT::v16i16, Legal);
1415
1.66k
1416
1.66k
    // With 512-bit vectors and no VLX, we prefer to widen MLOAD/MSTORE
1417
1.66k
    // to 512-bit rather than use the AVX2 instructions so that we can use
1418
1.66k
    // k-masks.
1419
1.66k
    if (!Subtarget.hasVLX()) {
1420
883
      for (auto VT : {MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1421
7.06k
           MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64}) {
1422
7.06k
        setOperationAction(ISD::MLOAD,  VT, Custom);
1423
7.06k
        setOperationAction(ISD::MSTORE, VT, Custom);
1424
7.06k
      }
1425
883
    }
1426
1.66k
1427
1.66k
    setOperationAction(ISD::TRUNCATE,           MVT::v8i32, Custom);
1428
1.66k
    setOperationAction(ISD::TRUNCATE,           MVT::v16i16, Custom);
1429
1.66k
    setOperationAction(ISD::ZERO_EXTEND,        MVT::v16i32, Custom);
1430
1.66k
    setOperationAction(ISD::ZERO_EXTEND,        MVT::v8i64, Custom);
1431
1.66k
    setOperationAction(ISD::ANY_EXTEND,         MVT::v16i32, Custom);
1432
1.66k
    setOperationAction(ISD::ANY_EXTEND,         MVT::v8i64, Custom);
1433
1.66k
    setOperationAction(ISD::SIGN_EXTEND,        MVT::v16i32, Custom);
1434
1.66k
    setOperationAction(ISD::SIGN_EXTEND,        MVT::v8i64, Custom);
1435
1.66k
1436
1.66k
    if (ExperimentalVectorWideningLegalization) {
1437
132
      // Need to custom widen this if we don't have AVX512BW.
1438
132
      setOperationAction(ISD::ANY_EXTEND,         MVT::v8i8, Custom);
1439
132
      setOperationAction(ISD::ZERO_EXTEND,        MVT::v8i8, Custom);
1440
132
      setOperationAction(ISD::SIGN_EXTEND,        MVT::v8i8, Custom);
1441
132
    }
1442
1.66k
1443
3.32k
    for (auto VT : { MVT::v16f32, MVT::v8f64 }) {
1444
3.32k
      setOperationAction(ISD::FFLOOR,           VT, Legal);
1445
3.32k
      setOperationAction(ISD::FCEIL,            VT, Legal);
1446
3.32k
      setOperationAction(ISD::FTRUNC,           VT, Legal);
1447
3.32k
      setOperationAction(ISD::FRINT,            VT, Legal);
1448
3.32k
      setOperationAction(ISD::FNEARBYINT,       VT, Legal);
1449
3.32k
1450
3.32k
      setOperationAction(ISD::SELECT,           VT, Custom);
1451
3.32k
    }
1452
1.66k
1453
1.66k
    // Without BWI we need to use custom lowering to handle MVT::v64i8 input.
1454
4.98k
    for (auto VT : {MVT::v16i32, MVT::v8i64, MVT::v64i8}) {
1455
4.98k
      setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Custom);
1456
4.98k
      setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom);
1457
4.98k
    }
1458
1.66k
1459
1.66k
    setOperationAction(ISD::CONCAT_VECTORS,     MVT::v8f64,  Custom);
1460
1.66k
    setOperationAction(ISD::CONCAT_VECTORS,     MVT::v8i64,  Custom);
1461
1.66k
    setOperationAction(ISD::CONCAT_VECTORS,     MVT::v16f32,  Custom);
1462
1.66k
    setOperationAction(ISD::CONCAT_VECTORS,     MVT::v16i32,  Custom);
1463
1.66k
1464
1.66k
    setOperationAction(ISD::MUL,                MVT::v8i64, Custom);
1465
1.66k
    setOperationAction(ISD::MUL,                MVT::v16i32, Legal);
1466
1.66k
1467
1.66k
    setOperationAction(ISD::MULHU,              MVT::v16i32,  Custom);
1468
1.66k
    setOperationAction(ISD::MULHS,              MVT::v16i32,  Custom);
1469
1.66k
1470
3.32k
    for (auto VT : { MVT::v16i32, MVT::v8i64 }) {
1471
3.32k
      setOperationAction(ISD::SMAX,             VT, Legal);
1472
3.32k
      setOperationAction(ISD::UMAX,             VT, Legal);
1473
3.32k
      setOperationAction(ISD::SMIN,             VT, Legal);
1474
3.32k
      setOperationAction(ISD::UMIN,             VT, Legal);
1475
3.32k
      setOperationAction(ISD::ABS,              VT, Legal);
1476
3.32k
      setOperationAction(ISD::SRL,              VT, Custom);
1477
3.32k
      setOperationAction(ISD::SHL,              VT, Custom);
1478
3.32k
      setOperationAction(ISD::SRA,              VT, Custom);
1479
3.32k
      setOperationAction(ISD::CTPOP,            VT, Custom);
1480
3.32k
      setOperationAction(ISD::ROTL,             VT, Custom);
1481
3.32k
      setOperationAction(ISD::ROTR,             VT, Custom);
1482
3.32k
      setOperationAction(ISD::SETCC,            VT, Custom);
1483
3.32k
      setOperationAction(ISD::SELECT,           VT, Custom);
1484
3.32k
1485
3.32k
      // The condition codes aren't legal in SSE/AVX and under AVX512 we use
1486
3.32k
      // setcc all the way to isel and prefer SETGT in some isel patterns.
1487
3.32k
      setCondCodeAction(ISD::SETLT, VT, Custom);
1488
3.32k
      setCondCodeAction(ISD::SETLE, VT, Custom);
1489
3.32k
    }
1490
1.66k
1491
1.66k
    if (Subtarget.hasDQI()) {
1492
450
      setOperationAction(ISD::SINT_TO_FP, MVT::v8i64, Legal);
1493
450
      setOperationAction(ISD::UINT_TO_FP, MVT::v8i64, Legal);
1494
450
      setOperationAction(ISD::FP_TO_SINT, MVT::v8i64, Legal);
1495
450
      setOperationAction(ISD::FP_TO_UINT, MVT::v8i64, Legal);
1496
450
1497
450
      setOperationAction(ISD::MUL,        MVT::v8i64, Legal);
1498
450
    }
1499
1.66k
1500
1.66k
    if (Subtarget.hasCDI()) {
1501
294
      // NonVLX sub-targets extend 128/256 vectors to use the 512 version.
1502
588
      for (auto VT : { MVT::v16i32, MVT::v8i64} ) {
1503
588
        setOperationAction(ISD::CTLZ,            VT, Legal);
1504
588
      }
1505
294
    } // Subtarget.hasCDI()
1506
1.66k
1507
1.66k
    if (Subtarget.hasVPOPCNTDQ()) {
1508
24
      for (auto VT : { MVT::v16i32, MVT::v8i64 })
1509
48
        setOperationAction(ISD::CTPOP, VT, Legal);
1510
24
    }
1511
1.66k
1512
1.66k
    // Extract subvector is special because the value type
1513
1.66k
    // (result) is 256-bit but the source is 512-bit wide.
1514
1.66k
    // 128-bit was made Legal under AVX1.
1515
1.66k
    for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64,
1516
1.66k
                     MVT::v8f32, MVT::v4f64 })
1517
9.97k
      setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
1518
1.66k
1519
6.65k
    for (auto VT : { MVT::v16i32, MVT::v8i64, MVT::v16f32, MVT::v8f64 }) {
1520
6.65k
      setOperationAction(ISD::VECTOR_SHUFFLE,      VT, Custom);
1521
6.65k
      setOperationAction(ISD::INSERT_VECTOR_ELT,   VT, Custom);
1522
6.65k
      setOperationAction(ISD::BUILD_VECTOR,        VT, Custom);
1523
6.65k
      setOperationAction(ISD::VSELECT,             VT, Custom);
1524
6.65k
      setOperationAction(ISD::EXTRACT_VECTOR_ELT,  VT, Custom);
1525
6.65k
      setOperationAction(ISD::SCALAR_TO_VECTOR,    VT, Custom);
1526
6.65k
      setOperationAction(ISD::INSERT_SUBVECTOR,    VT, Legal);
1527
6.65k
      setOperationAction(ISD::MLOAD,               VT, Legal);
1528
6.65k
      setOperationAction(ISD::MSTORE,              VT, Legal);
1529
6.65k
      setOperationAction(ISD::MGATHER,             VT, Custom);
1530
6.65k
      setOperationAction(ISD::MSCATTER,            VT, Custom);
1531
6.65k
    }
1532
1.66k
    // Need to custom split v32i16/v64i8 bitcasts.
1533
1.66k
    if (!Subtarget.hasBWI()) {
1534
880
      setOperationAction(ISD::BITCAST, MVT::v32i16, Custom);
1535
880
      setOperationAction(ISD::BITCAST, MVT::v64i8,  Custom);
1536
880
    }
1537
1.66k
1538
1.66k
    if (Subtarget.hasVBMI2()) {
1539
56
      for (auto VT : { MVT::v16i32, MVT::v8i64 }) {
1540
56
        setOperationAction(ISD::FSHL, VT, Custom);
1541
56
        setOperationAction(ISD::FSHR, VT, Custom);
1542
56
      }
1543
28
    }
1544
1.66k
  } // has AVX-512
1545
15.2k
1546
15.2k
  // This block controls legalization for operations that don't have
1547
15.2k
  // pre-AVX512 equivalents. Without VLX we use 512-bit operations for
1548
15.2k
  // narrower widths.
1549
15.2k
  if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) {
1550
1.66k
    // These operations are handled on non-VLX by artificially widening in
1551
1.66k
    // isel patterns.
1552
1.66k
    // TODO: Custom widen in lowering on non-VLX and drop the isel patterns?
1553
1.66k
1554
1.66k
    setOperationAction(ISD::FP_TO_UINT,         MVT::v8i32, Legal);
1555
1.66k
    setOperationAction(ISD::FP_TO_UINT,         MVT::v4i32, Legal);
1556
1.66k
    setOperationAction(ISD::FP_TO_UINT,         MVT::v2i32, Custom);
1557
1.66k
    setOperationAction(ISD::UINT_TO_FP,         MVT::v8i32, Legal);
1558
1.66k
    setOperationAction(ISD::UINT_TO_FP,         MVT::v4i32, Legal);
1559
1.66k
1560
3.33k
    for (auto VT : { MVT::v2i64, MVT::v4i64 }) {
1561
3.33k
      setOperationAction(ISD::SMAX, VT, Legal);
1562
3.33k
      setOperationAction(ISD::UMAX, VT, Legal);
1563
3.33k
      setOperationAction(ISD::SMIN, VT, Legal);
1564
3.33k
      setOperationAction(ISD::UMIN, VT, Legal);
1565
3.33k
      setOperationAction(ISD::ABS,  VT, Legal);
1566
3.33k
    }
1567
1.66k
1568
6.67k
    for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 }) {
1569
6.67k
      setOperationAction(ISD::ROTL,     VT, Custom);
1570
6.67k
      setOperationAction(ISD::ROTR,     VT, Custom);
1571
6.67k
    }
1572
1.66k
1573
1.66k
    // Custom legalize 2x32 to get a little better code.
1574
1.66k
    setOperationAction(ISD::MSCATTER, MVT::v2f32, Custom);
1575
1.66k
    setOperationAction(ISD::MSCATTER, MVT::v2i32, Custom);
1576
1.66k
1577
1.66k
    for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1578
1.66k
                     MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 })
1579
13.3k
      setOperationAction(ISD::MSCATTER, VT, Custom);
1580
1.66k
1581
1.66k
    if (Subtarget.hasDQI()) {
1582
908
      for (auto VT : { MVT::v2i64, MVT::v4i64 }) {
1583
908
        setOperationAction(ISD::SINT_TO_FP,     VT, Legal);
1584
908
        setOperationAction(ISD::UINT_TO_FP,     VT, Legal);
1585
908
        setOperationAction(ISD::FP_TO_SINT,     VT, Legal);
1586
908
        setOperationAction(ISD::FP_TO_UINT,     VT, Legal);
1587
908
1588
908
        setOperationAction(ISD::MUL,            VT, Legal);
1589
908
      }
1590
454
    }
1591
1.66k
1592
1.66k
    if (Subtarget.hasCDI()) {
1593
1.18k
      for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 }) {
1594
1.18k
        setOperationAction(ISD::CTLZ,            VT, Legal);
1595
1.18k
      }
1596
296
    } // Subtarget.hasCDI()
1597
1.66k
1598
1.66k
    if (Subtarget.hasVPOPCNTDQ()) {
1599
24
      for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 })
1600
96
        setOperationAction(ISD::CTPOP, VT, Legal);
1601
24
    }
1602
1.66k
  }
1603
15.2k
1604
15.2k
  // This block controls legalization of v32i1/v64i1, which are available with
1605
15.2k
  // AVX512BW. 512-bit v32i16 and v64i8 vector legalization is controlled with
1606
15.2k
  // useBWIRegs.
1607
15.2k
  if (!Subtarget.useSoftFloat() && Subtarget.hasBWI()) {
1608
787
    addRegisterClass(MVT::v32i1,  &X86::VK32RegClass);
1609
787
    addRegisterClass(MVT::v64i1,  &X86::VK64RegClass);
1610
787
1611
1.57k
    for (auto VT : { MVT::v32i1, MVT::v64i1 }) {
1612
1.57k
      setOperationAction(ISD::ADD,                VT, Custom);
1613
1.57k
      setOperationAction(ISD::SUB,                VT, Custom);
1614
1.57k
      setOperationAction(ISD::MUL,                VT, Custom);
1615
1.57k
      setOperationAction(ISD::VSELECT,            VT, Expand);
1616
1.57k
      setOperationAction(ISD::UADDSAT,            VT, Custom);
1617
1.57k
      setOperationAction(ISD::SADDSAT,            VT, Custom);
1618
1.57k
      setOperationAction(ISD::USUBSAT,            VT, Custom);
1619
1.57k
      setOperationAction(ISD::SSUBSAT,            VT, Custom);
1620
1.57k
1621
1.57k
      setOperationAction(ISD::TRUNCATE,           VT, Custom);
1622
1.57k
      setOperationAction(ISD::SETCC,              VT, Custom);
1623
1.57k
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1624
1.57k
      setOperationAction(ISD::INSERT_VECTOR_ELT,  VT, Custom);
1625
1.57k
      setOperationAction(ISD::SELECT,             VT, Custom);
1626
1.57k
      setOperationAction(ISD::BUILD_VECTOR,       VT, Custom);
1627
1.57k
      setOperationAction(ISD::VECTOR_SHUFFLE,     VT, Custom);
1628
1.57k
    }
1629
787
1630
787
    setOperationAction(ISD::CONCAT_VECTORS,     MVT::v32i1, Custom);
1631
787
    setOperationAction(ISD::CONCAT_VECTORS,     MVT::v64i1, Custom);
1632
787
    setOperationAction(ISD::INSERT_SUBVECTOR,   MVT::v32i1, Custom);
1633
787
    setOperationAction(ISD::INSERT_SUBVECTOR,   MVT::v64i1, Custom);
1634
787
    for (auto VT : { MVT::v16i1, MVT::v32i1 })
1635
1.57k
      setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
1636
787
1637
787
    // Extends from v32i1 masks to 256-bit vectors.
1638
787
    setOperationAction(ISD::SIGN_EXTEND,        MVT::v32i8, Custom);
1639
787
    setOperationAction(ISD::ZERO_EXTEND,        MVT::v32i8, Custom);
1640
787
    setOperationAction(ISD::ANY_EXTEND,         MVT::v32i8, Custom);
1641
787
  }
1642
15.2k
1643
15.2k
  // This block controls legalization for v32i16 and v64i8. 512-bits can be
1644
15.2k
  // disabled based on prefer-vector-width and required-vector-width function
1645
15.2k
  // attributes.
1646
15.2k
  if (!Subtarget.useSoftFloat() && Subtarget.useBWIRegs()) {
1647
783
    addRegisterClass(MVT::v32i16, &X86::VR512RegClass);
1648
783
    addRegisterClass(MVT::v64i8,  &X86::VR512RegClass);
1649
783
1650
783
    // Extends from v64i1 masks to 512-bit vectors.
1651
783
    setOperationAction(ISD::SIGN_EXTEND,        MVT::v64i8, Custom);
1652
783
    setOperationAction(ISD::ZERO_EXTEND,        MVT::v64i8, Custom);
1653
783
    setOperationAction(ISD::ANY_EXTEND,         MVT::v64i8, Custom);
1654
783
1655
783
    setOperationAction(ISD::MUL,                MVT::v32i16, Legal);
1656
783
    setOperationAction(ISD::MUL,                MVT::v64i8, Custom);
1657
783
    setOperationAction(ISD::MULHS,              MVT::v32i16, Legal);
1658
783
    setOperationAction(ISD::MULHU,              MVT::v32i16, Legal);
1659
783
    setOperationAction(ISD::MULHS,              MVT::v64i8, Custom);
1660
783
    setOperationAction(ISD::MULHU,              MVT::v64i8, Custom);
1661
783
    setOperationAction(ISD::CONCAT_VECTORS,     MVT::v32i16, Custom);
1662
783
    setOperationAction(ISD::CONCAT_VECTORS,     MVT::v64i8, Custom);
1663
783
    setOperationAction(ISD::INSERT_SUBVECTOR,   MVT::v32i16, Legal);
1664
783
    setOperationAction(ISD::INSERT_SUBVECTOR,   MVT::v64i8, Legal);
1665
783
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v32i16, Custom);
1666
783
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v64i8, Custom);
1667
783
    setOperationAction(ISD::SCALAR_TO_VECTOR,   MVT::v32i16, Custom);
1668
783
    setOperationAction(ISD::SCALAR_TO_VECTOR,   MVT::v64i8, Custom);
1669
783
    setOperationAction(ISD::SIGN_EXTEND,        MVT::v32i16, Custom);
1670
783
    setOperationAction(ISD::ZERO_EXTEND,        MVT::v32i16, Custom);
1671
783
    setOperationAction(ISD::ANY_EXTEND,         MVT::v32i16, Custom);
1672
783
    setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v32i16, Custom);
1673
783
    setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v64i8, Custom);
1674
783
    setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v32i16, Custom);
1675
783
    setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v64i8, Custom);
1676
783
    setOperationAction(ISD::TRUNCATE,           MVT::v32i8, Custom);
1677
783
    setOperationAction(ISD::BITREVERSE,         MVT::v64i8, Custom);
1678
783
1679
783
    setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v32i16, Custom);
1680
783
    setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, MVT::v32i16, Custom);
1681
783
1682
783
    setTruncStoreAction(MVT::v32i16,  MVT::v32i8, Legal);
1683
783
1684
1.56k
    for (auto VT : { MVT::v64i8, MVT::v32i16 }) {
1685
1.56k
      setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1686
1.56k
      setOperationAction(ISD::VSELECT,      VT, Custom);
1687
1.56k
      setOperationAction(ISD::ABS,          VT, Legal);
1688
1.56k
      setOperationAction(ISD::SRL,          VT, Custom);
1689
1.56k
      setOperationAction(ISD::SHL,          VT, Custom);
1690
1.56k
      setOperationAction(ISD::SRA,          VT, Custom);
1691
1.56k
      setOperationAction(ISD::MLOAD,        VT, Legal);
1692
1.56k
      setOperationAction(ISD::MSTORE,       VT, Legal);
1693
1.56k
      setOperationAction(ISD::CTPOP,        VT, Custom);
1694
1.56k
      setOperationAction(ISD::CTLZ,         VT, Custom);
1695
1.56k
      setOperationAction(ISD::SMAX,         VT, Legal);
1696
1.56k
      setOperationAction(ISD::UMAX,         VT, Legal);
1697
1.56k
      setOperationAction(ISD::SMIN,         VT, Legal);
1698
1.56k
      setOperationAction(ISD::UMIN,         VT, Legal);
1699
1.56k
      setOperationAction(ISD::SETCC,        VT, Custom);
1700
1.56k
      setOperationAction(ISD::UADDSAT,      VT, Legal);
1701
1.56k
      setOperationAction(ISD::SADDSAT,      VT, Legal);
1702
1.56k
      setOperationAction(ISD::USUBSAT,      VT, Legal);
1703
1.56k
      setOperationAction(ISD::SSUBSAT,      VT, Legal);
1704
1.56k
      setOperationAction(ISD::SELECT,       VT, Custom);
1705
1.56k
1706
1.56k
      // The condition codes aren't legal in SSE/AVX and under AVX512 we use
1707
1.56k
      // setcc all the way to isel and prefer SETGT in some isel patterns.
1708
1.56k
      setCondCodeAction(ISD::SETLT, VT, Custom);
1709
1.56k
      setCondCodeAction(ISD::SETLE, VT, Custom);
1710
1.56k
    }
1711
783
1712
1.56k
    for (auto ExtType : {ISD::ZEXTLOAD, ISD::SEXTLOAD}) {
1713
1.56k
      setLoadExtAction(ExtType, MVT::v32i16, MVT::v32i8, Legal);
1714
1.56k
    }
1715
783
1716
783
    if (Subtarget.hasBITALG()) {
1717
16
      for (auto VT : { MVT::v64i8, MVT::v32i16 })
1718
32
        setOperationAction(ISD::CTPOP, VT, Legal);
1719
16
    }
1720
783
1721
783
    if (Subtarget.hasVBMI2()) {
1722
28
      setOperationAction(ISD::FSHL, MVT::v32i16, Custom);
1723
28
      setOperationAction(ISD::FSHR, MVT::v32i16, Custom);
1724
28
    }
1725
783
  }
1726
15.2k
1727
15.2k
  if (!Subtarget.useSoftFloat() && Subtarget.hasBWI()) {
1728
3.14k
    for (auto VT : { MVT::v32i8, MVT::v16i8, MVT::v16i16, MVT::v8i16 }) {
1729
3.14k
      setOperationAction(ISD::MLOAD,  VT, Subtarget.hasVLX() ? Legal : Custom);
1730
3.14k
      setOperationAction(ISD::MSTORE, VT, Subtarget.hasVLX() ? Legal : Custom);
1731
3.14k
    }
1732
787
1733
787
    // These operations are handled on non-VLX by artificially widening in
1734
787
    // isel patterns.
1735
787
    // TODO: Custom widen in lowering on non-VLX and drop the isel patterns?
1736
787
1737
787
    if (Subtarget.hasBITALG()) {
1738
16
      for (auto VT : { MVT::v16i8, MVT::v32i8, MVT::v8i16, MVT::v16i16 })
1739
64
        setOperationAction(ISD::CTPOP, VT, Legal);
1740
16
    }
1741
787
  }
1742
15.2k
1743
15.2k
  if (!Subtarget.useSoftFloat() && Subtarget.hasVLX()) {
1744
786
    setTruncStoreAction(MVT::v4i64, MVT::v4i8,  Legal);
1745
786
    setTruncStoreAction(MVT::v4i64, MVT::v4i16, Legal);
1746
786
    setTruncStoreAction(MVT::v4i64, MVT::v4i32, Legal);
1747
786
    setTruncStoreAction(MVT::v8i32, MVT::v8i8,  Legal);
1748
786
    setTruncStoreAction(MVT::v8i32, MVT::v8i16, Legal);
1749
786
1750
786
    setTruncStoreAction(MVT::v2i64, MVT::v2i8,  Legal);
1751
786
    setTruncStoreAction(MVT::v2i64, MVT::v2i16, Legal);
1752
786
    setTruncStoreAction(MVT::v2i64, MVT::v2i32, Legal);
1753
786
    setTruncStoreAction(MVT::v4i32, MVT::v4i8,  Legal);
1754
786
    setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal);
1755
786
1756
786
    if (Subtarget.hasDQI()) {
1757
350
      // Fast v2f32 SINT_TO_FP( v2i64 ) custom conversion.
1758
350
      // v2f32 UINT_TO_FP is already custom under SSE2.
1759
350
      setOperationAction(ISD::SINT_TO_FP,    MVT::v2f32, Custom);
1760
350
      assert(isOperationCustom(ISD::UINT_TO_FP, MVT::v2f32) &&
1761
350
             "Unexpected operation action!");
1762
350
      // v2i64 FP_TO_S/UINT(v2f32) custom conversion.
1763
350
      setOperationAction(ISD::FP_TO_SINT,    MVT::v2f32, Custom);
1764
350
      setOperationAction(ISD::FP_TO_UINT,    MVT::v2f32, Custom);
1765
350
    }
1766
786
1767
786
    if (Subtarget.hasBWI()) {
1768
493
      setTruncStoreAction(MVT::v16i16,  MVT::v16i8, Legal);
1769
493
      setTruncStoreAction(MVT::v8i16,   MVT::v8i8,  Legal);
1770
493
    }
1771
786
1772
786
    if (Subtarget.hasVBMI2()) {
1773
16
      // TODO: Make these legal even without VLX?
1774
16
      for (auto VT : { MVT::v8i16,  MVT::v4i32, MVT::v2i64,
1775
96
                       MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1776
96
        setOperationAction(ISD::FSHL, VT, Custom);
1777
96
        setOperationAction(ISD::FSHR, VT, Custom);
1778
96
      }
1779
16
    }
1780
786
  }
1781
15.2k
1782
15.2k
  // We want to custom lower some of our intrinsics.
1783
15.2k
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
1784
15.2k
  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
1785
15.2k
  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
1786
15.2k
  if (!Subtarget.is64Bit()) {
1787
3.16k
    setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);
1788
3.16k
  }
1789
15.2k
1790
15.2k
  // Only custom-lower 64-bit SADDO and friends on 64-bit because we don't
1791
15.2k
  // handle type legalization for these operations here.
1792
15.2k
  //
1793
15.2k
  // FIXME: We really should do custom legalization for addition and
1794
15.2k
  // subtraction on x86-32 once PR3203 is fixed.  We really can't do much better
1795
15.2k
  // than generic legalization for 64-bit multiplication-with-overflow, though.
1796
61.0k
  for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
1797
61.0k
    if (VT == MVT::i64 && !Subtarget.is64Bit())
1798
3.16k
      continue;
1799
57.9k
    // Add/Sub/Mul with overflow operations are custom lowered.
1800
57.9k
    setOperationAction(ISD::SADDO, VT, Custom);
1801
57.9k
    setOperationAction(ISD::UADDO, VT, Custom);
1802
57.9k
    setOperationAction(ISD::SSUBO, VT, Custom);
1803
57.9k
    setOperationAction(ISD::USUBO, VT, Custom);
1804
57.9k
    setOperationAction(ISD::SMULO, VT, Custom);
1805
57.9k
    setOperationAction(ISD::UMULO, VT, Custom);
1806
57.9k
1807
57.9k
    // Support carry in as value rather than glue.
1808
57.9k
    setOperationAction(ISD::ADDCARRY, VT, Custom);
1809
57.9k
    setOperationAction(ISD::SUBCARRY, VT, Custom);
1810
57.9k
    setOperationAction(ISD::SETCCCARRY, VT, Custom);
1811
57.9k
  }
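The loop above marks the add/sub/mul overflow nodes and the carry nodes Custom for every integer width the target handles directly. As a point of reference, a minimal C++ sketch of the kind of source that typically reaches the backend as one of these nodes, using the GCC/Clang overflow builtin (the helper name is illustrative only):

#include <cstdint>

// Returns true when A + B overflows a signed 32-bit result. Front ends
// usually emit this builtin as llvm.sadd.with.overflow, which arrives in the
// backend as the ISD::SADDO node customized above.
bool addWouldOverflow(int32_t A, int32_t B, int32_t &Sum) {
  return __builtin_add_overflow(A, B, &Sum);
}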
1812
15.2k
1813
15.2k
  if (!Subtarget.is64Bit()) {
1814
3.16k
    // These libcalls are not available in 32-bit.
1815
3.16k
    setLibcallName(RTLIB::SHL_I128, nullptr);
1816
3.16k
    setLibcallName(RTLIB::SRL_I128, nullptr);
1817
3.16k
    setLibcallName(RTLIB::SRA_I128, nullptr);
1818
3.16k
    setLibcallName(RTLIB::MUL_I128, nullptr);
1819
3.16k
  }
1820
15.2k
1821
15.2k
  // Combine sin / cos into _sincos_stret if it is available.
1822
15.2k
  if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr &&
1823
15.2k
      getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) {
1824
2.79k
    setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
1825
2.79k
    setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
1826
2.79k
  }
1827
15.2k
1828
15.2k
  if (Subtarget.isTargetWin64()) {
1829
452
    setOperationAction(ISD::SDIV, MVT::i128, Custom);
1830
452
    setOperationAction(ISD::UDIV, MVT::i128, Custom);
1831
452
    setOperationAction(ISD::SREM, MVT::i128, Custom);
1832
452
    setOperationAction(ISD::UREM, MVT::i128, Custom);
1833
452
    setOperationAction(ISD::SDIVREM, MVT::i128, Custom);
1834
452
    setOperationAction(ISD::UDIVREM, MVT::i128, Custom);
1835
452
  }
1836
15.2k
1837
15.2k
  // On 32 bit MSVC, `fmodf(f32)` is not defined - only `fmod(f64)`
1838
15.2k
  // is. We should promote the value to 64-bits to solve this.
1839
15.2k
  // This is what the CRT headers do - `fmodf` is an inline header
1840
15.2k
  // function casting to f64 and calling `fmod`.
1841
15.2k
  if (Subtarget.is32Bit() &&
1842
15.2k
      (Subtarget.isTargetWindowsMSVC() || Subtarget.isTargetWindowsItanium()))
1843
281
    for (ISD::NodeType Op :
1844
281
         {ISD::FCEIL, ISD::FCOS, ISD::FEXP, ISD::FFLOOR, ISD::FREM, ISD::FLOG,
1845
281
          ISD::FLOG10, ISD::FPOW, ISD::FSIN})
1846
2.52k
      if (isOperationExpand(Op, MVT::f32))
1847
2.48k
        setOperationAction(Op, MVT::f32, Promote);
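The comment above describes what this Promote action amounts to; a short illustrative sketch of the same trick in plain C++ (the helper name is made up and this is not the actual CRT header):

#include <cmath>

// Widen to double, use the libcall that does exist on 32-bit MSVC, then
// narrow the result back to float.
static inline float fmodf_via_f64(float X, float Y) {
  return static_cast<float>(std::fmod(static_cast<double>(X),
                                      static_cast<double>(Y)));
}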
1848
15.2k
1849
15.2k
  // We have target-specific dag combine patterns for the following nodes:
1850
15.2k
  setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
1851
15.2k
  setTargetDAGCombine(ISD::SCALAR_TO_VECTOR);
1852
15.2k
  setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
1853
15.2k
  setTargetDAGCombine(ISD::CONCAT_VECTORS);
1854
15.2k
  setTargetDAGCombine(ISD::INSERT_SUBVECTOR);
1855
15.2k
  setTargetDAGCombine(ISD::EXTRACT_SUBVECTOR);
1856
15.2k
  setTargetDAGCombine(ISD::BITCAST);
1857
15.2k
  setTargetDAGCombine(ISD::VSELECT);
1858
15.2k
  setTargetDAGCombine(ISD::SELECT);
1859
15.2k
  setTargetDAGCombine(ISD::SHL);
1860
15.2k
  setTargetDAGCombine(ISD::SRA);
1861
15.2k
  setTargetDAGCombine(ISD::SRL);
1862
15.2k
  setTargetDAGCombine(ISD::OR);
1863
15.2k
  setTargetDAGCombine(ISD::AND);
1864
15.2k
  setTargetDAGCombine(ISD::ADD);
1865
15.2k
  setTargetDAGCombine(ISD::FADD);
1866
15.2k
  setTargetDAGCombine(ISD::FSUB);
1867
15.2k
  setTargetDAGCombine(ISD::FNEG);
1868
15.2k
  setTargetDAGCombine(ISD::FMA);
1869
15.2k
  setTargetDAGCombine(ISD::FMINNUM);
1870
15.2k
  setTargetDAGCombine(ISD::FMAXNUM);
1871
15.2k
  setTargetDAGCombine(ISD::SUB);
1872
15.2k
  setTargetDAGCombine(ISD::LOAD);
1873
15.2k
  setTargetDAGCombine(ISD::MLOAD);
1874
15.2k
  setTargetDAGCombine(ISD::STORE);
1875
15.2k
  setTargetDAGCombine(ISD::MSTORE);
1876
15.2k
  setTargetDAGCombine(ISD::TRUNCATE);
1877
15.2k
  setTargetDAGCombine(ISD::ZERO_EXTEND);
1878
15.2k
  setTargetDAGCombine(ISD::ANY_EXTEND);
1879
15.2k
  setTargetDAGCombine(ISD::SIGN_EXTEND);
1880
15.2k
  setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
1881
15.2k
  setTargetDAGCombine(ISD::ANY_EXTEND_VECTOR_INREG);
1882
15.2k
  setTargetDAGCombine(ISD::SIGN_EXTEND_VECTOR_INREG);
1883
15.2k
  setTargetDAGCombine(ISD::ZERO_EXTEND_VECTOR_INREG);
1884
15.2k
  setTargetDAGCombine(ISD::SINT_TO_FP);
1885
15.2k
  setTargetDAGCombine(ISD::UINT_TO_FP);
1886
15.2k
  setTargetDAGCombine(ISD::SETCC);
1887
15.2k
  setTargetDAGCombine(ISD::MUL);
1888
15.2k
  setTargetDAGCombine(ISD::XOR);
1889
15.2k
  setTargetDAGCombine(ISD::MSCATTER);
1890
15.2k
  setTargetDAGCombine(ISD::MGATHER);
1891
15.2k
1892
15.2k
  computeRegisterProperties(Subtarget.getRegisterInfo());
1893
15.2k
1894
15.2k
  MaxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores
1895
15.2k
  MaxStoresPerMemsetOptSize = 8;
1896
15.2k
  MaxStoresPerMemcpy = 8; // For @llvm.memcpy -> sequence of stores
1897
15.2k
  MaxStoresPerMemcpyOptSize = 4;
1898
15.2k
  MaxStoresPerMemmove = 8; // For @llvm.memmove -> sequence of stores
1899
15.2k
  MaxStoresPerMemmoveOptSize = 4;
1900
15.2k
1901
15.2k
  // TODO: These control memcmp expansion in CGP and could be raised higher, but
1902
15.2k
  // that needs to be benchmarked and balanced with the potential use of vector
1903
15.2k
  // load/store types (PR33329, PR33914).
1904
15.2k
  MaxLoadsPerMemcmp = 2;
1905
15.2k
  MaxLoadsPerMemcmpOptSize = 2;
1906
15.2k
1907
15.2k
  // Set loop alignment to 2^ExperimentalPrefLoopAlignment bytes (default: 2^4).
1908
15.2k
  setPrefLoopAlignment(ExperimentalPrefLoopAlignment);
1909
15.2k
1910
15.2k
  // An out-of-order CPU can speculatively execute past a predictable branch,
1911
15.2k
  // but a conditional move could be stalled by an expensive earlier operation.
1912
15.2k
  PredictableSelectIsExpensive = Subtarget.getSchedModel().isOutOfOrder();
1913
15.2k
  EnableExtLdPromotion = true;
1914
15.2k
  setPrefFunctionAlignment(4); // 2^4 bytes.
1915
15.2k
1916
15.2k
  verifyIntrinsicTables();
1917
15.2k
}
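A small sketch of the alignment arithmetic behind the two calls at the end of the constructor, assuming the default option value of 4 (all names here are illustrative):

#include <cstdio>

int main() {
  unsigned PrefLoopAlignLog2 = 4;                    // x86-experimental-pref-loop-alignment default
  unsigned LoopAlignBytes = 1u << PrefLoopAlignLog2; // 2^4 = 16 bytes
  unsigned FuncAlignBytes = 1u << 4;                 // setPrefFunctionAlignment(4) => 16 bytes
  std::printf("loop align %u, function align %u\n", LoopAlignBytes, FuncAlignBytes);
}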
1918
1919
// This has so far only been implemented for 64-bit MachO.
1920
1.29k
bool X86TargetLowering::useLoadStackGuardNode() const {
1921
1.29k
  return Subtarget.isTargetMachO() && Subtarget.is64Bit();
1922
1.29k
}
1923
1924
1.41k
bool X86TargetLowering::useStackGuardXorFP() const {
1925
1.41k
  // Currently only MSVC CRTs XOR the frame pointer into the stack guard value.
1926
1.41k
  return Subtarget.getTargetTriple().isOSMSVCRT();
1927
1.41k
}
1928
1929
SDValue X86TargetLowering::emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val,
1930
282
                                               const SDLoc &DL) const {
1931
282
  EVT PtrTy = getPointerTy(DAG.getDataLayout());
1932
282
  unsigned XorOp = Subtarget.is64Bit() ? X86::XOR64_FP : X86::XOR32_FP;
1933
282
  MachineSDNode *Node = DAG.getMachineNode(XorOp, DL, PtrTy, Val);
1934
282
  return SDValue(Node, 0);
1935
282
}
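A hedged pseudo-C++ sketch of the value the XOR64_FP/XOR32_FP pseudo ends up computing (the function is illustrative and not part of the backend):

#include <cstdint>

// The stack guard is combined with the frame pointer, so a canary leaked from
// one frame is not directly reusable in another.
uintptr_t xorGuardWithFramePointer(uintptr_t Guard, uintptr_t FramePointer) {
  return Guard ^ FramePointer;
}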
1936
1937
TargetLoweringBase::LegalizeTypeAction
1938
1.56M
X86TargetLowering::getPreferredVectorAction(MVT VT) const {
1939
1.56M
  if (VT == MVT::v32i1 && Subtarget.hasAVX512() && !Subtarget.hasBWI())
1940
882
    return TypeSplitVector;
1941
1.56M
1942
1.56M
  if (ExperimentalVectorWideningLegalization &&
1943
1.56M
      VT.getVectorNumElements() != 1 &&
1944
1.56M
      VT.getVectorElementType() != MVT::i1)
1945
25.6k
    return TypeWidenVector;
1946
1.54M
1947
1.54M
  return TargetLoweringBase::getPreferredVectorAction(VT);
1948
1.54M
}
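A hedged usage sketch of the hook above; TLI stands in for an already-constructed X86TargetLowering and is not defined here:

// With AVX512 but without BWI there is no v32i1 mask register, so the hook
// requests splitting into two v16i1 halves rather than widening or promotion.
TargetLoweringBase::LegalizeTypeAction Act =
    TLI.getPreferredVectorAction(MVT::v32i1);
bool SplitsMask = (Act == TargetLoweringBase::TypeSplitVector);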
1949
1950
MVT X86TargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
1951
                                                     CallingConv::ID CC,
1952
1.19M
                                                     EVT VT) const {
1953
1.19M
  if (VT == MVT::v32i1 && Subtarget.hasAVX512() && !Subtarget.hasBWI())
1954
66
    return MVT::v32i8;
1955
1.19M
  return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
1956
1.19M
}
1957
1958
unsigned X86TargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
1959
                                                          CallingConv::ID CC,
1960
1.19M
                                                          EVT VT) const {
1961
1.19M
  if (VT == MVT::v32i1 && Subtarget.hasAVX512() && !Subtarget.hasBWI())
1962
66
    return 1;
1963
1.19M
  return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
1964
1.19M
}
1965
1966
EVT X86TargetLowering::getSetCCResultType(const DataLayout &DL,
1967
                                          LLVMContext& Context,
1968
379k
                                          EVT VT) const {
1969
379k
  if (!VT.isVector())
1970
357k
    return MVT::i8;
1971
21.0k
1972
21.0k
  if (Subtarget.hasAVX512()) {
1973
5.87k
    const unsigned NumElts = VT.getVectorNumElements();
1974
5.87k
1975
5.87k
    // Figure out what this type will be legalized to.
1976
5.87k
    EVT LegalVT = VT;
1977
6.20k
    while (getTypeAction(Context, LegalVT) != TypeLegal)
1978
328
      LegalVT = getTypeToTransformTo(Context, LegalVT);
1979
5.87k
1980
5.87k
    // If we got a 512-bit vector then we'll definitely have a vXi1 compare.
1981
5.87k
    if (LegalVT.getSimpleVT().is512BitVector())
1982
1.62k
      return EVT::getVectorVT(Context, MVT::i1, NumElts);
1983
4.24k
1984
4.24k
    if (LegalVT.getSimpleVT().isVector() && Subtarget.hasVLX()) {
1985
2.90k
      // If we legalized to less than a 512-bit vector, then we will use a vXi1
1986
2.90k
      // compare for vXi32/vXi64 for sure. If we have BWI we will also support
1987
2.90k
      // vXi16/vXi8.
1988
2.90k
      MVT EltVT = LegalVT.getSimpleVT().getVectorElementType();
1989
2.90k
      if (Subtarget.hasBWI() || EltVT.getSizeInBits() >= 32)
1990
2.66k
        return EVT::getVectorVT(Context, MVT::i1, NumElts);
1991
16.8k
    }
1992
4.24k
  }
1993
16.8k
1994
16.8k
  return VT.changeVectorElementTypeToInteger();
1995
16.8k
}
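A hedged usage sketch of the result-type rules above; DL, Ctx, and TLI are assumed to be an existing DataLayout, LLVMContext, and X86TargetLowering:

// Scalar compares always get an i8 result on x86.
EVT ScalarCC = TLI.getSetCCResultType(DL, Ctx, MVT::i32);   // MVT::i8
// A v8i32 compare produces a v8i1 mask when VLX is available; otherwise the
// generic rule applies and the result stays a v8i32 of all-zero/all-one lanes.
EVT VectorCC = TLI.getSetCCResultType(DL, Ctx, MVT::v8i32);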
1996
1997
/// Helper for getByValTypeAlignment to determine
1998
/// the desired ByVal argument alignment.
1999
15
static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign) {
2000
15
  if (MaxAlign == 16)
2001
0
    return;
2002
15
  if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
2003
1
    if (VTy->getBitWidth() == 128)
2004
1
      MaxAlign = 16;
2005
14
  } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
2006
0
    unsigned EltAlign = 0;
2007
0
    getMaxByValAlign(ATy->getElementType(), EltAlign);
2008
0
    if (EltAlign > MaxAlign)
2009
0
      MaxAlign = EltAlign;
2010
14
  } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
2011
7
    for (auto *EltTy : STy->elements()) {
2012
7
      unsigned EltAlign = 0;
2013
7
      getMaxByValAlign(EltTy, EltAlign);
2014
7
      if (EltAlign > MaxAlign)
2015
1
        MaxAlign = EltAlign;
2016
7
      if (MaxAlign == 16)
2017
1
        break;
2018
7
    }
2019
4
  }
2020
15
}
2021
2022
/// Return the desired alignment for ByVal aggregate
2023
/// function arguments in the caller parameter area. For X86, aggregates
2024
/// that contain SSE vectors are placed at 16-byte boundaries while the rest
2025
/// are at 4-byte boundaries.
2026
unsigned X86TargetLowering::getByValTypeAlignment(Type *Ty,
2027
167
                                                  const DataLayout &DL) const {
2028
167
  if (Subtarget.is64Bit()) {
2029
50
    // Max of 8 and alignment of type.
2030
50
    unsigned TyAlign = DL.getABITypeAlignment(Ty);
2031
50
    if (TyAlign > 8)
2032
0
      return TyAlign;
2033
50
    return 8;
2034
50
  }
2035
117
2036
117
  unsigned Align = 4;
2037
117
  if (Subtarget.hasSSE1())
2038
8
    getMaxByValAlign(Ty, Align);
2039
117
  return Align;
2040
117
}
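An illustrative aggregate for the rule above (the struct is hypothetical; __m128 comes from the SSE intrinsics header):

#include <xmmintrin.h>

// The __m128 member is a 128-bit SSE vector, so getMaxByValAlign reports 16
// and, on 32-bit x86 with SSE enabled, the byval slot is placed at a 16-byte
// boundary. Without SSE the default of 4 is used; on x86-64 the ABI type
// alignment applies, with a minimum of 8.
struct Packet {
  __m128 Lanes;
  int Tag;
};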
2041
2042
/// Returns the target specific optimal type for load
2043
/// and store operations as a result of memset, memcpy, and memmove
2044
/// lowering. If DstAlign is zero, it is safe to assume the destination
2045
/// alignment can satisfy any constraint. Similarly, if SrcAlign is zero it
2046
/// means there isn't a need to check it against alignment requirement,
2047
/// probably because the source does not need to be loaded. If 'IsMemset' is
2048
/// true, that means it's expanding a memset. If 'ZeroMemset' is true, that
2049
/// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy
2050
/// source is constant so it does not need to be loaded.
2051
/// It returns EVT::Other if the type should be determined using generic
2052
/// target-independent logic.
2053
/// For vector ops we check that the overall size isn't larger than our
2054
/// preferred vector width.
2055
EVT X86TargetLowering::getOptimalMemOpType(
2056
    uint64_t Size, unsigned DstAlign, unsigned SrcAlign, bool IsMemset,
2057
    bool ZeroMemset, bool MemcpyStrSrc,
2058
4.50k
    const AttributeList &FuncAttributes) const {
2059
4.50k
  if (!FuncAttributes.hasFnAttribute(Attribute::NoImplicitFloat)) {
2060
4.50k
    if (Size >= 16 && (!Subtarget.isUnalignedMem16Slow() ||
2061
3.22k
                       ((DstAlign == 0 || DstAlign >= 16) &&
2062
2.02k
                        (SrcAlign == 0 || SrcAlign >= 16)))) {
2063
1.48k
      // FIXME: Check if unaligned 32-byte accesses are slow.
2064
1.48k
      if (Size >= 32 && Subtarget.hasAVX() &&
2065
1.48k
          (Subtarget.getPreferVectorWidth() >= 256)) {
2066
303
        // Although this isn't a well-supported type for AVX1, we'll let
2067
303
        // legalization and shuffle lowering produce the optimal codegen. If we
2068
303
        // choose an optimal type with a vector element larger than a byte,
2069
303
        // getMemsetStores() may create an intermediate splat (using an integer
2070
303
        // multiply) before we splat as a vector.
2071
303
        return MVT::v32i8;
2072
303
      }
2073
1.18k
      if (Subtarget.hasSSE2() && (Subtarget.getPreferVectorWidth() >= 128))
2074
1.16k
        return MVT::v16i8;
2075
16
      // TODO: Can SSE1 handle a byte vector?
2076
16
      // If we have SSE1 registers we should be able to use them.
2077
16
      if (Subtarget.hasSSE1() && (Subtarget.is64Bit() || Subtarget.hasX87()) &&
2078
16
          (Subtarget.getPreferVectorWidth() >= 128))
2079
6
        return MVT::v4f32;
2080
3.01k
    } else if ((!IsMemset || ZeroMemset) && !MemcpyStrSrc && Size >= 8 &&
2081
3.01k
               !Subtarget.is64Bit() && Subtarget.hasSSE2()) {
2082
581
      // Do not use f64 to lower memcpy if source is string constant. It's
2083
581
      // better to use i32 to avoid the loads.
2084
581
      // Also, do not use f64 to lower memset unless this is a memset of zeros.
2085
581
      // The gymnastics of splatting a byte value into an XMM register and then
2086
581
      // only using 8-byte stores (because this is a CPU with slow unaligned
2087
581
      // 16-byte accesses) makes that a loser.
2088
581
      return MVT::f64;
2089
581
    }
2090
2.44k
  }
2091
2.44k
  // This is a compromise. If we reach here, unaligned accesses may be slow on
2092
2.44k
  // this target. However, creating smaller, aligned accesses could be even
2093
2.44k
  // slower and would certainly be a lot more code.
2094
2.44k
  if (Subtarget.is64Bit() && Size >= 8)
2095
1.93k
    return MVT::i64;
2096
514
  return MVT::i32;
2097
514
}
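The selection order above can be condensed into a short decision sketch. This is an illustration only (hypothetical helper, simplified to ignore the memset/constant-source cases and the preferred-vector-width checks), not the real getOptimalMemOpType:

#include <cstdint>
#include <cstdio>

// Wide vector types win when unaligned 16-byte access is cheap, then a
// 32-bit SSE2 target falls back to f64, and everything else uses i64/i32.
static const char *pickMemOpType(uint64_t Size, bool FastUnaligned16,
                                 bool HasAVX, bool HasSSE2, bool Is64Bit) {
  if (Size >= 16 && FastUnaligned16) {
    if (Size >= 32 && HasAVX) return "v32i8";
    if (HasSSE2)              return "v16i8";
  } else if (Size >= 8 && !Is64Bit && HasSSE2) {
    return "f64";
  }
  return (Is64Bit && Size >= 8) ? "i64" : "i32";
}

int main() {
  std::printf("%s\n", pickMemOpType(64, true,  true,  true, true));  // v32i8
  std::printf("%s\n", pickMemOpType(64, true,  false, true, true));  // v16i8
  std::printf("%s\n", pickMemOpType(24, false, false, true, false)); // f64
  std::printf("%s\n", pickMemOpType(24, false, false, true, true));  // i64
}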
2098
2099
2.16k
bool X86TargetLowering::isSafeMemOpType(MVT VT) const {
2100
2.16k
  if (VT == MVT::f32)
2101
0
    return X86ScalarSSEf32;
2102
2.16k
  else if (VT == MVT::f64)
2103
19
    return X86ScalarSSEf64;
2104
2.14k
  return true;
2105
2.14k
}
2106
2107
bool X86TargetLowering::allowsMisalignedMemoryAccesses(
2108
    EVT VT, unsigned, unsigned Align, MachineMemOperand::Flags Flags,
2109
142k
    bool *Fast) const {
2110
142k
  if (Fast) {
2111
80.3k
    switch (VT.getSizeInBits()) {
2112
80.3k
    default:
2113
7.50k
      // 8-byte and under are always assumed to be fast.
2114
7.50k
      *Fast = true;
2115
7.50k
      break;
2116
80.3k
    case 128:
2117
49.1k
      *Fast = !Subtarget.isUnalignedMem16Slow();
2118
49.1k
      break;
2119
80.3k
    case 256:
2120
23.6k
      *Fast = !Subtarget.isUnalignedMem32Slow();
2121
23.6k
      break;
2122
142k
    // TODO: What about AVX-512 (512-bit) accesses?
2123
142k
    }
2124
142k
  }
2125
142k
  // NonTemporal vector memory ops must be aligned.
2126
142k
  if (!!(Flags & MachineMemOperand::MONonTemporal) && VT.isVector()) {
2127
5.71k
    // NT loads can only be vector aligned, so if its less aligned than the
2128
5.71k
    // minimum vector size (which we can split the vector down to), we might as
2129
5.71k
    // well use a regular unaligned vector load.
2130
5.71k
    // We don't have any NT loads pre-SSE41.
2131
5.71k
    if (!!(Flags & MachineMemOperand::MOLoad))
2132
1.06k
      return (Align < 16 || !Subtarget.hasSSE41());
2133
4.64k
    return false;
2134
4.64k
  }
2135
136k
  // Misaligned accesses of any size are always allowed.
2136
136k
  return true;
2137
136k
}
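A standalone sketch of the non-temporal rule above (illustration only, hypothetical helper): a misaligned NT vector store is never allowed, and a misaligned NT vector load is only reported as "allowed" in the sense that it will be split down and emitted as a regular unaligned load when the alignment is below 16 bytes or SSE4.1 is missing.

#include <cstdio>

static bool allowMisalignedNT(bool IsLoad, unsigned Align, bool HasSSE41) {
  if (IsLoad)
    return Align < 16 || !HasSSE41;   // degrade to a plain unaligned load
  return false;                       // NT stores must be vector aligned
}

int main() {
  std::printf("NT load,  align 8,  SSE4.1: %d\n", allowMisalignedNT(true, 8, true));   // 1
  std::printf("NT load,  align 16, SSE4.1: %d\n", allowMisalignedNT(true, 16, true));  // 0
  std::printf("NT store, align 8:          %d\n", allowMisalignedNT(false, 8, true));  // 0
}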
2138
2139
/// Return the entry encoding for a jump table in the
2140
/// current function.  The returned value is a member of the
2141
/// MachineJumpTableInfo::JTEntryKind enum.
2142
331
unsigned X86TargetLowering::getJumpTableEncoding() const {
2143
331
  // In GOT pic mode, each entry in the jump table is emitted as a @GOTOFF
2144
331
  // symbol.
2145
331
  if (isPositionIndependent() && Subtarget.isPICStyleGOT())
2146
2
    return MachineJumpTableInfo::EK_Custom32;
2147
329
2148
329
  // Otherwise, use the normal jump table encoding heuristics.
2149
329
  return TargetLowering::getJumpTableEncoding();
2150
329
}
2151
2152
335k
bool X86TargetLowering::useSoftFloat() const {
2153
335k
  return Subtarget.useSoftFloat();
2154
335k
}
2155
2156
void X86TargetLowering::markLibCallAttributes(MachineFunction *MF, unsigned CC,
2157
2.71k
                                              ArgListTy &Args) const {
2158
2.71k
2159
2.71k
  // Only relabel X86-32 for C / Stdcall CCs.
2160
2.71k
  if (Subtarget.is64Bit())
2161
2.01k
    return;
2162
701
  if (CC != CallingConv::C && CC != CallingConv::X86_StdCall)
2163
0
    return;
2164
701
  unsigned ParamRegs = 0;
2165
701
  if (auto *M = MF->getFunction().getParent())
2166
701
    ParamRegs = M->getNumberRegisterParameters();
2167
701
2168
701
  // Mark the first N int arguments as having reg
2169
1.18k
  for (unsigned Idx = 0; Idx < Args.size(); Idx++) {
2170
894
    Type *T = Args[Idx].Ty;
2171
894
    if (T->isIntOrPtrTy())
2172
457
      if (MF->getDataLayout().getTypeAllocSize(T) <= 8) {
2173
424
        unsigned numRegs = 1;
2174
424
        if (MF->getDataLayout().getTypeAllocSize(T) > 4)
2175
141
          numRegs = 2;
2176
424
        if (ParamRegs < numRegs)
2177
412
          return;
2178
12
        ParamRegs -= numRegs;
2179
12
        Args[Idx].IsInReg = true;
2180
12
      }
2181
894
  }
2182
701
}
2183
2184
const MCExpr *
2185
X86TargetLowering::LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
2186
                                             const MachineBasicBlock *MBB,
2187
48
                                             unsigned uid,MCContext &Ctx) const{
2188
48
  assert(isPositionIndependent() && Subtarget.isPICStyleGOT());
2189
48
  // In 32-bit ELF systems, our jump table entries are formed with @GOTOFF
2190
48
  // entries.
2191
48
  return MCSymbolRefExpr::create(MBB->getSymbol(),
2192
48
                                 MCSymbolRefExpr::VK_GOTOFF, Ctx);
2193
48
}
2194
2195
/// Returns relocation base for the given PIC jumptable.
2196
SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table,
2197
258
                                                    SelectionDAG &DAG) const {
2198
258
  if (!Subtarget.is64Bit())
2199
58
    // This doesn't have SDLoc associated with it, but is not really the
2200
58
    // same as a Register.
2201
58
    return DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
2202
58
                       getPointerTy(DAG.getDataLayout()));
2203
200
  return Table;
2204
200
}
2205
2206
/// This returns the relocation base for the given PIC jumptable,
2207
/// the same as getPICJumpTableRelocBase, but as an MCExpr.
2208
const MCExpr *X86TargetLowering::
2209
getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI,
2210
298
                             MCContext &Ctx) const {
2211
298
  // X86-64 uses RIP relative addressing based on the jump table label.
2212
298
  if (Subtarget.isPICStyleRIPRel())
2213
235
    return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
2214
63
2215
63
  // Otherwise, the reference is relative to the PIC base.
2216
63
  return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
2217
63
}
2218
2219
std::pair<const TargetRegisterClass *, uint8_t>
2220
X86TargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
2221
1.98M
                                           MVT VT) const {
2222
1.98M
  const TargetRegisterClass *RRC = nullptr;
2223
1.98M
  uint8_t Cost = 1;
2224
1.98M
  switch (VT.SimpleTy) {
2225
1.98M
  default:
2226
1.60M
    return TargetLowering::findRepresentativeClass(TRI, VT);
2227
1.98M
  case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64:
2228
61.0k
    RRC = Subtarget.is64Bit() ? &X86::GR64RegClass : &X86::GR32RegClass;
2229
61.0k
    break;
2230
61.0k
  case MVT::x86mmx:
2231
15.2k
    RRC = &X86::VR64RegClass;
2232
15.2k
    break;
2233
305k
  case MVT::f32: case MVT::f64:
2234
305k
  case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
2235
305k
  case MVT::v4f32: case MVT::v2f64:
2236
305k
  case MVT::v32i8: case MVT::v16i16: case MVT::v8i32: case MVT::v4i64:
2237
305k
  case MVT::v8f32: case MVT::v4f64:
2238
305k
  case MVT::v64i8: case MVT::v32i16: case MVT::v16i32: case MVT::v8i64:
2239
305k
  case MVT::v16f32: case MVT::v8f64:
2240
305k
    RRC = &X86::VR128XRegClass;
2241
305k
    break;
2242
381k
  }
2243
381k
  return std::make_pair(RRC, Cost);
2244
381k
}
2245
2246
533
unsigned X86TargetLowering::getAddressSpace() const {
2247
533
  if (Subtarget.is64Bit())
2248
398
    return (getTargetMachine().getCodeModel() == CodeModel::Kernel) ? 256 : 257;
2249
135
  return 256;
2250
135
}
2251
2252
1.34k
static bool hasStackGuardSlotTLS(const Triple &TargetTriple) {
2253
1.34k
  return TargetTriple.isOSGlibc() || TargetTriple.isOSFuchsia() ||
2254
1.34k
         (TargetTriple.isAndroid() && !TargetTriple.isAndroidVersionLT(17));
2255
1.34k
}
2256
2257
static Constant* SegmentOffset(IRBuilder<> &IRB,
2258
533
                               unsigned Offset, unsigned AddressSpace) {
2259
533
  return ConstantExpr::getIntToPtr(
2260
533
      ConstantInt::get(Type::getInt32Ty(IRB.getContext()), Offset),
2261
533
      Type::getInt8PtrTy(IRB.getContext())->getPointerTo(AddressSpace));
2262
533
}
2263
2264
1.03k
Value *X86TargetLowering::getIRStackGuard(IRBuilder<> &IRB) const {
2265
1.03k
  // glibc, bionic, and Fuchsia have a special slot for the stack guard in
2266
1.03k
  // tcbhead_t; use it instead of the usual global variable (see
2267
1.03k
  // sysdeps/{i386,x86_64}/nptl/tls.h)
2268
1.03k
  if (hasStackGuardSlotTLS(Subtarget.getTargetTriple())) {
2269
523
    if (Subtarget.isTargetFuchsia()) {
2270
2
      // <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.
2271
2
      return SegmentOffset(IRB, 0x10, getAddressSpace());
2272
521
    } else {
2273
521
      // %fs:0x28, unless we're using a Kernel code model, in which case
2274
521
      // it's %gs:0x28.  gs:0x14 on i386.
2275
521
      unsigned Offset = (Subtarget.is64Bit()) ? 0x28 : 0x14;
2276
521
      return SegmentOffset(IRB, Offset, getAddressSpace());
2277
521
    }
2278
511
  }
2279
511
2280
511
  return TargetLowering::getIRStackGuard(IRB);
2281
511
}
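For reference, the glibc slot picked above corresponds to the canary a program can read at run time on x86-64 Linux. This is illustrative GCC/Clang-style inline asm, limited to x86-64 Linux with glibc, and not part of the lowering code:

#include <cstdint>
#include <cstdio>

// Reads %fs:0x28, i.e. the stack_guard field of tcbhead_t (gs:0x14 on i386).
static uint64_t readStackGuard() {
  uint64_t Guard;
  asm("mov %%fs:0x28, %0" : "=r"(Guard));
  return Guard;
}

int main() {
  std::printf("stack guard = 0x%llx\n", (unsigned long long)readStackGuard());
}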
2282
2283
389
void X86TargetLowering::insertSSPDeclarations(Module &M) const {
2284
389
  // MSVC CRT provides functionalities for stack protection.
2285
389
  if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
2286
389
      Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
2287
78
    // MSVC CRT has a global variable holding security cookie.
2288
78
    M.getOrInsertGlobal("__security_cookie",
2289
78
                        Type::getInt8PtrTy(M.getContext()));
2290
78
2291
78
    // MSVC CRT has a function to validate security cookie.
2292
78
    FunctionCallee SecurityCheckCookie = M.getOrInsertFunction(
2293
78
        "__security_check_cookie", Type::getVoidTy(M.getContext()),
2294
78
        Type::getInt8PtrTy(M.getContext()));
2295
78
    if (Function *F = dyn_cast<Function>(SecurityCheckCookie.getCallee())) {
2296
78
      F->setCallingConv(CallingConv::X86_FastCall);
2297
78
      F->addAttribute(1, Attribute::AttrKind::InReg);
2298
78
    }
2299
78
    return;
2300
78
  }
2301
311
  // glibc, bionic, and Fuchsia have a special slot for the stack guard.
2302
311
  if (hasStackGuardSlotTLS(Subtarget.getTargetTriple()))
2303
0
    return;
2304
311
  TargetLowering::insertSSPDeclarations(M);
2305
311
}
2306
2307
810
Value *X86TargetLowering::getSDagStackGuard(const Module &M) const {
2308
810
  // MSVC CRT has a global variable holding security cookie.
2309
810
  if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
2310
810
      Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
2311
78
    return M.getGlobalVariable("__security_cookie");
2312
78
  }
2313
732
  return TargetLowering::getSDagStackGuard(M);
2314
732
}
2315
2316
1.05k
Function *X86TargetLowering::getSSPStackGuardCheck(const Module &M) const {
2317
1.05k
  // MSVC CRT has a function to validate security cookie.
2318
1.05k
  if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
2319
1.05k
      Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
2320
164
    return M.getFunction("__security_check_cookie");
2321
164
  }
2322
889
  return TargetLowering::getSSPStackGuardCheck(M);
2323
889
}
2324
2325
151
Value *X86TargetLowering::getSafeStackPointerLocation(IRBuilder<> &IRB) const {
2326
151
  if (Subtarget.getTargetTriple().isOSContiki())
2327
4
    return getDefaultSafeStackPointerLocation(IRB, false);
2328
147
2329
147
  // Android provides a fixed TLS slot for the SafeStack pointer. See the
2330
147
  // definition of TLS_SLOT_SAFESTACK in
2331
147
  // https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h
2332
147
  if (Subtarget.isTargetAndroid()) {
2333
7
    // %fs:0x48, unless we're using a Kernel code model, in which case it's %gs:
2334
7
    // %gs:0x24 on i386
2335
7
    unsigned Offset = (Subtarget.is64Bit()) ? 0x48 : 0x24;
2336
7
    return SegmentOffset(IRB, Offset, getAddressSpace());
2337
7
  }
2338
140
2339
140
  // Fuchsia is similar.
2340
140
  if (Subtarget.isTargetFuchsia()) {
2341
3
    // <zircon/tls.h> defines ZX_TLS_UNSAFE_SP_OFFSET with this value.
2342
3
    return SegmentOffset(IRB, 0x18, getAddressSpace());
2343
3
  }
2344
137
2345
137
  return TargetLowering::getSafeStackPointerLocation(IRB);
2346
137
}
2347
2348
bool X86TargetLowering::isNoopAddrSpaceCast(unsigned SrcAS,
2349
136
                                            unsigned DestAS) const {
2350
136
  assert(SrcAS != DestAS && "Expected different address spaces!");
2351
136
2352
136
  return SrcAS < 256 && DestAS < 256;
2353
136
}
2354
2355
//===----------------------------------------------------------------------===//
2356
//               Return Value Calling Convention Implementation
2357
//===----------------------------------------------------------------------===//
2358
2359
bool X86TargetLowering::CanLowerReturn(
2360
    CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg,
2361
280k
    const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
2362
280k
  SmallVector<CCValAssign, 16> RVLocs;
2363
280k
  CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
2364
280k
  return CCInfo.CheckReturn(Outs, RetCC_X86);
2365
280k
}
2366
2367
172
const MCPhysReg *X86TargetLowering::getScratchRegisters(CallingConv::ID) const {
2368
172
  static const MCPhysReg ScratchRegs[] = { X86::R11, 0 };
2369
172
  return ScratchRegs;
2370
172
}
2371
2372
/// Lowers mask values (v*i1) to the local register values
2373
/// \returns DAG node after lowering to register type
2374
static SDValue lowerMasksToReg(const SDValue &ValArg, const EVT &ValLoc,
2375
320
                               const SDLoc &Dl, SelectionDAG &DAG) {
2376
320
  EVT ValVT = ValArg.getValueType();
2377
320
2378
320
  if (ValVT == MVT::v1i1)
2379
3
    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, Dl, ValLoc, ValArg,
2380
3
                       DAG.getIntPtrConstant(0, Dl));
2381
317
2382
317
  if ((ValVT == MVT::v8i1 && (ValLoc == MVT::i8 || ValLoc == MVT::i32)) ||
2383
317
      (ValVT == MVT::v16i1 && (ValLoc == MVT::i16 || ValLoc == MVT::i32))) {
2384
30
    // Two stage lowering might be required
2385
30
    // bitcast:   v8i1 -> i8 / v16i1 -> i16
2386
30
    // anyextend: i8   -> i32 / i16   -> i32
2387
30
    EVT TempValLoc = ValVT == MVT::v8i1 ? MVT::i8 : MVT::i16;
2388
30
    SDValue ValToCopy = DAG.getBitcast(TempValLoc, ValArg);
2389
30
    if (ValLoc == MVT::i32)
2390
24
      ValToCopy = DAG.getNode(ISD::ANY_EXTEND, Dl, ValLoc, ValToCopy);
2391
30
    return ValToCopy;
2392
30
  }
2393
287
2394
287
  if ((ValVT == MVT::v32i1 && ValLoc == MVT::i32) ||
2395
287
      (ValVT == MVT::v64i1 && ValLoc == MVT::i64)) {
2396
54
    // One stage lowering is required
2397
54
    // bitcast:   v32i1 -> i32 / v64i1 -> i64
2398
54
    return DAG.getBitcast(ValLoc, ValArg);
2399
54
  }
2400
233
2401
233
  return DAG.getNode(ISD::ANY_EXTEND, Dl, ValLoc, ValArg);
2402
233
}
2403
2404
/// Breaks v64i1 value into two registers and adds the new node to the DAG
2405
static void Passv64i1ArgInRegs(
2406
    const SDLoc &Dl, SelectionDAG &DAG, SDValue Chain, SDValue &Arg,
2407
    SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, CCValAssign &VA,
2408
3
    CCValAssign &NextVA, const X86Subtarget &Subtarget) {
2409
3
  assert(Subtarget.hasBWI() && "Expected AVX512BW target!");
2410
3
  assert(Subtarget.is32Bit() && "Expecting 32 bit target");
2411
3
  assert(Arg.getValueType() == MVT::i64 && "Expecting 64 bit value");
2412
3
  assert(VA.isRegLoc() && NextVA.isRegLoc() &&
2413
3
         "The value should reside in two registers");
2414
3
2415
3
  // Before splitting the value we cast it to i64
2416
3
  Arg = DAG.getBitcast(MVT::i64, Arg);
2417
3
2418
3
  // Splitting the value into two i32 types
2419
3
  SDValue Lo, Hi;
2420
3
  Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i32, Arg,
2421
3
                   DAG.getConstant(0, Dl, MVT::i32));
2422
3
  Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i32, Arg,
2423
3
                   DAG.getConstant(1, Dl, MVT::i32));
2424
3
2425
3
  // Attach the two i32 types into corresponding registers
2426
3
  RegsToPass.push_back(std::make_pair(VA.getLocReg(), Lo));
2427
3
  RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), Hi));
2428
3
}
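At the value level, the splitting above amounts to the following plain-integer sketch (illustration only): the v64i1 mask is bitcast to an i64 and the two EXTRACT_ELEMENT nodes pick off the low and high 32 bits for the two registers.

#include <cstdint>
#include <cstdio>

static void splitMask64(uint64_t Mask, uint32_t &Lo, uint32_t &Hi) {
  Lo = static_cast<uint32_t>(Mask);        // EXTRACT_ELEMENT 0 -> low register
  Hi = static_cast<uint32_t>(Mask >> 32);  // EXTRACT_ELEMENT 1 -> high register
}

int main() {
  uint32_t Lo, Hi;
  splitMask64(0x0123456789ABCDEFULL, Lo, Hi);
  std::printf("lo=0x%08x hi=0x%08x\n", Lo, Hi);  // lo=0x89abcdef hi=0x01234567
}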
2429
2430
SDValue
2431
X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
2432
                               bool isVarArg,
2433
                               const SmallVectorImpl<ISD::OutputArg> &Outs,
2434
                               const SmallVectorImpl<SDValue> &OutVals,
2435
125k
                               const SDLoc &dl, SelectionDAG &DAG) const {
2436
125k
  MachineFunction &MF = DAG.getMachineFunction();
2437
125k
  X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
2438
125k
2439
125k
  // In some cases we need to disable registers from the default CSR list.
2440
125k
  // For example, when they are used for argument passing.
2441
125k
  bool ShouldDisableCalleeSavedRegister =
2442
125k
      CallConv == CallingConv::X86_RegCall ||
2443
125k
      MF.getFunction().hasFnAttribute("no_caller_saved_registers");
2444
125k
2445
125k
  if (CallConv == CallingConv::X86_INTR && !Outs.empty())
2446
0
    report_fatal_error("X86 interrupts may not return any value");
2447
125k
2448
125k
  SmallVector<CCValAssign, 16> RVLocs;
2449
125k
  CCState CCInfo(CallConv, isVarArg, MF, RVLocs, *DAG.getContext());
2450
125k
  CCInfo.AnalyzeReturn(Outs, RetCC_X86);
2451
125k
2452
125k
  SDValue Flag;
2453
125k
  SmallVector<SDValue, 6> RetOps;
2454
125k
  RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
2455
125k
  // Operand #1 = Bytes To Pop
2456
125k
  RetOps.push_back(DAG.getTargetConstant(FuncInfo->getBytesToPopOnReturn(), dl,
2457
125k
                   MVT::i32));
2458
125k
2459
125k
  // Copy the result values into the output registers.
2460
235k
  for (unsigned I = 0, OutsIndex = 0, E = RVLocs.size(); I != E;
2461
125k
       
++I, ++OutsIndex109k
) {
2462
109k
    CCValAssign &VA = RVLocs[I];
2463
109k
    assert(VA.isRegLoc() && "Can only return in registers!");
2464
109k
2465
109k
    // Add the register to the CalleeSaveDisableRegs list.
2466
109k
    if (ShouldDisableCalleeSavedRegister)
2467
196
      MF.getRegInfo().disableCalleeSavedRegister(VA.getLocReg());
2468
109k
2469
109k
    SDValue ValToCopy = OutVals[OutsIndex];
2470
109k
    EVT ValVT = ValToCopy.getValueType();
2471
109k
2472
109k
    // Promote values to the appropriate types.
2473
109k
    if (VA.getLocInfo() == CCValAssign::SExt)
2474
0
      ValToCopy = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), ValToCopy);
2475
109k
    else if (VA.getLocInfo() == CCValAssign::ZExt)
2476
0
      ValToCopy = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), ValToCopy);
2477
109k
    else if (VA.getLocInfo() == CCValAssign::AExt) {
2478
215
      if (ValVT.isVector() && ValVT.getVectorElementType() == MVT::i1)
2479
206
        ValToCopy = lowerMasksToReg(ValToCopy, VA.getLocVT(), dl, DAG);
2480
9
      else
2481
9
        ValToCopy = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), ValToCopy);
2482
215
    }
2483
109k
    else if (VA.getLocInfo() == CCValAssign::BCvt)
2484
0
      ValToCopy = DAG.getBitcast(VA.getLocVT(), ValToCopy);
2485
109k
2486
109k
    assert(VA.getLocInfo() != CCValAssign::FPExt &&
2487
109k
           "Unexpected FP-extend for return value.");
2488
109k
2489
109k
    // If this is x86-64, and we disabled SSE, we can't return FP values,
2490
109k
    // or SSE or MMX vectors.
2491
109k
    if ((ValVT == MVT::f32 || ValVT == MVT::f64 ||
2492
109k
         VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) &&
2493
109k
        (Subtarget.is64Bit() && !Subtarget.hasSSE1())) {
2494
1
      errorUnsupported(DAG, dl, "SSE register return with SSE disabled");
2495
1
      VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
2496
109k
    } else if (ValVT == MVT::f64 &&
2497
109k
               
(2.30k
Subtarget.is64Bit()2.30k
&&
!Subtarget.hasSSE2()1.77k
)) {
2498
0
      // Likewise we can't return F64 values with SSE1 only.  gcc does so, but
2499
0
      // llvm-gcc has never done it right and no one has noticed, so this
2500
0
      // should be OK for now.
2501
0
      errorUnsupported(DAG, dl, "SSE2 register return with SSE2 disabled");
2502
0
      VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
2503
0
    }
2504
109k
2505
109k
    // Returns in ST0/ST1 are handled specially: these are pushed as operands to
2506
109k
    // the RET instruction and handled by the FP Stackifier.
2507
109k
    if (VA.getLocReg() == X86::FP0 ||
2508
109k
        VA.getLocReg() == X86::FP1) {
2509
1.32k
      // If this is a copy from an xmm register to ST(0), use an FPExtend to
2510
1.32k
      // change the value to the FP stack register class.
2511
1.32k
      if (isScalarFPTypeInSSEReg(VA.getValVT()))
2512
415
        ValToCopy = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f80, ValToCopy);
2513
1.32k
      RetOps.push_back(ValToCopy);
2514
1.32k
      // Don't emit a copytoreg.
2515
1.32k
      continue;
2516
1.32k
    }
2517
108k
2518
108k
    // 64-bit vector (MMX) values are returned in XMM0 / XMM1 except for v1i64
2519
108k
    // which is returned in RAX / RDX.
2520
108k
    if (Subtarget.is64Bit()) {
2521
89.2k
      if (ValVT == MVT::x86mmx) {
2522
102
        if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) {
2523
102
          ValToCopy = DAG.getBitcast(MVT::i64, ValToCopy);
2524
102
          ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
2525
102
                                  ValToCopy);
2526
102
          // If we don't have SSE2 available, convert to v4f32 so the generated
2527
102
          // register is legal.
2528
102
          if (!Subtarget.hasSSE2())
2529
0
            ValToCopy = DAG.getBitcast(MVT::v4f32, ValToCopy);
2530
102
        }
2531
102
      }
2532
89.2k
    }
2533
108k
2534
108k
    SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
2535
108k
2536
108k
    if (VA.needsCustom()) {
2537
1
      assert(VA.getValVT() == MVT::v64i1 &&
2538
1
             "Currently the only custom case is when we split v64i1 to 2 regs");
2539
1
2540
1
      Passv64i1ArgInRegs(dl, DAG, Chain, ValToCopy, RegsToPass, VA, RVLocs[++I],
2541
1
                         Subtarget);
2542
1
2543
1
      assert(2 == RegsToPass.size() &&
2544
1
             "Expecting two registers after Pass64BitArgInRegs");
2545
1
2546
1
      // Add the second register to the CalleeSaveDisableRegs list.
2547
1
      if (ShouldDisableCalleeSavedRegister)
2548
1
        MF.getRegInfo().disableCalleeSavedRegister(RVLocs[I].getLocReg());
2549
108k
    } else {
2550
108k
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), ValToCopy));
2551
108k
    }
2552
108k
2553
108k
    // Add nodes to the DAG and add the values into the RetOps list
2554
108k
    for (auto &Reg : RegsToPass) {
2555
108k
      Chain = DAG.getCopyToReg(Chain, dl, Reg.first, Reg.second, Flag);
2556
108k
      Flag = Chain.getValue(1);
2557
108k
      RetOps.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
2558
108k
    }
2559
108k
  }
2560
125k
2561
125k
  // Swift calling convention does not require we copy the sret argument
2562
125k
  // into %rax/%eax for the return, and SRetReturnReg is not set for Swift.
2563
125k
2564
125k
  // All x86 ABIs require that for returning structs by value we copy
2565
125k
  // the sret argument into %rax/%eax (depending on ABI) for the return.
2566
125k
  // We saved the argument into a virtual register in the entry block,
2567
125k
  // so now we copy the value out and into %rax/%eax.
2568
125k
  //
2569
125k
  // Checking Function.hasStructRetAttr() here is insufficient because the IR
2570
125k
  // may not have an explicit sret argument. If FuncInfo.CanLowerReturn is
2571
125k
  // false, then an sret argument may be implicitly inserted in the SelDAG. In
2572
125k
  // either case FuncInfo->setSRetReturnReg() will have been called.
2573
125k
  if (unsigned SRetReg = FuncInfo->getSRetReturnReg()) {
2574
1.15k
    // When we have both sret and another return value, we should use the
2575
1.15k
    // original Chain stored in RetOps[0], instead of the current Chain updated
2576
1.15k
    // in the above loop. If we only have sret, RetOps[0] equals to Chain.
2577
1.15k
2578
1.15k
    // For the case of sret and another return value, we have
2579
1.15k
    //   Chain_0 at the function entry
2580
1.15k
    //   Chain_1 = getCopyToReg(Chain_0) in the above loop
2581
1.15k
    // If we use Chain_1 in getCopyFromReg, we will have
2582
1.15k
    //   Val = getCopyFromReg(Chain_1)
2583
1.15k
    //   Chain_2 = getCopyToReg(Chain_1, Val) from below
2584
1.15k
2585
1.15k
    // getCopyToReg(Chain_0) will be glued together with
2586
1.15k
    // getCopyToReg(Chain_1, Val) into Unit A, getCopyFromReg(Chain_1) will be
2587
1.15k
    // in Unit B, and we will have cyclic dependency between Unit A and Unit B:
2588
1.15k
    //   Data dependency from Unit B to Unit A due to usage of Val in
2589
1.15k
    //     getCopyToReg(Chain_1, Val)
2590
1.15k
    //   Chain dependency from Unit A to Unit B
2591
1.15k
2592
1.15k
    // So here, we use RetOps[0] (i.e Chain_0) for getCopyFromReg.
2593
1.15k
    SDValue Val = DAG.getCopyFromReg(RetOps[0], dl, SRetReg,
2594
1.15k
                                     getPointerTy(MF.getDataLayout()));
2595
1.15k
2596
1.15k
    unsigned RetValReg
2597
1.15k
        = (Subtarget.is64Bit() && !Subtarget.isTarget64BitILP32()) ?
2598
621
          X86::RAX : X86::EAX;
2599
1.15k
    Chain = DAG.getCopyToReg(Chain, dl, RetValReg, Val, Flag);
2600
1.15k
    Flag = Chain.getValue(1);
2601
1.15k
2602
1.15k
    // RAX/EAX now acts like a return value.
2603
1.15k
    RetOps.push_back(
2604
1.15k
        DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout())));
2605
1.15k
2606
1.15k
    // Add the returned register to the CalleeSaveDisableRegs list.
2607
1.15k
    if (ShouldDisableCalleeSavedRegister)
2608
0
      MF.getRegInfo().disableCalleeSavedRegister(RetValReg);
2609
1.15k
  }
2610
125k
2611
125k
  const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
2612
125k
  const MCPhysReg *I =
2613
125k
      TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
2614
125k
  if (I) {
2615
260
    for (; *I; ++I) {
2616
240
      if (X86::GR64RegClass.contains(*I))
2617
240
        RetOps.push_back(DAG.getRegister(*I, MVT::i64));
2618
240
      else
2619
240
        llvm_unreachable("Unexpected register class in CSRsViaCopy!");
2620
240
    }
2621
20
  }
2622
125k
2623
125k
  RetOps[0] = Chain;  // Update chain.
2624
125k
2625
125k
  // Add the flag if we have it.
2626
125k
  if (Flag.getNode())
2627
101k
    RetOps.push_back(Flag);
2628
125k
2629
125k
  X86ISD::NodeType opcode = X86ISD::RET_FLAG;
2630
125k
  if (CallConv == CallingConv::X86_INTR)
2631
39
    opcode = X86ISD::IRET;
2632
125k
  return DAG.getNode(opcode, dl, MVT::Other, RetOps);
2633
125k
}
2634
2635
1.34k
bool X86TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
2636
1.34k
  if (N->getNumValues() != 1 || !N->hasNUsesOfValue(1, 0))
2637
15
    return false;
2638
1.32k
2639
1.32k
  SDValue TCChain = Chain;
2640
1.32k
  SDNode *Copy = *N->use_begin();
2641
1.32k
  if (Copy->getOpcode() == ISD::CopyToReg) {
2642
170
    // If the copy has a glue operand, we conservatively assume it isn't safe to
2643
170
    // perform a tail call.
2644
170
    if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
2645
2
      return false;
2646
168
    TCChain = Copy->getOperand(0);
2647
1.15k
  } else if (Copy->getOpcode() != ISD::FP_EXTEND)
2648
1.09k
    return false;
2649
228
2650
228
  bool HasRet = false;
2651
228
  for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
2652
575
       UI != UE; ++UI) {
2653
387
    if (UI->getOpcode() != X86ISD::RET_FLAG)
2654
21
      return false;
2655
366
    // If we are returning more than one value, we can definitely
2656
366
    // not make a tail call; see PR19530.
2657
366
    if (UI->getNumOperands() > 4)
2658
19
      return false;
2659
347
    if (UI->getNumOperands() == 4 &&
2660
347
        UI->getOperand(UI->getNumOperands()-1).getValueType() != MVT::Glue)
2661
0
      return false;
2662
347
    HasRet = true;
2663
347
  }
2664
228
2665
228
  if (!HasRet)
2666
4
    return false;
2667
184
2668
184
  Chain = TCChain;
2669
184
  return true;
2670
184
}
2671
2672
EVT X86TargetLowering::getTypeForExtReturn(LLVMContext &Context, EVT VT,
2673
3.92k
                                           ISD::NodeType ExtendKind) const {
2674
3.92k
  MVT ReturnMVT = MVT::i32;
2675
3.92k
2676
3.92k
  bool Darwin = Subtarget.getTargetTriple().isOSDarwin();
2677
3.92k
  if (VT == MVT::i1 || (!Darwin && (VT == MVT::i8 || VT == MVT::i16))) {
2678
2.60k
    // The ABI does not require i1, i8 or i16 to be extended.
2679
2.60k
    //
2680
2.60k
    // On Darwin, there is code in the wild relying on Clang's old behaviour of
2681
2.60k
    // always extending i8/i16 return values, so keep doing that for now.
2682
2.60k
    // (PR26665).
2683
2.60k
    ReturnMVT = MVT::i8;
2684
2.60k
  }
2685
3.92k
2686
3.92k
  EVT MinVT = getRegisterType(Context, ReturnMVT);
2687
3.92k
  return VT.bitsLT(MinVT) ? MinVT : VT;
2688
3.92k
}
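A standalone sketch of the return-extension policy above (hypothetical helper, widths in bits): i1 is never widened past i8, and i8/i16 are widened to i32 only on Darwin, where existing callers rely on the old extension behaviour.

#include <cstdio>

static unsigned extReturnBits(unsigned ValBits, bool IsDarwin) {
  unsigned MinBits = 32;
  if (ValBits == 1 || (!IsDarwin && (ValBits == 8 || ValBits == 16)))
    MinBits = 8;                      // the ABI does not require extension
  return ValBits < MinBits ? MinBits : ValBits;
}

int main() {
  std::printf("i8 on Linux:  returned as i%u\n", extReturnBits(8, false));  // i8
  std::printf("i8 on Darwin: returned as i%u\n", extReturnBits(8, true));   // i32
  std::printf("i1 on Darwin: returned as i%u\n", extReturnBits(1, true));   // i8
}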
2689
2690
/// Reads two 32 bit registers and creates a 64 bit mask value.
2691
/// \param VA The current 32 bit value that needs to be assigned.
2692
/// \param NextVA The next 32 bit value that needs to be assigned.
2693
/// \param Root The parent DAG node.
2694
/// \param [in,out] InFlag Represents SDvalue in the parent DAG node for
2695
///                        glue purposes. In the case the DAG is already using
2696
///                        physical register instead of virtual, we should glue
2697
///                        our new SDValue to InFlag SDvalue.
2698
/// \return a new SDvalue of size 64bit.
2699
static SDValue getv64i1Argument(CCValAssign &VA, CCValAssign &NextVA,
2700
                                SDValue &Root, SelectionDAG &DAG,
2701
                                const SDLoc &Dl, const X86Subtarget &Subtarget,
2702
3
                                SDValue *InFlag = nullptr) {
2703
3
  assert((Subtarget.hasBWI()) && "Expected AVX512BW target!");
2704
3
  assert(Subtarget.is32Bit() && "Expecting 32 bit target");
2705
3
  assert(VA.getValVT() == MVT::v64i1 &&
2706
3
         "Expecting first location of 64 bit width type");
2707
3
  assert(NextVA.getValVT() == VA.getValVT() &&
2708
3
         "The locations should have the same type");
2709
3
  assert(VA.isRegLoc() && NextVA.isRegLoc() &&
2710
3
         "The values should reside in two registers");
2711
3
2712
3
  SDValue Lo, Hi;
2713
3
  SDValue ArgValueLo, ArgValueHi;
2714
3
2715
3
  MachineFunction &MF = DAG.getMachineFunction();
2716
3
  const TargetRegisterClass *RC = &X86::GR32RegClass;
2717
3
2718
3
  // Read a 32 bit value from the registers.
2719
3
  if (nullptr == InFlag) {
2720
2
    // When no physical register is present,
2721
2
    // create an intermediate virtual register.
2722
2
    unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
2723
2
    ArgValueLo = DAG.getCopyFromReg(Root, Dl, Reg, MVT::i32);
2724
2
    Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
2725
2
    ArgValueHi = DAG.getCopyFromReg(Root, Dl, Reg, MVT::i32);
2726
2
  } else {
2727
1
    // When a physical register is available read the value from it and glue
2728
1
    // the reads together.
2729
1
    ArgValueLo =
2730
1
      DAG.getCopyFromReg(Root, Dl, VA.getLocReg(), MVT::i32, *InFlag);
2731
1
    *InFlag = ArgValueLo.getValue(2);
2732
1
    ArgValueHi =
2733
1
      DAG.getCopyFromReg(Root, Dl, NextVA.getLocReg(), MVT::i32, *InFlag);
2734
1
    *InFlag = ArgValueHi.getValue(2);
2735
1
  }
2736
3
2737
3
  // Convert the i32 type into v32i1 type.
2738
3
  Lo = DAG.getBitcast(MVT::v32i1, ArgValueLo);
2739
3
2740
3
  // Convert the i32 type into v32i1 type.
2741
3
  Hi = DAG.getBitcast(MVT::v32i1, ArgValueHi);
2742
3
2743
3
  // Concatenate the two values together.
2744
3
  return DAG.getNode(ISD::CONCAT_VECTORS, Dl, MVT::v64i1, Lo, Hi);
2745
3
}
2746
2747
/// The function will lower a register of various sizes (8/16/32/64)
2748
/// to a mask value of the expected size (v8i1/v16i1/v32i1/v64i1)
2749
/// \returns a DAG node contains the operand after lowering to mask type.
2750
static SDValue lowerRegToMasks(const SDValue &ValArg, const EVT &ValVT,
2751
                               const EVT &ValLoc, const SDLoc &Dl,
2752
88
                               SelectionDAG &DAG) {
2753
88
  SDValue ValReturned = ValArg;
2754
88
2755
88
  if (ValVT == MVT::v1i1)
2756
12
    return DAG.getNode(ISD::SCALAR_TO_VECTOR, Dl, MVT::v1i1, ValReturned);
2757
76
2758
76
  if (ValVT == MVT::v64i1) {
2759
28
    // In 32 bit machine, this case is handled by getv64i1Argument
2760
28
    assert(ValLoc == MVT::i64 && "Expecting only i64 locations");
2761
28
    // In 64 bit machine, there is no need to truncate the value, only bitcast
2762
48
  } else {
2763
48
    MVT maskLen;
2764
48
    switch (ValVT.getSimpleVT().SimpleTy) {
2765
48
    case MVT::v8i1:
2766
18
      maskLen = MVT::i8;
2767
18
      break;
2768
48
    case MVT::v16i1:
2769
18
      maskLen = MVT::i16;
2770
18
      break;
2771
48
    case MVT::v32i1:
2772
12
      maskLen = MVT::i32;
2773
12
      break;
2774
48
    default:
2775
0
      llvm_unreachable("Expecting a vector of i1 types");
2776
48
    }
2777
48
2778
48
    ValReturned = DAG.getNode(ISD::TRUNCATE, Dl, maskLen, ValReturned);
2779
48
  }
2780
76
  return DAG.getBitcast(ValVT, ValReturned);
2781
76
}
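A plain-integer sketch of the reverse direction handled above (illustration only): a value coming back in an i32 is truncated to the mask width and its bits are reinterpreted as the v8i1 lanes.

#include <cstdint>
#include <cstdio>

static void i32ToV8i1(uint32_t Ret, bool Lanes[8]) {
  uint8_t Bits = static_cast<uint8_t>(Ret);   // ISD::TRUNCATE i32 -> i8
  for (int I = 0; I < 8; ++I)
    Lanes[I] = (Bits >> I) & 1;               // bitcast i8 -> v8i1
}

int main() {
  bool Lanes[8];
  i32ToV8i1(0xA5, Lanes);
  for (int I = 0; I < 8; ++I)
    std::printf("lane %d = %d\n", I, Lanes[I]);
}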
2782
2783
/// Lower the result values of a call into the
2784
/// appropriate copies out of appropriate physical registers.
2785
///
2786
SDValue X86TargetLowering::LowerCallResult(
2787
    SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
2788
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
2789
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
2790
134k
    uint32_t *RegMask) const {
2791
134k
2792
134k
  const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
2793
134k
  // Assign locations to each value returned by this call.
2794
134k
  SmallVector<CCValAssign, 16> RVLocs;
2795
134k
  bool Is64Bit = Subtarget.is64Bit();
2796
134k
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
2797
134k
                 *DAG.getContext());
2798
134k
  CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
2799
134k
2800
134k
  // Copy all of the result registers out of their specified physreg.
2801
196k
  for (unsigned I = 0, InsIndex = 0, E = RVLocs.size(); I != E;
2802
134k
       ++I, ++InsIndex) {
2803
62.3k
    CCValAssign &VA = RVLocs[I];
2804
62.3k
    EVT CopyVT = VA.getLocVT();
2805
62.3k
2806
62.3k
    // In some calling conventions we need to remove the used registers
2807
62.3k
    // from the register mask.
2808
62.3k
    if (RegMask) {
2809
72
      for (MCSubRegIterator SubRegs(VA.getLocReg(), TRI, /*IncludeSelf=*/true);
2810
299
           SubRegs.isValid(); ++SubRegs)
2811
227
        RegMask[*SubRegs / 32] &= ~(1u << (*SubRegs % 32));
2812
72
    }
2813
62.3k
2814
62.3k
    // If this is x86-64, and we disabled SSE, we can't return FP values
2815
62.3k
    if ((CopyVT == MVT::f32 || CopyVT == MVT::f64 || CopyVT == MVT::f128) &&
2816
62.3k
        ((Is64Bit || Ins[InsIndex].Flags.isInReg()) && !Subtarget.hasSSE1())) {
2817
9
      errorUnsupported(DAG, dl, "SSE register return with SSE disabled");
2818
9
      VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
2819
9
    }
2820
62.3k
2821
62.3k
    // If we prefer to use the value in xmm registers, copy it out as f80 and
2822
62.3k
    // use a truncate to move it from fp stack reg to xmm reg.
2823
62.3k
    bool RoundAfterCopy = false;
2824
62.3k
    if ((VA.getLocReg() == X86::FP0 || 
VA.getLocReg() == X86::FP161.7k
) &&
2825
62.3k
        
isScalarFPTypeInSSEReg(VA.getValVT())539
) {
2826
169
      if (!Subtarget.hasX87())
2827
0
        report_fatal_error("X87 register return with X87 disabled");
2828
169
      CopyVT = MVT::f80;
2829
169
      RoundAfterCopy = (CopyVT != VA.getLocVT());
2830
169
    }
2831
62.3k
2832
62.3k
    SDValue Val;
2833
62.3k
    if (VA.needsCustom()) {
2834
1
      assert(VA.getValVT() == MVT::v64i1 &&
2835
1
             "Currently the only custom case is when we split v64i1 to 2 regs");
2836
1
      Val =
2837
1
          getv64i1Argument(VA, RVLocs[++I], Chain, DAG, dl, Subtarget, &InFlag);
2838
62.3k
    } else {
2839
62.3k
      Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), CopyVT, InFlag)
2840
62.3k
                  .getValue(1);
2841
62.3k
      Val = Chain.getValue(0);
2842
62.3k
      InFlag = Chain.getValue(2);
2843
62.3k
    }
2844
62.3k
2845
62.3k
    if (RoundAfterCopy)
2846
169
      Val = DAG.getNode(ISD::FP_ROUND, dl, VA.getValVT(), Val,
2847
169
                        // This truncation won't change the value.
2848
169
                        DAG.getIntPtrConstant(1, dl));
2849
62.3k
2850
62.3k
    if (VA.isExtInLoc() && (VA.getValVT().getScalarType() == MVT::i1)) {
2851
24
      if (VA.getValVT().isVector() &&
2852
24
          ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) ||
2853
24
           (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) {
2854
12
        // promoting a mask type (v*i1) into a register of type i64/i32/i16/i8
2855
12
        Val = lowerRegToMasks(Val, VA.getValVT(), VA.getLocVT(), dl, DAG);
2856
12
      } else
2857
12
        Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
2858
24
    }
2859
62.3k
2860
62.3k
    InVals.push_back(Val);
2861
62.3k
  }
2862
134k
2863
134k
  return Chain;
2864
134k
}
2865
2866
//===----------------------------------------------------------------------===//
2867
//                C & StdCall & Fast Calling Convention implementation
2868
//===----------------------------------------------------------------------===//
2869
//  StdCall calling convention seems to be standard for many Windows' API
2870
//  routines and around. It differs from C calling convention just a little:
2871
//  callee should clean up the stack, not caller. Symbols should be also
2872
//  decorated in some fancy way :) It doesn't support any vector arguments.
2873
//  For info on fast calling convention see Fast Calling Convention (tail call)
2874
//  implementation LowerX86_32FastCCCallTo.
2875
2876
/// CallIsStructReturn - Determines whether a call uses struct return
2877
/// semantics.
2878
enum StructReturnType {
2879
  NotStructReturn,
2880
  RegStructReturn,
2881
  StackStructReturn
2882
};
2883
static StructReturnType
2884
142k
callIsStructReturn(ArrayRef<ISD::OutputArg> Outs, bool IsMCU) {
2885
142k
  if (Outs.empty())
2886
27.5k
    return NotStructReturn;
2887
114k
2888
114k
  const ISD::ArgFlagsTy &Flags = Outs[0].Flags;
2889
114k
  if (!Flags.isSRet())
2890
114k
    return NotStructReturn;
2891
592
  if (Flags.isInReg() || IsMCU)
2892
1
    return RegStructReturn;
2893
591
  return StackStructReturn;
2894
591
}
2895
2896
/// Determines whether a function uses struct return semantics.
2897
static StructReturnType
2898
25.5k
argsAreStructReturn(ArrayRef<ISD::InputArg> Ins, bool IsMCU) {
2899
25.5k
  if (Ins.empty())
2900
2.64k
    return NotStructReturn;
2901
22.8k
2902
22.8k
  const ISD::ArgFlagsTy &Flags = Ins[0].Flags;
2903
22.8k
  if (!Flags.isSRet())
2904
22.3k
    return NotStructReturn;
2905
561
  if (Flags.isInReg() || IsMCU)
2906
3
    return RegStructReturn;
2907
558
  return StackStructReturn;
2908
558
}
2909
2910
/// Make a copy of an aggregate at address specified by "Src" to address
2911
/// "Dst" with size and alignment information specified by the specific
2912
/// parameter attribute. The copy will be passed as a byval function parameter.
2913
static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
2914
                                         SDValue Chain, ISD::ArgFlagsTy Flags,
2915
517
                                         SelectionDAG &DAG, const SDLoc &dl) {
2916
517
  SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
2917
517
2918
517
  return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
2919
517
                       /*isVolatile*/false, /*AlwaysInline=*/true,
2920
517
                       /*isTailCall*/false,
2921
517
                       MachinePointerInfo(), MachinePointerInfo());
2922
517
}
2923
2924
/// Return true if the calling convention is one that we can guarantee TCO for.
2925
46.4k
static bool canGuaranteeTCO(CallingConv::ID CC) {
2926
46.4k
  return (CC == CallingConv::Fast || CC == CallingConv::GHC ||
2927
46.4k
          CC == CallingConv::X86_RegCall || CC == CallingConv::HiPE ||
2928
46.4k
          CC == CallingConv::HHVM);
2929
46.4k
}
2930
2931
/// Return true if we might ever do TCO for calls with this calling convention.
2932
12.6k
static bool mayTailCallThisCC(CallingConv::ID CC) {
2933
12.6k
  switch (CC) {
2934
12.6k
  // C calling conventions:
2935
12.6k
  case CallingConv::C:
2936
12.1k
  case CallingConv::Win64:
2937
12.1k
  case CallingConv::X86_64_SysV:
2938
12.1k
  // Callee pop conventions:
2939
12.1k
  case CallingConv::X86_ThisCall:
2940
12.1k
  case CallingConv::X86_StdCall:
2941
12.1k
  case CallingConv::X86_VectorCall:
2942
12.1k
  case CallingConv::X86_FastCall:
2943
12.1k
  // Swift:
2944
12.1k
  case CallingConv::Swift:
2945
12.1k
    return true;
2946
12.1k
  default:
2947
480
    return canGuaranteeTCO(CC);
2948
12.6k
  }
2949
12.6k
}
2950
2951
/// Return true if the function is being made into a tailcall target by
2952
/// changing its ABI.
2953
446k
static bool shouldGuaranteeTCO(CallingConv::ID CC, bool GuaranteedTailCallOpt) {
2954
446k
  return GuaranteedTailCallOpt && canGuaranteeTCO(CC);
2955
446k
}
2956
2957
6.13k
bool X86TargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
2958
6.13k
  auto Attr =
2959
6.13k
      CI->getParent()->getParent()->getFnAttribute("disable-tail-calls");
2960
6.13k
  if (!CI->isTailCall() || Attr.getValueAsString() == "true")
2961
2.13k
    return false;
2962
3.99k
2963
3.99k
  ImmutableCallSite CS(CI);
2964
3.99k
  CallingConv::ID CalleeCC = CS.getCallingConv();
2965
3.99k
  if (!mayTailCallThisCC(CalleeCC))
2966
0
    return false;
2967
3.99k
2968
3.99k
  return true;
2969
3.99k
}
2970
2971
SDValue
2972
X86TargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
2973
                                    const SmallVectorImpl<ISD::InputArg> &Ins,
2974
                                    const SDLoc &dl, SelectionDAG &DAG,
2975
                                    const CCValAssign &VA,
2976
41.5k
                                    MachineFrameInfo &MFI, unsigned i) const {
2977
41.5k
  // Create the nodes corresponding to a load from this parameter slot.
2978
41.5k
  ISD::ArgFlagsTy Flags = Ins[i].Flags;
2979
41.5k
  bool AlwaysUseMutable = shouldGuaranteeTCO(
2980
41.5k
      CallConv, DAG.getTarget().Options.GuaranteedTailCallOpt);
2981
41.5k
  bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();
2982
41.5k
  EVT ValVT;
2983
41.5k
  MVT PtrVT = getPointerTy(DAG.getDataLayout());
2984
41.5k
2985
41.5k
  // If value is passed by pointer we have address passed instead of the value
2986
41.5k
  // itself. No need to extend if the mask value and location share the same
2987
41.5k
  // absolute size.
2988
41.5k
  bool ExtendedInMem =
2989
41.5k
      VA.isExtInLoc() && 
VA.getValVT().getScalarType() == MVT::i19.10k
&&
2990
41.5k
      
VA.getValVT().getSizeInBits() != VA.getLocVT().getSizeInBits()19
;
2991
41.5k
2992
41.5k
  if (VA.getLocInfo() == CCValAssign::Indirect || 
ExtendedInMem41.5k
)
2993
20
    ValVT = VA.getLocVT();
2994
41.5k
  else
2995
41.5k
    ValVT = VA.getValVT();
2996
41.5k
2997
41.5k
  // FIXME: For now, all byval parameter objects are marked mutable. This can be
2998
41.5k
  // changed with more analysis.
2999
41.5k
  // In case of tail call optimization mark all arguments mutable. Since they
3000
41.5k
  // could be overwritten by lowering of arguments in case of a tail call.
3001
41.5k
  if (Flags.isByVal()) {
3002
310
    unsigned Bytes = Flags.getByValSize();
3003
310
    if (Bytes == 0) 
Bytes = 11
; // Don't create zero-sized stack objects.
3004
310
3005
310
    // FIXME: For now, all byval parameter objects are marked as aliasing. This
3006
310
    // can be improved with deeper analysis.
3007
310
    int FI = MFI.CreateFixedObject(Bytes, VA.getLocMemOffset(), isImmutable,
3008
310
                                   /*isAliased=*/true);
3009
310
    return DAG.getFrameIndex(FI, PtrVT);
3010
310
  }
3011
41.2k
3012
41.2k
  // This is an argument in memory. We might be able to perform copy elision.
3013
41.2k
  // If the argument is passed directly in memory without any extension, then we
3014
41.2k
  // can perform copy elision. Large vector types, for example, may be passed
3015
41.2k
  // indirectly by pointer.
3016
41.2k
  if (Flags.isCopyElisionCandidate() &&
3017
41.2k
      
VA.getLocInfo() != CCValAssign::Indirect335
&&
!ExtendedInMem331
) {
3018
331
    EVT ArgVT = Ins[i].ArgVT;
3019
331
    SDValue PartAddr;
3020
331
    if (Ins[i].PartOffset == 0) {
3021
324
      // If this is a one-part value or the first part of a multi-part value,
3022
324
      // create a stack object for the entire argument value type and return a
3023
324
      // load from our portion of it. This assumes that if the first part of an
3024
324
      // argument is in memory, the rest will also be in memory.
3025
324
      int FI = MFI.CreateFixedObject(ArgVT.getStoreSize(), VA.getLocMemOffset(),
3026
324
                                     /*IsImmutable=*/false);
3027
324
      PartAddr = DAG.getFrameIndex(FI, PtrVT);
3028
324
      return DAG.getLoad(
3029
324
          ValVT, dl, Chain, PartAddr,
3030
324
          MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
3031
324
    } else {
3032
7
      // This is not the first piece of an argument in memory. See if there is
3033
7
      // already a fixed stack object including this offset. If so, assume it
3034
7
      // was created by the PartOffset == 0 branch above and create a load from
3035
7
      // the appropriate offset into it.
3036
7
      int64_t PartBegin = VA.getLocMemOffset();
3037
7
      int64_t PartEnd = PartBegin + ValVT.getSizeInBits() / 8;
3038
7
      int FI = MFI.getObjectIndexBegin();
3039
7
      for (; MFI.isFixedObjectIndex(FI); 
++FI0
) {
3040
6
        int64_t ObjBegin = MFI.getObjectOffset(FI);
3041
6
        int64_t ObjEnd = ObjBegin + MFI.getObjectSize(FI);
3042
6
        if (ObjBegin <= PartBegin && PartEnd <= ObjEnd)
3043
6
          break;
3044
6
      }
3045
7
      if (MFI.isFixedObjectIndex(FI)) {
3046
6
        SDValue Addr =
3047
6
            DAG.getNode(ISD::ADD, dl, PtrVT, DAG.getFrameIndex(FI, PtrVT),
3048
6
                        DAG.getIntPtrConstant(Ins[i].PartOffset, dl));
3049
6
        return DAG.getLoad(
3050
6
            ValVT, dl, Chain, Addr,
3051
6
            MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI,
3052
6
                                              Ins[i].PartOffset));
3053
6
      }
3054
40.9k
    }
3055
331
  }
3056
40.9k
3057
40.9k
  int FI = MFI.CreateFixedObject(ValVT.getSizeInBits() / 8,
3058
40.9k
                                 VA.getLocMemOffset(), isImmutable);
3059
40.9k
3060
40.9k
  // Set SExt or ZExt flag.
3061
40.9k
  if (VA.getLocInfo() == CCValAssign::ZExt) {
3062
1.27k
    MFI.setObjectZExt(FI, true);
3063
39.6k
  } else if (VA.getLocInfo() == CCValAssign::SExt) {
3064
60
    MFI.setObjectSExt(FI, true);
3065
60
  }
3066
40.9k
3067
40.9k
  SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3068
40.9k
  SDValue Val = DAG.getLoad(
3069
40.9k
      ValVT, dl, Chain, FIN,
3070
40.9k
      MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
3071
40.9k
  return ExtendedInMem
3072
40.9k
             ? (VA.getValVT().isVector()
3073
5
                    ? DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VA.getValVT(), Val)
3074
5
                    : 
DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val)0
)
3075
40.9k
             : 
Val40.9k
;
3076
40.9k
}
3077
3078
// FIXME: Get this from tablegen.
3079
static ArrayRef<MCPhysReg> get64BitArgumentGPRs(CallingConv::ID CallConv,
3080
159
                                                const X86Subtarget &Subtarget) {
3081
159
  assert(Subtarget.is64Bit());
3082
159
3083
159
  if (Subtarget.isCallingConvWin64(CallConv)) {
3084
19
    static const MCPhysReg GPR64ArgRegsWin64[] = {
3085
19
      X86::RCX, X86::RDX, X86::R8,  X86::R9
3086
19
    };
3087
19
    return makeArrayRef(std::begin(GPR64ArgRegsWin64), std::end(GPR64ArgRegsWin64));
3088
19
  }
3089
140
3090
140
  static const MCPhysReg GPR64ArgRegs64Bit[] = {
3091
140
    X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
3092
140
  };
3093
140
  return makeArrayRef(std::begin(GPR64ArgRegs64Bit), std::end(GPR64ArgRegs64Bit));
3094
140
}
3095
3096
// FIXME: Get this from tablegen.
3097
static ArrayRef<MCPhysReg> get64BitArgumentXMMs(MachineFunction &MF,
3098
                                                CallingConv::ID CallConv,
3099
159
                                                const X86Subtarget &Subtarget) {
3100
159
  assert(Subtarget.is64Bit());
3101
159
  if (Subtarget.isCallingConvWin64(CallConv)) {
3102
19
    // The XMM registers which might contain var arg parameters are shadowed
3103
19
    // in their paired GPR.  So we only need to save the GPR to their home
3104
19
    // slots.
3105
19
    // TODO: __vectorcall will change this.
3106
19
    return None;
3107
19
  }
3108
140
3109
140
  const Function &F = MF.getFunction();
3110
140
  bool NoImplicitFloatOps = F.hasFnAttribute(Attribute::NoImplicitFloat);
3111
140
  bool isSoftFloat = Subtarget.useSoftFloat();
3112
140
  assert(!(isSoftFloat && NoImplicitFloatOps) &&
3113
140
         "SSE register cannot be used when SSE is disabled!");
3114
140
  if (isSoftFloat || 
NoImplicitFloatOps138
||
!Subtarget.hasSSE1()138
)
3115
4
    // Kernel mode asks for SSE to be disabled, so there are no XMM argument
3116
4
    // registers.
3117
4
    return None;
3118
136
3119
136
  static const MCPhysReg XMMArgRegs64Bit[] = {
3120
136
    X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
3121
136
    X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
3122
136
  };
3123
136
  return makeArrayRef(std::begin(XMMArgRegs64Bit), std::end(XMMArgRegs64Bit));
3124
136
}
3125
3126
#ifndef NDEBUG
3127
static bool isSortedByValueNo(ArrayRef<CCValAssign> ArgLocs) {
3128
  return std::is_sorted(ArgLocs.begin(), ArgLocs.end(),
3129
                        [](const CCValAssign &A, const CCValAssign &B) -> bool {
3130
                          return A.getValNo() < B.getValNo();
3131
                        });
3132
}
3133
#endif
3134
3135
SDValue X86TargetLowering::LowerFormalArguments(
3136
    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3137
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3138
134k
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3139
134k
  MachineFunction &MF = DAG.getMachineFunction();
3140
134k
  X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
3141
134k
  const TargetFrameLowering &TFI = *Subtarget.getFrameLowering();
3142
134k
3143
134k
  const Function &F = MF.getFunction();
3144
134k
  if (F.hasExternalLinkage() && 
Subtarget.isTargetCygMing()129k
&&
3145
134k
      
F.getName() == "main"418
)
3146
19
    FuncInfo->setForceFramePointer(true);
3147
134k
3148
134k
  MachineFrameInfo &MFI = MF.getFrameInfo();
3149
134k
  bool Is64Bit = Subtarget.is64Bit();
3150
134k
  bool IsWin64 = Subtarget.isCallingConvWin64(CallConv);
3151
134k
3152
134k
  assert(
3153
134k
      !(isVarArg && canGuaranteeTCO(CallConv)) &&
3154
134k
      "Var args not supported with calling conv' regcall, fastcc, ghc or hipe");
3155
134k
3156
134k
  // Assign locations to all of the incoming arguments.
3157
134k
  SmallVector<CCValAssign, 16> ArgLocs;
3158
134k
  CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
3159
134k
3160
134k
  // Allocate shadow area for Win64.
3161
134k
  if (IsWin64)
3162
1.72k
    CCInfo.AllocateStack(32, 8);
3163
134k
3164
134k
  CCInfo.AnalyzeArguments(Ins, CC_X86);
3165
134k
3166
134k
  // In vectorcall calling convention a second pass is required for the HVA
3167
134k
  // types.
3168
134k
  if (CallingConv::X86_VectorCall == CallConv) {
3169
56
    CCInfo.AnalyzeArgumentsSecondPass(Ins, CC_X86);
3170
56
  }
3171
134k
3172
134k
  // The next loop assumes that the locations are in the same order of the
3173
134k
  // input arguments.
3174
134k
  assert(isSortedByValueNo(ArgLocs) &&
3175
134k
         "Argument Location list must be sorted before lowering");
3176
134k
3177
134k
  SDValue ArgValue;
3178
405k
  for (unsigned I = 0, InsIndex = 0, E = ArgLocs.size(); I != E;
3179
270k
       
++I, ++InsIndex270k
) {
3180
270k
    assert(InsIndex < Ins.size() && "Invalid Ins index");
3181
270k
    CCValAssign &VA = ArgLocs[I];
3182
270k
3183
270k
    if (VA.isRegLoc()) {
3184
228k
      EVT RegVT = VA.getLocVT();
3185
228k
      if (VA.needsCustom()) {
3186
2
        assert(
3187
2
            VA.getValVT() == MVT::v64i1 &&
3188
2
            "Currently the only custom case is when we split v64i1 to 2 regs");
3189
2
3190
2
        // v64i1 values, in regcall calling convention, that are
3191
2
        // compiled to 32 bit arch, are split up into two registers.
3192
2
        ArgValue =
3193
2
            getv64i1Argument(VA, ArgLocs[++I], Chain, DAG, dl, Subtarget);
3194
228k
      } else {
3195
228k
        const TargetRegisterClass *RC;
3196
228k
        if (RegVT == MVT::i8)
3197
30
          RC = &X86::GR8RegClass;
3198
228k
        else if (RegVT == MVT::i16)
3199
30
          RC = &X86::GR16RegClass;
3200
228k
        else if (RegVT == MVT::i32)
3201
23.8k
          RC = &X86::GR32RegClass;
3202
205k
        else if (Is64Bit && RegVT == MVT::i64)
3203
66.8k
          RC = &X86::GR64RegClass;
3204
138k
        else if (RegVT == MVT::f32)
3205
2.42k
          RC = Subtarget.hasAVX512() ? &X86::FR32XRegClass : &X86::FR32RegClass;
3206
135k
        else if (RegVT == MVT::f64)
3207
2.05k
          RC = Subtarget.hasAVX512() ? &X86::FR64XRegClass : &X86::FR64RegClass;
3208
133k
        else if (RegVT == MVT::f80)
3209
6
          RC = &X86::RFP80RegClass;
3210
133k
        else if (RegVT == MVT::f128)
3211
230
          RC = &X86::VR128RegClass;
3212
133k
        else if (RegVT.is512BitVector())
3213
19.7k
          RC = &X86::VR512RegClass;
3214
113k
        else if (RegVT.is256BitVector())
3215
39.5k
          RC = Subtarget.hasVLX() ? &X86::VR256XRegClass : &X86::VR256RegClass;
3216
74.1k
        else if (RegVT.is128BitVector())
3217
73.9k
          RC = Subtarget.hasVLX() ? &X86::VR128XRegClass : &X86::VR128RegClass;
3218
236
        else if (RegVT == MVT::x86mmx)
3219
225
          RC = &X86::VR64RegClass;
3220
11
        else if (RegVT == MVT::v1i1)
3221
0
          RC = &X86::VK1RegClass;
3222
11
        else if (RegVT == MVT::v8i1)
3223
0
          RC = &X86::VK8RegClass;
3224
11
        else if (RegVT == MVT::v16i1)
3225
8
          RC = &X86::VK16RegClass;
3226
3
        else if (RegVT == MVT::v32i1)
3227
0
          RC = &X86::VK32RegClass;
3228
3
        else if (RegVT == MVT::v64i1)
3229
0
          RC = &X86::VK64RegClass;
3230
3
        else
3231
3
          llvm_unreachable("Unknown argument type!");
3232
228k
3233
228k
        unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
3234
228k
        ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
3235
228k
      }
3236
228k
3237
228k
      // If this is an 8 or 16-bit value, it is really passed promoted to 32
3238
228k
      // bits.  Insert an assert[sz]ext to capture this, then truncate to the
3239
228k
      // right size.
3240
228k
      if (VA.getLocInfo() == CCValAssign::SExt)
3241
210
        ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
3242
210
                               DAG.getValueType(VA.getValVT()));
3243
228k
      else if (VA.getLocInfo() == CCValAssign::ZExt)
3244
3.07k
        ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
3245
3.07k
                               DAG.getValueType(VA.getValVT()));
3246
225k
      else if (VA.getLocInfo() == CCValAssign::BCvt)
3247
0
        ArgValue = DAG.getBitcast(VA.getValVT(), ArgValue);
3248
228k
3249
228k
      if (VA.isExtInLoc()) {
3250
10.9k
        // Handle MMX values passed in XMM regs.
3251
10.9k
        if (RegVT.isVector() && VA.getValVT().getScalarType() != MVT::i1)
3252
4
          ArgValue = DAG.getNode(X86ISD::MOVDQ2Q, dl, VA.getValVT(), ArgValue);
3253
10.9k
        else if (VA.getValVT().isVector() &&
3254
10.9k
                 VA.getValVT().getScalarType() == MVT::i1 &&
3255
10.9k
                 ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) ||
3256
701
                  (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) {
3257
76
          // Promoting a mask type (v*i1) into a register of type i64/i32/i16/i8
3258
76
          ArgValue = lowerRegToMasks(ArgValue, VA.getValVT(), RegVT, dl, DAG);
3259
76
        } else
3260
10.8k
          ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
3261
10.9k
      }
3262
228k
    } else {
3263
41.5k
      assert(VA.isMemLoc());
3264
41.5k
      ArgValue =
3265
41.5k
          LowerMemArgument(Chain, CallConv, Ins, dl, DAG, VA, MFI, InsIndex);
3266
41.5k
    }
3267
270k
3268
270k
    // If value is passed via pointer - do a load.
3269
270k
    if (VA.getLocInfo() == CCValAssign::Indirect && !Ins[I].Flags.isByVal())
3270
972
      ArgValue =
3271
972
          DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue, MachinePointerInfo());
3272
270k
3273
270k
    InVals.push_back(ArgValue);
3274
270k
  }
3275
134k
3276
397k
  for (unsigned I = 0, E = Ins.size(); I != E; ++I) {
3277
263k
    // Swift calling convention does not require we copy the sret argument
3278
263k
    // into %rax/%eax for the return. We don't set SRetReturnReg for Swift.
3279
263k
    if (CallConv == CallingConv::Swift)
3280
150
      continue;
3281
263k
3282
263k
    // All x86 ABIs require that for returning structs by value we copy the
3283
263k
    // sret argument into %rax/%eax (depending on ABI) for the return. Save
3284
263k
    // the argument into a virtual register so that we can access it from the
3285
263k
    // return points.
3286
263k
    if (Ins[I].Flags.isSRet()) {
3287
1.20k
      unsigned Reg = FuncInfo->getSRetReturnReg();
3288
1.20k
      if (!Reg) {
3289
1.20k
        MVT PtrTy = getPointerTy(DAG.getDataLayout());
3290
1.20k
        Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy));
3291
1.20k
        FuncInfo->setSRetReturnReg(Reg);
3292
1.20k
      }
3293
1.20k
      SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[I]);
3294
1.20k
      Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain);
3295
1.20k
      break;
3296
1.20k
    }
3297
263k
  }
3298
134k
3299
134k
  unsigned StackSize = CCInfo.getNextStackOffset();
3300
134k
  // Align stack specially for tail calls.
3301
134k
  if (shouldGuaranteeTCO(CallConv,
3302
134k
                         MF.getTarget().Options.GuaranteedTailCallOpt))
3303
42
    StackSize = GetAlignedArgumentStackSize(StackSize, DAG);
3304
134k
3305
134k
  // If the function takes variable number of arguments, make a frame index for
3306
134k
  // the start of the first vararg value... for expansion of llvm.va_start. We
3307
134k
  // can skip this if there are no va_start calls.
3308
134k
  if (MFI.hasVAStart() &&
3309
134k
      (Is64Bit || (CallConv != CallingConv::X86_FastCall &&
3310
188
                   CallConv != CallingConv::X86_ThisCall))) {
3311
188
    FuncInfo->setVarArgsFrameIndex(MFI.CreateFixedObject(1, StackSize, true));
3312
188
  }
3313
134k
3314
134k
  // Figure out if XMM registers are in use.
3315
134k
  assert(!(Subtarget.useSoftFloat() &&
3316
134k
           F.hasFnAttribute(Attribute::NoImplicitFloat)) &&
3317
134k
         "SSE register cannot be used when SSE is disabled!");
3318
134k
3319
134k
  // 64-bit calling conventions support varargs and register parameters, so we
3320
134k
  // have to do extra work to spill them in the prologue.
3321
134k
  if (Is64Bit && isVarArg && MFI.hasVAStart()) {
3322
159
    // Find the first unallocated argument registers.
3323
159
    ArrayRef<MCPhysReg> ArgGPRs = get64BitArgumentGPRs(CallConv, Subtarget);
3324
159
    ArrayRef<MCPhysReg> ArgXMMs = get64BitArgumentXMMs(MF, CallConv, Subtarget);
3325
159
    unsigned NumIntRegs = CCInfo.getFirstUnallocated(ArgGPRs);
3326
159
    unsigned NumXMMRegs = CCInfo.getFirstUnallocated(ArgXMMs);
3327
159
    assert(!(NumXMMRegs && !Subtarget.hasSSE1()) &&
3328
159
           "SSE register cannot be used when SSE is disabled!");
3329
159
3330
159
    // Gather all the live in physical registers.
3331
159
    SmallVector<SDValue, 6> LiveGPRs;
3332
159
    SmallVector<SDValue, 8> LiveXMMRegs;
3333
159
    SDValue ALVal;
3334
626
    for (MCPhysReg Reg : ArgGPRs.slice(NumIntRegs)) {
3335
626
      unsigned GPR = MF.addLiveIn(Reg, &X86::GR64RegClass);
3336
626
      LiveGPRs.push_back(
3337
626
          DAG.getCopyFromReg(Chain, dl, GPR, MVT::i64));
3338
626
    }
3339
159
    if (!ArgXMMs.empty()) {
3340
136
      unsigned AL = MF.addLiveIn(X86::AL, &X86::GR8RegClass);
3341
136
      ALVal = DAG.getCopyFromReg(Chain, dl, AL, MVT::i8);
3342
1.08k
      for (MCPhysReg Reg : ArgXMMs.slice(NumXMMRegs)) {
3343
1.08k
        unsigned XMMReg = MF.addLiveIn(Reg, &X86::VR128RegClass);
3344
1.08k
        LiveXMMRegs.push_back(
3345
1.08k
            DAG.getCopyFromReg(Chain, dl, XMMReg, MVT::v4f32));
3346
1.08k
      }
3347
136
    }
3348
159
3349
159
    if (IsWin64) {
3350
19
      // Get to the caller-allocated home save location.  Add 8 to account
3351
19
      // for the return address.
3352
19
      int HomeOffset = TFI.getOffsetOfLocalArea() + 8;
3353
19
      FuncInfo->setRegSaveFrameIndex(
3354
19
          MFI.CreateFixedObject(1, NumIntRegs * 8 + HomeOffset, false));
3355
19
      // Fixup to set vararg frame on shadow area (4 x i64).
3356
19
      if (NumIntRegs < 4)
3357
11
        FuncInfo->setVarArgsFrameIndex(FuncInfo->getRegSaveFrameIndex());
3358
140
    } else {
3359
140
      // For X86-64, if there are vararg parameters that are passed via
3360
140
      // registers, then we must store them to their spots on the stack so
3361
140
      // they may be loaded by dereferencing the result of va_next.
3362
140
      FuncInfo->setVarArgsGPOffset(NumIntRegs * 8);
3363
140
      FuncInfo->setVarArgsFPOffset(ArgGPRs.size() * 8 + NumXMMRegs * 16);
3364
140
      FuncInfo->setRegSaveFrameIndex(MFI.CreateStackObject(
3365
140
          ArgGPRs.size() * 8 + ArgXMMs.size() * 16, 16, false));
3366
140
    }
3367
159
3368
159
    // Store the integer parameter registers.
3369
159
    SmallVector<SDValue, 8> MemOps;
3370
159
    SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(),
3371
159
                                      getPointerTy(DAG.getDataLayout()));
3372
159
    unsigned Offset = FuncInfo->getVarArgsGPOffset();
3373
626
    for (SDValue Val : LiveGPRs) {
3374
626
      SDValue FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
3375
626
                                RSFIN, DAG.getIntPtrConstant(Offset, dl));
3376
626
      SDValue Store =
3377
626
          DAG.getStore(Val.getValue(1), dl, Val, FIN,
3378
626
                       MachinePointerInfo::getFixedStack(
3379
626
                           DAG.getMachineFunction(),
3380
626
                           FuncInfo->getRegSaveFrameIndex(), Offset));
3381
626
      MemOps.push_back(Store);
3382
626
      Offset += 8;
3383
626
    }
3384
159
3385
159
    if (!ArgXMMs.empty() && NumXMMRegs != ArgXMMs.size()) {
3386
136
      // Now store the XMM (fp + vector) parameter registers.
3387
136
      SmallVector<SDValue, 12> SaveXMMOps;
3388
136
      SaveXMMOps.push_back(Chain);
3389
136
      SaveXMMOps.push_back(ALVal);
3390
136
      SaveXMMOps.push_back(DAG.getIntPtrConstant(
3391
136
                             FuncInfo->getRegSaveFrameIndex(), dl));
3392
136
      SaveXMMOps.push_back(DAG.getIntPtrConstant(
3393
136
                             FuncInfo->getVarArgsFPOffset(), dl));
3394
136
      SaveXMMOps.insert(SaveXMMOps.end(), LiveXMMRegs.begin(),
3395
136
                        LiveXMMRegs.end());
3396
136
      MemOps.push_back(DAG.getNode(X86ISD::VASTART_SAVE_XMM_REGS, dl,
3397
136
                                   MVT::Other, SaveXMMOps));
3398
136
    }
3399
159
3400
159
    if (!MemOps.empty())
3401
151
      Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
3402
159
  }
3403
134k
3404
134k
  if (isVarArg && MFI.hasMustTailInVarArgFunc()) {
3405
36
    // Find the largest legal vector type.
3406
36
    MVT VecVT = MVT::Other;
3407
36
    // FIXME: Only some x86_32 calling conventions support AVX512.
3408
36
    if (Subtarget.hasAVX512() &&
3409
36
        (Is64Bit || (CallConv == CallingConv::X86_VectorCall ||
3410
2
                     CallConv == CallingConv::Intel_OCL_BI)))
3411
1
      VecVT = MVT::v16f32;
3412
35
    else if (Subtarget.hasAVX())
3413
3
      VecVT = MVT::v8f32;
3414
32
    else if (Subtarget.hasSSE2())
3415
29
      VecVT = MVT::v4f32;
3416
36
3417
36
    // We forward some GPRs and some vector types.
3418
36
    SmallVector<MVT, 2> RegParmTypes;
3419
36
    MVT IntVT = Is64Bit ? MVT::i64 : MVT::i32;
3420
36
    RegParmTypes.push_back(IntVT);
3421
36
    if (VecVT != MVT::Other)
3422
33
      RegParmTypes.push_back(VecVT);
3423
36
3424
36
    // Compute the set of forwarded registers. The rest are scratch.
3425
36
    SmallVectorImpl<ForwardedRegister> &Forwards =
3426
36
        FuncInfo->getForwardedMustTailRegParms();
3427
36
    CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes, CC_X86);
3428
36
3429
36
    // Conservatively forward AL on x86_64, since it might be used for varargs.
3430
36
    if (Is64Bit && !CCInfo.isAllocated(X86::AL)) {
3431
22
      unsigned ALVReg = MF.addLiveIn(X86::AL, &X86::GR8RegClass);
3432
22
      Forwards.push_back(ForwardedRegister(ALVReg, X86::AL, MVT::i8));
3433
22
    }
3434
36
3435
36
    // Copy all forwards from physical to virtual registers.
3436
345
    for (ForwardedRegister &FR : Forwards) {
3437
345
      // FIXME: Can we use a less constrained schedule?
3438
345
      SDValue RegVal = DAG.getCopyFromReg(Chain, dl, FR.VReg, FR.VT);
3439
345
      FR.VReg = MF.getRegInfo().createVirtualRegister(getRegClassFor(FR.VT));
3440
345
      Chain = DAG.getCopyToReg(Chain, dl, FR.VReg, RegVal);
3441
345
    }
3442
36
  }
3443
134k
3444
134k
  // Some CCs need callee pop.
3445
134k
  if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
3446
134k
                       MF.getTarget().Options.GuaranteedTailCallOpt)) {
3447
255
    FuncInfo->setBytesToPopOnReturn(StackSize); // Callee pops everything.
3448
134k
  } else if (CallConv == CallingConv::X86_INTR && Ins.size() == 2) {
3449
17
    // X86 interrupts must pop the error code (and the alignment padding) if
3450
17
    // present.
3451
17
    FuncInfo->setBytesToPopOnReturn(Is64Bit ? 16 : 4);
3452
134k
  } else {
3453
134k
    FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing.
3454
134k
    // If this is an sret function, the return should pop the hidden pointer.
3455
134k
    if (!Is64Bit && !canGuaranteeTCO(CallConv) &&
3456
134k
        !Subtarget.getTargetTriple().isOSMSVCRT() &&
3457
134k
        argsAreStructReturn(Ins, Subtarget.isTargetMCU()) == StackStructReturn)
3458
558
      FuncInfo->setBytesToPopOnReturn(4);
3459
134k
  }
3460
134k
3461
134k
  if (!Is64Bit) {
3462
26.9k
    // RegSaveFrameIndex is X86-64 only.
3463
26.9k
    FuncInfo->setRegSaveFrameIndex(0xAAAAAAA);
3464
26.9k
    if (CallConv == CallingConv::X86_FastCall ||
3465
26.9k
        CallConv == CallingConv::X86_ThisCall)
3466
145
      // fastcc functions can't have varargs.
3467
145
      FuncInfo->setVarArgsFrameIndex(0xAAAAAAA);
3468
26.9k
  }
3469
134k
3470
134k
  FuncInfo->setArgumentStackSize(StackSize);
3471
134k
3472
134k
  if (WinEHFuncInfo *EHInfo = MF.getWinEHFuncInfo()) {
3473
91
    EHPersonality Personality = classifyEHPersonality(F.getPersonalityFn());
3474
91
    if (Personality == EHPersonality::CoreCLR) {
3475
7
      assert(Is64Bit);
3476
7
      // TODO: Add a mechanism to frame lowering that will allow us to indicate
3477
7
      // that we'd prefer this slot be allocated towards the bottom of the frame
3478
7
      // (i.e. near the stack pointer after allocating the frame).  Every
3479
7
      // funclet needs a copy of this slot in its (mostly empty) frame, and the
3480
7
      // offset from the bottom of this and each funclet's frame must be the
3481
7
      // same, so the size of funclets' (mostly empty) frames is dictated by
3482
7
      // how far this slot is from the bottom (since they allocate just enough
3483
7
      // space to accommodate holding this slot at the correct offset).
3484
7
      int PSPSymFI = MFI.CreateStackObject(8, 8, /*isSS=*/false);
3485
7
      EHInfo->PSPSymFrameIdx = PSPSymFI;
3486
7
    }
3487
91
  }
3488
134k
3489
134k
  if (CallConv == CallingConv::X86_RegCall ||
3490
134k
      F.hasFnAttribute("no_caller_saved_registers")) {
3491
134
    MachineRegisterInfo &MRI = MF.getRegInfo();
3492
134
    for (std::pair<unsigned, unsigned> Pair : MRI.liveins())
3493
409
      MRI.disableCalleeSavedRegister(Pair.first);
3494
134
  }
3495
134k
3496
134k
  return Chain;
3497
134k
}
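The tail of the function above decides how many bytes the callee pops on return. The sketch below restates that decision tree outside of LLVM as a worked example; the boolean parameters are simplified stand-ins for the real isCalleePop, X86_INTR, and struct-return checks, not the actual API.

#include <cstdio>

// Mirrors the bytes-to-pop logic at the end of LowerFormalArguments.
static unsigned bytesToPopOnReturn(bool isCalleePopCC, bool isX86Interrupt,
                                   bool hasErrorCode, bool is64Bit,
                                   bool isStackStructReturn,
                                   unsigned stackSize) {
  if (isCalleePopCC)
    return stackSize;                 // callee pops everything (e.g. stdcall)
  if (isX86Interrupt && hasErrorCode)
    return is64Bit ? 16 : 4;          // pop the error code (+ alignment padding)
  if (!is64Bit && isStackStructReturn)
    return 4;                         // pop the hidden sret pointer on x86-32
  return 0;                           // default: callee pops nothing
}

int main() {
  std::printf("%u\n", bytesToPopOnReturn(false, false, false, false, true, 24)); // 4
  std::printf("%u\n", bytesToPopOnReturn(true, false, false, false, false, 24)); // 24
}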
3498
3499
SDValue X86TargetLowering::LowerMemOpCallTo(SDValue Chain, SDValue StackPtr,
3500
                                            SDValue Arg, const SDLoc &dl,
3501
                                            SelectionDAG &DAG,
3502
                                            const CCValAssign &VA,
3503
51.5k
                                            ISD::ArgFlagsTy Flags) const {
3504
51.5k
  unsigned LocMemOffset = VA.getLocMemOffset();
3505
51.5k
  SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
3506
51.5k
  PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
3507
51.5k
                       StackPtr, PtrOff);
3508
51.5k
  if (Flags.isByVal())
3509
508
    return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl);
3510
51.0k
3511
51.0k
  return DAG.getStore(
3512
51.0k
      Chain, dl, Arg, PtrOff,
3513
51.0k
      MachinePointerInfo::getStack(DAG.getMachineFunction(), LocMemOffset));
3514
51.0k
}
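The addressing used by LowerMemOpCallTo is simple: the outgoing argument lands at the stack pointer plus the location's memory offset, then it is either copied (byval) or stored. A toy arithmetic sketch, with made-up values:

#include <cstdint>
#include <cstdio>

int main() {
  std::uint64_t stackPtr = 0x7fffffffe000; // hypothetical stack pointer at the call
  unsigned locMemOffset = 16;              // analogue of VA.getLocMemOffset()
  std::uint64_t argSlot = stackPtr + locMemOffset;
  std::printf("outgoing arg stored at 0x%llx\n",
              static_cast<unsigned long long>(argSlot));
}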
3515
3516
/// Emit a load of return address if tail call
3517
/// optimization is performed and it is required.
3518
SDValue X86TargetLowering::EmitTailCallLoadRetAddr(
3519
    SelectionDAG &DAG, SDValue &OutRetAddr, SDValue Chain, bool IsTailCall,
3520
8
    bool Is64Bit, int FPDiff, const SDLoc &dl) const {
3521
8
  // Adjust the Return address stack slot.
3522
8
  EVT VT = getPointerTy(DAG.getDataLayout());
3523
8
  OutRetAddr = getReturnAddressFrameIndex(DAG);
3524
8
3525
8
  // Load the "old" Return address.
3526
8
  OutRetAddr = DAG.getLoad(VT, dl, Chain, OutRetAddr, MachinePointerInfo());
3527
8
  return SDValue(OutRetAddr.getNode(), 1);
3528
8
}
3529
3530
/// Emit a store of the return address if tail call
3531
/// optimization is performed and it is required (FPDiff!=0).
3532
static SDValue EmitTailCallStoreRetAddr(SelectionDAG &DAG, MachineFunction &MF,
3533
                                        SDValue Chain, SDValue RetAddrFrIdx,
3534
                                        EVT PtrVT, unsigned SlotSize,
3535
99
                                        int FPDiff, const SDLoc &dl) {
3536
99
  // Store the return address to the appropriate stack slot.
3537
99
  if (!FPDiff) return Chain;
3538
8
  // Calculate the new stack slot for the return address.
3539
8
  int NewReturnAddrFI =
3540
8
    MF.getFrameInfo().CreateFixedObject(SlotSize, (int64_t)FPDiff - SlotSize,
3541
8
                                         false);
3542
8
  SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, PtrVT);
3543
8
  Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx,
3544
8
                       MachinePointerInfo::getFixedStack(
3545
8
                           DAG.getMachineFunction(), NewReturnAddrFI));
3546
8
  return Chain;
3547
8
}
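When FPDiff is nonzero, the old return address is re-stored at offset FPDiff - SlotSize relative to the incoming frame, as in the store above. A minimal numeric sketch of that offset computation, with invented values:

#include <cstdio>

int main() {
  int fpDiff = -16;      // callee needs 16 more bytes of argument space than caller
  unsigned slotSize = 8; // return-address slot size on x86-64
  int newRetAddrOffset = fpDiff - static_cast<int>(slotSize);
  std::printf("old return address moves to offset %d\n", newRetAddrOffset); // -24
}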
3548
3549
/// Returns a vector_shuffle mask for an movs{s|d}, movd
3550
/// operation of specified width.
3551
static SDValue getMOVL(SelectionDAG &DAG, const SDLoc &dl, MVT VT, SDValue V1,
3552
70
                       SDValue V2) {
3553
70
  unsigned NumElems = VT.getVectorNumElements();
3554
70
  SmallVector<int, 8> Mask;
3555
70
  Mask.push_back(NumElems);
3556
274
  for (unsigned i = 1; i != NumElems; ++i)
3557
204
    Mask.push_back(i);
3558
70
  return DAG.getVectorShuffle(VT, dl, V1, V2, Mask);
3559
70
}
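The mask getMOVL builds takes element 0 from the second vector (index NumElems in the concatenated V1,V2 numbering) and keeps the remaining elements of the first vector. A standalone illustration of the mask for a 4-element vector:

#include <cstdio>
#include <vector>

int main() {
  unsigned numElems = 4; // e.g. a v4f32 movss-style blend
  std::vector<int> mask;
  mask.push_back(numElems);
  for (unsigned i = 1; i != numElems; ++i)
    mask.push_back(i);
  for (int m : mask)
    std::printf("%d ", m); // prints: 4 1 2 3
  std::printf("\n");
}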
3560
3561
SDValue
3562
X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
3563
142k
                             SmallVectorImpl<SDValue> &InVals) const {
3564
142k
  SelectionDAG &DAG                     = CLI.DAG;
3565
142k
  SDLoc &dl                             = CLI.DL;
3566
142k
  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
3567
142k
  SmallVectorImpl<SDValue> &OutVals     = CLI.OutVals;
3568
142k
  SmallVectorImpl<ISD::InputArg> &Ins   = CLI.Ins;
3569
142k
  SDValue Chain                         = CLI.Chain;
3570
142k
  SDValue Callee                        = CLI.Callee;
3571
142k
  CallingConv::ID CallConv              = CLI.CallConv;
3572
142k
  bool &isTailCall                      = CLI.IsTailCall;
3573
142k
  bool isVarArg                         = CLI.IsVarArg;
3574
142k
3575
142k
  MachineFunction &MF = DAG.getMachineFunction();
3576
142k
  bool Is64Bit        = Subtarget.is64Bit();
3577
142k
  bool IsWin64        = Subtarget.isCallingConvWin64(CallConv);
3578
142k
  StructReturnType SR = callIsStructReturn(Outs, Subtarget.isTargetMCU());
3579
142k
  bool IsSibcall      = false;
3580
142k
  X86MachineFunctionInfo *X86Info = MF.getInfo<X86MachineFunctionInfo>();
3581
142k
  auto Attr = MF.getFunction().getFnAttribute("disable-tail-calls");
3582
142k
  const auto *CI = dyn_cast_or_null<CallInst>(CLI.CS.getInstruction());
3583
142k
  const Function *Fn = CI ? CI->getCalledFunction() : nullptr;
3584
142k
  bool HasNCSR = (CI && CI->hasFnAttr("no_caller_saved_registers")) ||
3585
142k
                 (Fn && Fn->hasFnAttribute("no_caller_saved_registers"));
3586
142k
  const auto *II = dyn_cast_or_null<InvokeInst>(CLI.CS.getInstruction());
3587
142k
  bool HasNoCfCheck =
3588
142k
      (CI && CI->doesNoCfCheck()) || (II && II->doesNoCfCheck());
3589
142k
  const Module *M = MF.getMMI().getModule();
3590
142k
  Metadata *IsCFProtectionSupported = M->getModuleFlag("cf-protection-branch");
3591
142k
3592
142k
  MachineFunction::CallSiteInfo CSInfo;
3593
142k
3594
142k
  if (CallConv == CallingConv::X86_INTR)
3595
0
    report_fatal_error("X86 interrupts may not be called directly");
3596
142k
3597
142k
  if (Attr.getValueAsString() == "true")
3598
12
    isTailCall = false;
3599
142k
3600
142k
  if (Subtarget.isPICStyleGOT() &&
3601
142k
      !MF.getTarget().Options.GuaranteedTailCallOpt) {
3602
173
    // If we are using a GOT, disable tail calls to external symbols with
3603
173
    // default visibility. Tail calling such a symbol requires using a GOT
3604
173
    // relocation, which forces early binding of the symbol. This breaks code
3605
173
    // that requires lazy function symbol resolution. Using musttail or
3606
173
    // GuaranteedTailCallOpt will override this.
3607
173
    GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
3608
173
    if (!G || (!G->getGlobal()->hasLocalLinkage() &&
3609
94
               G->getGlobal()->hasDefaultVisibility()))
3610
164
      isTailCall = false;
3611
173
  }
3612
142k
3613
142k
  bool IsMustTail = CLI.CS && CLI.CS.isMustTailCall();
3614
142k
  if (IsMustTail) {
3615
65
    // Force this to be a tail call.  The verifier rules are enough to ensure
3616
65
    // that we can lower this successfully without moving the return address
3617
65
    // around.
3618
65
    isTailCall = true;
3619
142k
  } else if (isTailCall) {
3620
8.64k
    // Check if it's really possible to do a tail call.
3621
8.64k
    isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
3622
8.64k
                    isVarArg, SR != NotStructReturn,
3623
8.64k
                    MF.getFunction().hasStructRetAttr(), CLI.RetTy,
3624
8.64k
                    Outs, OutVals, Ins, DAG);
3625
8.64k
3626
8.64k
    // Sibcalls are automatically detected tailcalls which do not require
3627
8.64k
    // ABI changes.
3628
8.64k
    if (!MF.getTarget().Options.GuaranteedTailCallOpt && isTailCall)
3629
7.58k
      IsSibcall = true;
3630
8.64k
3631
8.64k
    if (isTailCall)
3632
7.61k
      ++NumTailCalls;
3633
8.64k
  }
3634
142k
3635
142k
  assert(!(isVarArg && canGuaranteeTCO(CallConv)) &&
3636
142k
         "Var args not supported with calling convention fastcc, ghc or hipe");
3637
142k
3638
142k
  // Analyze operands of the call, assigning locations to each operand.
3639
142k
  SmallVector<CCValAssign, 16> ArgLocs;
3640
142k
  CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
3641
142k
3642
142k
  // Allocate shadow area for Win64.
3643
142k
  if (IsWin64)
3644
852
    CCInfo.AllocateStack(32, 8);
3645
142k
3646
142k
  CCInfo.AnalyzeArguments(Outs, CC_X86);
3647
142k
3648
142k
  // In vectorcall calling convention a second pass is required for the HVA
3649
142k
  // types.
3650
142k
  if (CallingConv::X86_VectorCall == CallConv) {
3651
8
    CCInfo.AnalyzeArgumentsSecondPass(Outs, CC_X86);
3652
8
  }
3653
142k
3654
142k
  // Get a count of how many bytes are to be pushed on the stack.
3655
142k
  unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
3656
142k
  if (IsSibcall)
3657
7.58k
    // This is a sibcall. The memory operands are available in caller's
3658
7.58k
    // own caller's stack.
3659
7.58k
    NumBytes = 0;
3660
134k
  else if (MF.getTarget().Options.GuaranteedTailCallOpt &&
3661
134k
           canGuaranteeTCO(CallConv))
3662
42
    NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
3663
142k
3664
142k
  int FPDiff = 0;
3665
142k
  if (isTailCall && !IsSibcall && !IsMustTail) {
3666
34
    // Lower arguments at fp - stackoffset + fpdiff.
3667
34
    unsigned NumBytesCallerPushed = X86Info->getBytesToPopOnReturn();
3668
34
3669
34
    FPDiff = NumBytesCallerPushed - NumBytes;
3670
34
3671
34
    // Set the delta of movement of the returnaddr stackslot.
3672
34
    // But only set if delta is greater than previous delta.
3673
34
    if (FPDiff < X86Info->getTCReturnAddrDelta())
3674
8
      X86Info->setTCReturnAddrDelta(FPDiff);
3675
34
  }
3676
142k
3677
142k
  unsigned NumBytesToPush = NumBytes;
3678
142k
  unsigned NumBytesToPop = NumBytes;
3679
142k
3680
142k
  // If we have an inalloca argument, all stack space has already been allocated
3681
142k
  // for us and be right at the top of the stack.  We don't support multiple
3682
142k
  // arguments passed in memory when using inalloca.
3683
142k
  if (!Outs.empty() && Outs.back().Flags.isInAlloca()) {
3684
22
    NumBytesToPush = 0;
3685
22
    if (!ArgLocs.back().isMemLoc())
3686
1
      report_fatal_error("cannot use inalloca attribute on a register "
3687
1
                         "parameter");
3688
21
    if (ArgLocs.back().getLocMemOffset() != 0)
3689
0
      report_fatal_error("any parameter with the inalloca attribute must be "
3690
0
                         "the only memory argument");
3691
142k
  }
3692
142k
3693
142k
  if (!IsSibcall)
3694
134k
    Chain = DAG.getCALLSEQ_START(Chain, NumBytesToPush,
3695
134k
                                 NumBytes - NumBytesToPush, dl);
3696
142k
3697
142k
  SDValue RetAddrFrIdx;
3698
142k
  // Load return address for tail calls.
3699
142k
  if (isTailCall && FPDiff)
3700
8
    Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, isTailCall,
3701
8
                                    Is64Bit, FPDiff, dl);
3702
142k
3703
142k
  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
3704
142k
  SmallVector<SDValue, 8> MemOpChains;
3705
142k
  SDValue StackPtr;
3706
142k
3707
142k
  // The next loop assumes that the locations are in the same order of the
3708
142k
  // input arguments.
3709
142k
  assert(isSortedByValueNo(ArgLocs) &&
3710
142k
         "Argument Location list must be sorted before lowering");
3711
142k
3712
142k
  // Walk the register/memloc assignments, inserting copies/loads.  In the case
3713
142k
  // of tail call optimization, arguments are handled later.
3714
142k
  const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
3715
448k
  for (unsigned I = 0, OutIndex = 0, E = ArgLocs.size(); I != E;
3716
306k
       ++I, ++OutIndex) {
3717
306k
    assert(OutIndex < Outs.size() && "Invalid Out index");
3718
306k
    // Skip inalloca arguments, they have already been written.
3719
306k
    ISD::ArgFlagsTy Flags = Outs[OutIndex].Flags;
3720
306k
    if (Flags.isInAlloca())
3721
21
      continue;
3722
306k
3723
306k
    CCValAssign &VA = ArgLocs[I];
3724
306k
    EVT RegVT = VA.getLocVT();
3725
306k
    SDValue Arg = OutVals[OutIndex];
3726
306k
    bool isByVal = Flags.isByVal();
3727
306k
3728
306k
    // Promote the value if needed.
3729
306k
    switch (VA.getLocInfo()) {
3730
306k
    default: llvm_unreachable("Unknown loc info!");
3731
306k
    case CCValAssign::Full: break;
3732
306k
    case CCValAssign::SExt:
3733
265
      Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg);
3734
265
      break;
3735
306k
    case CCValAssign::ZExt:
3736
14.2k
      Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, RegVT, Arg);
3737
14.2k
      break;
3738
306k
    case CCValAssign::AExt:
3739
363
      if (Arg.getValueType().isVector() &&
3740
363
          Arg.getValueType().getVectorElementType() == MVT::i1)
3741
114
        Arg = lowerMasksToReg(Arg, RegVT, dl, DAG);
3742
249
      else if (RegVT.is128BitVector()) {
3743
3
        // Special case: passing MMX values in XMM registers.
3744
3
        Arg = DAG.getBitcast(MVT::i64, Arg);
3745
3
        Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg);
3746
3
        Arg = getMOVL(DAG, dl, MVT::v2i64, DAG.getUNDEF(MVT::v2i64), Arg);
3747
3
      } else
3748
246
        Arg = DAG.getNode(ISD::ANY_EXTEND, dl, RegVT, Arg);
3749
363
      break;
3750
306k
    case CCValAssign::BCvt:
3751
0
      Arg = DAG.getBitcast(RegVT, Arg);
3752
0
      break;
3753
306k
    case CCValAssign::Indirect: {
3754
34
      if (isByVal) {
3755
7
        // Memcpy the argument to a temporary stack slot to prevent
3756
7
        // the caller from seeing any modifications the callee may make
3757
7
        // as guaranteed by the `byval` attribute.
3758
7
        int FrameIdx = MF.getFrameInfo().CreateStackObject(
3759
7
            Flags.getByValSize(), std::max(16, (int)Flags.getByValAlign()),
3760
7
            false);
3761
7
        SDValue StackSlot =
3762
7
            DAG.getFrameIndex(FrameIdx, getPointerTy(DAG.getDataLayout()));
3763
7
        Chain =
3764
7
            CreateCopyOfByValArgument(Arg, StackSlot, Chain, Flags, DAG, dl);
3765
7
        // From now on treat this as a regular pointer
3766
7
        Arg = StackSlot;
3767
7
        isByVal = false;
3768
27
      } else {
3769
27
        // Store the argument.
3770
27
        SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT());
3771
27
        int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
3772
27
        Chain = DAG.getStore(
3773
27
            Chain, dl, Arg, SpillSlot,
3774
27
            MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
3775
27
        Arg = SpillSlot;
3776
27
      }
3777
34
      break;
3778
306k
    }
3779
306k
    }
3780
306k
3781
306k
    if (VA.needsCustom()) {
3782
2
      assert(VA.getValVT() == MVT::v64i1 &&
3783
2
             "Currently the only custom case is when we split v64i1 to 2 regs");
3784
2
      // Split v64i1 value into two registers
3785
2
      Passv64i1ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++I],
3786
2
                         Subtarget);
3787
306k
    } else if (VA.isRegLoc()) {
3788
254k
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
3789
254k
      const TargetOptions &Options = DAG.getTarget().Options;
3790
254k
      if (Options.EnableDebugEntryValues)
3791
3
        CSInfo.emplace_back(VA.getLocReg(), I);
3792
254k
      if (isVarArg && IsWin64) {
3793
213
        // Win64 ABI requires argument XMM reg to be copied to the corresponding
3794
213
        // shadow reg if callee is a varargs function.
3795
213
        unsigned ShadowReg = 0;
3796
213
        switch (VA.getLocReg()) {
3797
213
        
case X86::XMM0: ShadowReg = X86::RCX; break0
;
3798
213
        
case X86::XMM1: ShadowReg = X86::RDX; break4
;
3799
213
        
case X86::XMM2: ShadowReg = X86::R8; break0
;
3800
213
        
case X86::XMM3: ShadowReg = X86::R9; break0
;
3801
213
        }
3802
213
        if (ShadowReg)
3803
4
          RegsToPass.push_back(std::make_pair(ShadowReg, Arg));
3804
213
      }
3805
254k
    } else if (!IsSibcall && (!isTailCall || isByVal)) {
3806
51.5k
      assert(VA.isMemLoc());
3807
51.5k
      if (!StackPtr.getNode())
3808
17.2k
        StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
3809
17.2k
                                      getPointerTy(DAG.getDataLayout()));
3810
51.5k
      MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
3811
51.5k
                                             dl, DAG, VA, Flags));
3812
51.5k
    }
3813
306k
  }
3814
142k
3815
142k
  if (!MemOpChains.empty())
3816
17.2k
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
3817
142k
3818
142k
  if (Subtarget.isPICStyleGOT()) {
3819
175
    // ELF / PIC requires GOT in the EBX register before function calls via PLT
3820
175
    // GOT pointer.
3821
175
    if (!isTailCall) {
3822
169
      RegsToPass.push_back(std::make_pair(
3823
169
          unsigned(X86::EBX), DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
3824
169
                                          getPointerTy(DAG.getDataLayout()))));
3825
169
    } else {
3826
6
      // If we are tail calling and generating PIC/GOT style code load the
3827
6
      // address of the callee into ECX. The value in ecx is used as target of
3828
6
      // the tail jump. This is done to circumvent the ebx/callee-saved problem
3829
6
      // for tail calls on PIC/GOT architectures. Normally we would just put the
3830
6
      // address of GOT into ebx and then call target@PLT. But for tail calls
3831
6
      // ebx would be restored (since ebx is callee saved) before jumping to the
3832
6
      // target@PLT.
3833
6
3834
6
      // Note: The actual moving to ECX is done further down.
3835
6
      GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
3836
6
      if (G && !G->getGlobal()->hasLocalLinkage() &&
3837
6
          
G->getGlobal()->hasDefaultVisibility()4
)
3838
2
        Callee = LowerGlobalAddress(Callee, DAG);
3839
4
      else if (isa<ExternalSymbolSDNode>(Callee))
3840
0
        Callee = LowerExternalSymbol(Callee, DAG);
3841
6
    }
3842
175
  }
3843
142k
3844
142k
  if (Is64Bit && isVarArg && !IsWin64 && !IsMustTail) {
3845
5.92k
    // From AMD64 ABI document:
3846
5.92k
    // For calls that may call functions that use varargs or stdargs
3847
5.92k
    // (prototype-less calls or calls to functions containing ellipsis (...) in
3848
5.92k
    // the declaration) %al is used as hidden argument to specify the number
3849
5.92k
    // of SSE registers used. The contents of %al do not need to match exactly
3850
5.92k
    // the number of registers, but must be an upper bound on the number of SSE
3851
5.92k
    // registers used and is in the range 0 - 8 inclusive.
3852
5.92k
3853
5.92k
    // Count the number of XMM registers allocated.
3854
5.92k
    static const MCPhysReg XMMArgRegs[] = {
3855
5.92k
      X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
3856
5.92k
      X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
3857
5.92k
    };
3858
5.92k
    unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs);
3859
5.92k
    assert((Subtarget.hasSSE1() || !NumXMMRegs)
3860
5.92k
           && "SSE registers cannot be used when SSE is disabled");
3861
5.92k
3862
5.92k
    RegsToPass.push_back(std::make_pair(unsigned(X86::AL),
3863
5.92k
                                        DAG.getConstant(NumXMMRegs, dl,
3864
5.92k
                                                        MVT::i8)));
3865
5.92k
  }
3866
142k
3867
142k
  if (isVarArg && IsMustTail) {
3868
28
    const auto &Forwards = X86Info->getForwardedMustTailRegParms();
3869
185
    for (const auto &F : Forwards) {
3870
185
      SDValue Val = DAG.getCopyFromReg(Chain, dl, F.VReg, F.VT);
3871
185
      RegsToPass.push_back(std::make_pair(unsigned(F.PReg), Val));
3872
185
    }
3873
28
  }
3874
142k
3875
142k
  // For tail calls lower the arguments to the 'real' stack slots.  Sibcalls
3876
142k
  // don't need this because the eligibility check rejects calls that require
3877
142k
  // shuffling arguments passed in memory.
3878
142k
  if (!IsSibcall && isTailCall) {
3879
99
    // Force all the incoming stack arguments to be loaded from the stack
3880
99
    // before any new outgoing arguments are stored to the stack, because the
3881
99
    // outgoing stack slots may alias the incoming argument stack slots, and
3882
99
    // the alias isn't otherwise explicit. This is slightly more conservative
3883
99
    // than necessary, because it means that each store effectively depends
3884
99
    // on every argument instead of just those arguments it would clobber.
3885
99
    SDValue ArgChain = DAG.getStackArgumentTokenFactor(Chain);
3886
99
3887
99
    SmallVector<SDValue, 8> MemOpChains2;
3888
99
    SDValue FIN;
3889
99
    int FI = 0;
3890
331
    for (unsigned I = 0, OutsIndex = 0, E = ArgLocs.size(); I != E;
3891
232
         ++I, ++OutsIndex) {
3892
232
      CCValAssign &VA = ArgLocs[I];
3893
232
3894
232
      if (VA.isRegLoc()) {
3895
140
        if (VA.needsCustom()) {
3896
0
          assert((CallConv == CallingConv::X86_RegCall) &&
3897
0
                 "Expecting custom case only in regcall calling convention");
3898
0
          // This means that we are in special case where one argument was
3899
0
          // passed through two register locations - Skip the next location
3900
0
          ++I;
3901
0
        }
3902
140
3903
140
        continue;
3904
140
      }
3905
92
3906
92
      assert(VA.isMemLoc());
3907
92
      SDValue Arg = OutVals[OutsIndex];
3908
92
      ISD::ArgFlagsTy Flags = Outs[OutsIndex].Flags;
3909
92
      // Skip inalloca arguments.  They don't require any work.
3910
92
      if (Flags.isInAlloca())
3911
12
        continue;
3912
80
      // Create frame index.
3913
80
      int32_t Offset = VA.getLocMemOffset()+FPDiff;
3914
80
      uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8;
3915
80
      FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
3916
80
      FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
3917
80
3918
80
      if (Flags.isByVal()) {
3919
2
        // Copy relative to framepointer.
3920
2
        SDValue Source = DAG.getIntPtrConstant(VA.getLocMemOffset(), dl);
3921
2
        if (!StackPtr.getNode())
3922
0
          StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
3923
0
                                        getPointerTy(DAG.getDataLayout()));
3924
2
        Source = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
3925
2
                             StackPtr, Source);
3926
2
3927
2
        MemOpChains2.push_back(CreateCopyOfByValArgument(Source, FIN,
3928
2
                                                         ArgChain,
3929
2
                                                         Flags, DAG, dl));
3930
78
      } else {
3931
78
        // Store relative to framepointer.
3932
78
        MemOpChains2.push_back(DAG.getStore(
3933
78
            ArgChain, dl, Arg, FIN,
3934
78
            MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)));
3935
78
      }
3936
80
    }
3937
99
3938
99
    if (!MemOpChains2.empty())
3939
48
      Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
3940
99
3941
99
    // Store the return address to the appropriate stack slot.
3942
99
    Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx,
3943
99
                                     getPointerTy(DAG.getDataLayout()),
3944
99
                                     RegInfo->getSlotSize(), FPDiff, dl);
3945
99
  }
3946
142k
3947
142k
  // Build a sequence of copy-to-reg nodes chained together with token chain
3948
142k
  // and flag operands which copy the outgoing args into registers.
3949
142k
  SDValue InFlag;
3950
402k
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
3951
260k
    Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
3952
260k
                             RegsToPass[i].second, InFlag);
3953
260k
    InFlag = Chain.getValue(1);
3954
260k
  }
3955
142k
3956
142k
  if (DAG.getTarget().getCodeModel() == CodeModel::Large) {
3957
263
    assert(Is64Bit && "Large code model is only legal in 64-bit mode.");
3958
263
    // In the 64-bit large code model, we have to make all calls
3959
263
    // through a register, since the call instruction's 32-bit
3960
263
    // pc-relative offset may not be large enough to hold the whole
3961
263
    // address.
3962
141k
  } else if (Callee->getOpcode() == ISD::GlobalAddress ||
3963
141k
             Callee->getOpcode() == ISD::ExternalSymbol) {
3964
139k
    // Lower direct calls to global addresses and external symbols. Setting
3965
139k
    // ForCall to true here has the effect of removing WrapperRIP when possible
3966
139k
    // to allow direct calls to be selected without first materializing the
3967
139k
    // address into a register.
3968
139k
    Callee = LowerGlobalOrExternal(Callee, DAG, /*ForCall=*/true);
3969
139k
  } else if (Subtarget.isTarget64BitILP32() &&
3970
2.49k
             Callee->getValueType(0) == MVT::i32) {
3971
19
    // Zero-extend the 32-bit Callee address into a 64-bit according to x32 ABI
3972
19
    Callee = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Callee);
3973
19
  }
3974
142k
3975
142k
  // Returns a chain & a flag for retval copy to use.
3976
142k
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
3977
142k
  SmallVector<SDValue, 8> Ops;
3978
142k
3979
142k
  if (!IsSibcall && isTailCall) {
3980
99
    Chain = DAG.getCALLSEQ_END(Chain,
3981
99
                               DAG.getIntPtrConstant(NumBytesToPop, dl, true),
3982
99
                               DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
3983
99
    InFlag = Chain.getValue(1);
3984
99
  }
3985
142k
3986
142k
  Ops.push_back(Chain);
3987
142k
  Ops.push_back(Callee);
3988
142k
3989
142k
  if (isTailCall)
3990
7.68k
    Ops.push_back(DAG.getConstant(FPDiff, dl, MVT::i32));
3991
142k
3992
142k
  // Add argument registers to the end of the list so that they are known live
3993
142k
  // into the call.
3994
402k
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
3995
260k
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
3996
260k
                                  RegsToPass[i].second.getValueType()));
3997
142k
3998
142k
  // Add a register mask operand representing the call-preserved registers.
3999
142k
  // If HasNCSR is asserted (attribute NoCallerSavedRegisters exists) then we
4000
142k
  // set X86_INTR calling convention because it has the same CSR mask
4001
142k
  // (same preserved registers).
4002
142k
  const uint32_t *Mask = RegInfo->getCallPreservedMask(
4003
142k
      MF, HasNCSR ? (CallingConv::ID)CallingConv::X86_INTR : CallConv);
4004
142k
  assert(Mask && "Missing call preserved mask for calling convention");
4005
142k
4006
142k
  // If this is an invoke in a 32-bit function using a funclet-based
4007
142k
  // personality, assume the function clobbers all registers. If an exception
4008
142k
  // is thrown, the runtime will not restore CSRs.
4009
142k
  // FIXME: Model this more precisely so that we can register allocate across
4010
142k
  // the normal edge and spill and fill across the exceptional edge.
4011
142k
  if (!Is64Bit && CLI.CS && CLI.CS.isInvoke()) {
4012
180
    const Function &CallerFn = MF.getFunction();
4013
180
    EHPersonality Pers =
4014
180
        CallerFn.hasPersonalityFn()
4015
180
            ? classifyEHPersonality(CallerFn.getPersonalityFn())
4016
180
            : 
EHPersonality::Unknown0
;
4017
180
    if (isFuncletEHPersonality(Pers))
4018
51
      Mask = RegInfo->getNoPreservedMask();
4019
180
  }
4020
142k
4021
142k
  // Define a new register mask from the existing mask.
4022
142k
  uint32_t *RegMask = nullptr;
4023
142k
4024
142k
  // In some calling conventions we need to remove the used physical registers
4025
142k
  // from the reg mask.
4026
142k
  if (CallConv == CallingConv::X86_RegCall || 
HasNCSR142k
) {
4027
70
    const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
4028
70
4029
70
    // Allocate a new Reg Mask and copy Mask.
4030
70
    RegMask = MF.allocateRegMask();
4031
70
    unsigned RegMaskSize = MachineOperand::getRegMaskSize(TRI->getNumRegs());
4032
70
    memcpy(RegMask, Mask, sizeof(RegMask[0]) * RegMaskSize);
4033
70
4034
70
    // Make sure all sub registers of the argument registers are reset
4035
70
    // in the RegMask.
4036
70
    for (auto const &RegPair : RegsToPass)
4037
137
      for (MCSubRegIterator SubRegs(RegPair.first, TRI, /*IncludeSelf=*/true);
4038
743
           SubRegs.isValid(); ++SubRegs)
4039
606
        RegMask[*SubRegs / 32] &= ~(1u << (*SubRegs % 32));
4040
70
4041
70
    // Create the RegMask Operand according to our updated mask.
4042
70
    Ops.push_back(DAG.getRegisterMask(RegMask));
4043
142k
  } else {
4044
142k
    // Create the RegMask Operand according to the static mask.
4045
142k
    Ops.push_back(DAG.getRegisterMask(Mask));
4046
142k
  }
4047
142k
4048
142k
  if (InFlag.getNode())
4049
99.8k
    Ops.push_back(InFlag);
4050
142k
4051
142k
  if (isTailCall) {
4052
7.68k
    // We used to do:
4053
7.68k
    //// If this is the first return lowered for this function, add the regs
4054
7.68k
    //// to the liveout set for the function.
4055
7.68k
    // This isn't right, although it's probably harmless on x86; liveouts
4056
7.68k
    // should be computed from returns not tail calls.  Consider a void
4057
7.68k
    // function making a tail call to a function returning int.
4058
7.68k
    MF.getFrameInfo().setHasTailCall();
4059
7.68k
    SDValue Ret = DAG.getNode(X86ISD::TC_RETURN, dl, NodeTys, Ops);
4060
7.68k
    DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo));
4061
7.68k
    return Ret;
4062
7.68k
  }
4063
134k
4064
134k
  if (HasNoCfCheck && IsCFProtectionSupported) {
4065
1
    Chain = DAG.getNode(X86ISD::NT_CALL, dl, NodeTys, Ops);
4066
134k
  } else {
4067
134k
    Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, Ops);
4068
134k
  }
4069
134k
  InFlag = Chain.getValue(1);
4070
134k
  DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));
4071
134k
4072
134k
  // Create the CALLSEQ_END node.
4073
134k
  unsigned NumBytesForCalleeToPop;
4074
134k
  if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
4075
134k
                       DAG.getTarget().Options.GuaranteedTailCallOpt))
4076
167
    NumBytesForCalleeToPop = NumBytes;    // Callee pops everything
4077
134k
  else if (!Is64Bit && !canGuaranteeTCO(CallConv) &&
4078
134k
           !Subtarget.getTargetTriple().isOSMSVCRT() &&
4079
134k
           SR == StackStructReturn)
4080
91
    // If this is a call to a struct-return function, the callee
4081
91
    // pops the hidden struct pointer, so we have to push it back.
4082
91
    // This is common for Darwin/X86, Linux & Mingw32 targets.
4083
91
    // For MSVC Win32 targets, the caller pops the hidden struct pointer.
4084
91
    NumBytesForCalleeToPop = 4;
4085
134k
  else
4086
134k
    NumBytesForCalleeToPop = 0;  // Callee pops nothing.
4087
134k
4088
134k
  if (CLI.DoesNotReturn && !getTargetMachine().Options.TrapUnreachable) {
4089
403
    // No need to reset the stack after the call if the call doesn't return. To
4090
403
    // make the MI verify, we'll pretend the callee does it for us.
4091
403
    NumBytesForCalleeToPop = NumBytes;
4092
403
  }
4093
134k
4094
134k
  // Returns a flag for retval copy to use.
4095
134k
  if (!IsSibcall) {
4096
134k
    Chain = DAG.getCALLSEQ_END(Chain,
4097
134k
                               DAG.getIntPtrConstant(NumBytesToPop, dl, true),
4098
134k
                               DAG.getIntPtrConstant(NumBytesForCalleeToPop, dl,
4099
134k
                                                     true),
4100
134k
                               InFlag, dl);
4101
134k
    InFlag = Chain.getValue(1);
4102
134k
  }
4103
134k
4104
134k
  // Handle result values, copying them out of physregs into vregs that we
4105
134k
  // return.
4106
134k
  return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
4107
134k
                         InVals, RegMask);
4108
134k
}
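The AMD64 ABI note in the function above says that before calling a varargs function, %al must hold an upper bound (0 to 8) on the number of SSE registers carrying arguments. A small sketch of that rule, with a made-up register count:

#include <algorithm>
#include <cstdio>

int main() {
  unsigned numXMMRegsUsed = 3;                // e.g. three floating-point args
  unsigned al = std::min(numXMMRegsUsed, 8u); // clamp to the 0..8 range
  std::printf("mov al, %u before the varargs call\n", al);
}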
4109
4110
//===----------------------------------------------------------------------===//
4111
//                Fast Calling Convention (tail call) implementation
4112
//===----------------------------------------------------------------------===//
4113
4114
//  Like std call, callee cleans arguments, convention except that ECX is
4115
//  reserved for storing the tail called function address. Only 2 registers are
4116
//  free for argument passing (inreg). Tail call optimization is performed
4117
//  provided:
4118
//                * tailcallopt is enabled
4119
//                * caller/callee are fastcc
4120
//  On X86_64 architecture with GOT-style position independent code only local
4121
//  (within module) calls are supported at the moment.
4122
//  To keep the stack aligned according to platform abi the function
4123
//  GetAlignedArgumentStackSize ensures that argument delta is always multiples
4124
//  of stack alignment. (Dynamic linkers need this - darwin's dyld for example)
4125
//  If a tail called function callee has more arguments than the caller the
4126
//  caller needs to make sure that there is room to move the RETADDR to. This is
4127
//  achieved by reserving an area the size of the argument delta right after the
4128
//  original RETADDR, but before the saved framepointer or the spilled registers
4129
//  e.g. caller(arg1, arg2) calls callee(arg1, arg2,arg3,arg4)
4130
//  stack layout:
4131
//    arg1
4132
//    arg2
4133
//    RETADDR
4134
//    [ new RETADDR
4135
//      move area ]
4136
//    (possible EBP)
4137
//    ESI
4138
//    EDI
4139
//    local1 ..
4140
4141
/// Round the stack size up so that it has the form, e.g., 16n + 12 for a
4142
/// 16-byte alignment requirement (leaving room for the return-address slot).
4143
unsigned
4144
X86TargetLowering::GetAlignedArgumentStackSize(unsigned StackSize,
4145
84
                                               SelectionDAG& DAG) const {
4146
84
  const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
4147
84
  const TargetFrameLowering &TFI = *Subtarget.getFrameLowering();
4148
84
  unsigned StackAlignment = TFI.getStackAlignment();
4149
84
  uint64_t AlignMask = StackAlignment - 1;
4150
84
  int64_t Offset = StackSize;
4151
84
  unsigned SlotSize = RegInfo->getSlotSize();
4152
84
  if ( (Offset & AlignMask) <= (StackAlignment - SlotSize) ) {
4153
84
    // Number smaller than 12 so just add the difference.
4154
84
    Offset += ((StackAlignment - SlotSize) - (Offset & AlignMask));
4155
84
  } else {
4156
0
    // Mask out lower bits, add stackalignment once plus the 12 bytes.
4157
0
    Offset = ((~AlignMask) & Offset) + StackAlignment +
4158
0
      (StackAlignment-SlotSize);
4159
0
  }
4160
84
  return Offset;
4161
84
}
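To make the rounding rule above concrete, here is a standalone copy of the same arithmetic with a worked example: with a 16-byte stack alignment and an 8-byte return-address slot, the argument area is rounded up to the next value of the form 16n + 8. The helper name is mine, not LLVM's.

#include <cstdio>

static unsigned alignArgumentStackSize(unsigned stackSize,
                                       unsigned stackAlignment,
                                       unsigned slotSize) {
  unsigned alignMask = stackAlignment - 1;
  unsigned offset = stackSize;
  if ((offset & alignMask) <= (stackAlignment - slotSize))
    offset += (stackAlignment - slotSize) - (offset & alignMask);
  else
    offset = (~alignMask & offset) + stackAlignment + (stackAlignment - slotSize);
  return offset;
}

int main() {
  std::printf("%u\n", alignArgumentStackSize(20, 16, 8)); // 24 (16*1 + 8)
  std::printf("%u\n", alignArgumentStackSize(28, 16, 8)); // 40 (16*2 + 8)
}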
4162
4163
/// Return true if the given stack call argument is already available in the
4164
/// same position (relatively) of the caller's incoming argument stack.
4165
static
4166
bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
4167
                         MachineFrameInfo &MFI, const MachineRegisterInfo *MRI,
4168
1.95k
                         const X86InstrInfo *TII, const CCValAssign &VA) {
4169
1.95k
  unsigned Bytes = Arg.getValueSizeInBits() / 8;
4170
1.95k
4171
1.97k
  for (;;) {
4172
1.97k
    // Look through nodes that don't alter the bits of the incoming value.
4173
1.97k
    unsigned Op = Arg.getOpcode();
4174
1.97k
    if (Op == ISD::ZERO_EXTEND || Op == ISD::ANY_EXTEND || Op == ISD::BITCAST) {
4175
16
      Arg = Arg.getOperand(0);
4176
16
      continue;
4177
16
    }
4178
1.96k
    if (Op == ISD::TRUNCATE) {
4179
9
      const SDValue &TruncInput = Arg.getOperand(0);
4180
9
      if (TruncInput.getOpcode() == ISD::AssertZext &&
4181
9
          cast<VTSDNode>(TruncInput.getOperand(1))->getVT() ==
4182
6
              Arg.getValueType()) {
4183
6
        Arg = TruncInput.getOperand(0);
4184
6
        continue;
4185
6
      }
4186
1.95k
    }
4187
1.95k
    break;
4188
1.95k
  }
4189
1.95k
4190
1.95k
  int FI = INT_MAX;
4191
1.95k
  if (Arg.getOpcode() == ISD::CopyFromReg) {
4192
805
    unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
4193
805
    if (!TargetRegisterInfo::isVirtualRegister(VR))
4194
17
      return false;
4195
788
    MachineInstr *Def = MRI->getVRegDef(VR);
4196
788
    if (!Def)
4197
12
      return false;
4198
776
    if (!Flags.isByVal()) {
4199
773
      if (!TII->isLoadFromStackSlot(*Def, FI))
4200
93
        return false;
4201
3
    } else {
4202
3
      unsigned Opcode = Def->getOpcode();
4203
3
      if ((Opcode == X86::LEA32r || Opcode == X86::LEA64r ||
4204
3
           Opcode == X86::LEA64_32r) &&
4205
3
          Def->getOperand(1).isFI()) {
4206
3
        FI = Def->getOperand(1).getIndex();
4207
3
        Bytes = Flags.getByValSize();
4208
3
      } else
4209
0
        return false;
4210
1.15k
    }
4211
1.15k
  } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
4212
687
    if (Flags.isByVal())
4213
1
      // ByVal argument is passed in as a pointer but it's now being
4214
1
      // dereferenced. e.g.
4215
1
      // define @foo(%struct.X* %A) {
4216
1
      //   tail call @bar(%struct.X* byval %A)
4217
1
      // }
4218
1
      return false;
4219
686
    SDValue Ptr = Ld->getBasePtr();
4220
686
    FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
4221
686
    if (!FINode)
4222
43
      return false;
4223
643
    FI = FINode->getIndex();
4224
643
  } else if (Arg.getOpcode() == ISD::FrameIndex && Flags.isByVal()) {
4225
79
    FrameIndexSDNode *FINode = cast<FrameIndexSDNode>(Arg);
4226
79
    FI = FINode->getIndex();
4227
79
    Bytes = Flags.getByValSize();
4228
79
  } else
4229
384
    return false;
4230
1.40k
4231
1.40k
  assert(FI != INT_MAX);
4232
1.40k
  if (!MFI.isFixedObjectIndex(FI))
4233
75
    return false;
4234
1.33k
4235
1.33k
  if (Offset != MFI.getObjectOffset(FI))
4236
81
    return false;
4237
1.24k
4238
1.24k
  // If this is not byval, check that the argument stack object is immutable.
4239
1.24k
  // inalloca and argument copy elision can create mutable argument stack
4240
1.24k
  // objects. Byval objects can be mutated, but a byval call intends to pass the
4241
1.24k
  // mutated memory.
4242
1.24k
  if (!Flags.isByVal() && !MFI.isImmutableObjectIndex(FI))
4243
86
    return false;
4244
1.16k
4245
1.16k
  if (VA.getLocVT().getSizeInBits() > Arg.getValueSizeInBits()) {
4246
14
    // If the argument location is wider than the argument type, check that any
4247
14
    // extension flags match.
4248
14
    if (Flags.isZExt() != MFI.isObjectZExt(FI) ||
4249
14
        Flags.isSExt() != MFI.isObjectSExt(FI)) {
4250
1
      return false;
4251
1
    }
4252
1.16k
  }
4253
1.16k
4254
1.16k
  return Bytes == MFI.getObjectSize(FI);
4255
1.16k
}
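The essence of MatchingStackOffset is that an outgoing stack argument may be left in place for a sibcall only if it already sits at the same offset in the caller's incoming fixed argument area, has the same size, and (when not byval) the slot is immutable. A simplified sketch of that comparison, with hypothetical numbers and types of my own:

#include <cstdio>

struct FixedObject { int offset; unsigned size; bool immutable; };

static bool matches(const FixedObject &incoming, int neededOffset,
                    unsigned argBytes) {
  return incoming.immutable && incoming.offset == neededOffset &&
         incoming.size == argBytes;
}

int main() {
  FixedObject incomingArg{16, 8, true};
  std::printf("%s\n", matches(incomingArg, 16, 8) ? "reuse in place"
                                                  : "must copy");
}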
4256
4257
/// Check whether the call is eligible for tail call optimization. Targets
4258
/// that want to do tail call optimization should implement this function.
4259
bool X86TargetLowering::IsEligibleForTailCallOptimization(
4260
    SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
4261
    bool isCalleeStructRet, bool isCallerStructRet, Type *RetTy,
4262
    const SmallVectorImpl<ISD::OutputArg> &Outs,
4263
    const SmallVectorImpl<SDValue> &OutVals,
4264
8.64k
    const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const {
4265
8.64k
  if (!mayTailCallThisCC(CalleeCC))
4266
0
    return false;
4267
8.64k
4268
8.64k
  // If -tailcallopt is specified, make fastcc functions tail-callable.
4269
8.64k
  MachineFunction &MF = DAG.getMachineFunction();
4270
8.64k
  const Function &CallerF = MF.getFunction();
4271
8.64k
4272
8.64k
  // If the function return type is x86_fp80 and the callee return type is not,
4273
8.64k
  // then the FP_EXTEND of the call result is not a nop. It's not safe to
4274
8.64k
  // perform a tailcall optimization here.
4275
8.64k
  if (CallerF.getReturnType()->isX86_FP80Ty() && !RetTy->isX86_FP80Ty())
4276
0
    return false;
4277
8.64k
4278
8.64k
  CallingConv::ID CallerCC = CallerF.getCallingConv();
4279
8.64k
  bool CCMatch = CallerCC == CalleeCC;
4280
8.64k
  bool IsCalleeWin64 = Subtarget.isCallingConvWin64(CalleeCC);
4281
8.64k
  bool IsCallerWin64 = Subtarget.isCallingConvWin64(CallerCC);
4282
8.64k
4283
8.64k
  // Win64 functions have extra shadow space for argument homing. Don't do the
4284
8.64k
  // sibcall if the caller and callee have mismatched expectations for this
4285
8.64k
  // space.
4286
8.64k
  if (IsCalleeWin64 != IsCallerWin64)
4287
2
    return false;
4288
8.64k
4289
8.64k
  if (DAG.getTarget().Options.GuaranteedTailCallOpt) {
4290
38
    if (canGuaranteeTCO(CalleeCC) && 
CCMatch35
)
4291
34
      return true;
4292
4
    return false;
4293
4
  }
4294
8.60k
4295
8.60k
  // Look for obvious safe cases to perform tail call optimization that do not
4296
8.60k
  // require ABI changes. This is what gcc calls sibcall.
4297
8.60k
4298
8.60k
  // Can't do sibcall if stack needs to be dynamically re-aligned. PEI needs to
4299
8.60k
  // emit a special epilogue.
4300
8.60k
  const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
4301
8.60k
  if (RegInfo->needsStackRealignment(MF))
4302
7
    return false;
4303
8.59k
4304
8.59k
  // Also avoid sibcall optimization if either caller or callee uses struct
4305
8.59k
  // return semantics.
4306
8.59k
  if (isCalleeStructRet || 
isCallerStructRet8.53k
)
4307
75
    return false;
4308
8.52k
4309
8.52k
  // Do not sibcall optimize vararg calls unless all arguments are passed via
4310
8.52k
  // registers.
4311
8.52k
  LLVMContext &C = *DAG.getContext();
4312
8.52k
  if (isVarArg && 
!Outs.empty()416
) {
4313
372
    // Optimizing for varargs on Win64 is unlikely to be safe without
4314
372
    // additional testing.
4315
372
    if (IsCalleeWin64 || 
IsCallerWin64367
)
4316
5
      return false;
4317
367
4318
367
    SmallVector<CCValAssign, 16> ArgLocs;
4319
367
    CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
4320
367
4321
367
    CCInfo.AnalyzeCallOperands(Outs, CC_X86);
4322
1.12k
    for (unsigned i = 0, e = ArgLocs.size(); i != e; 
++i753
)
4323
858
      if (!ArgLocs[i].isRegLoc())
4324
105
        return false;
4325
367
  }
4326
8.52k
4327
8.52k
  // If the call result is in ST0 / ST1, it needs to be popped off the x87
4328
8.52k
  // stack.  Therefore, if it's not used by the call it is not safe to optimize
4329
8.52k
  // this into a sibcall.
4330
8.52k
  bool Unused = false;
4331
11.1k
  for (unsigned i = 0, e = Ins.size(); i != e; 
++i2.72k
) {
4332
3.55k
    if (!Ins[i].Used) {
4333
836
      Unused = true;
4334
836
      break;
4335
836
    }
4336
3.55k
  }
4337
8.41k
  if (Unused) {
4338
836
    SmallVector<CCValAssign, 16> RVLocs;
4339
836
    CCState CCInfo(CalleeCC, false, MF, RVLocs, C);
4340
836
    CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
4341
1.67k
    for (unsigned i = 0, e = RVLocs.size(); i != e; 
++i836
) {
4342
838
      CCValAssign &VA = RVLocs[i];
4343
838
      if (VA.getLocReg() == X86::FP0 || 
VA.getLocReg() == X86::FP1836
)
4344
2
        return false;
4345
838
    }
4346
836
  }
4347
8.41k
4348
8.41k
  // Check that the call results are passed in the same way.
4349
8.41k
  
if (8.41k
!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
4350
8.41k
                                  RetCC_X86, RetCC_X86))
4351
1
    return false;
4352
8.41k
  // The callee has to preserve all registers the caller needs to preserve.
4353
8.41k
  const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
4354
8.41k
  const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
4355
8.41k
  if (!CCMatch) {
4356
397
    const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
4357
397
    if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
4358
7
      return false;
4359
8.40k
  }
4360
8.40k
4361
8.40k
  unsigned StackArgsSize = 0;
4362
8.40k
4363
8.40k
  // If the callee takes no arguments then go on to check the results of the
4364
8.40k
  // call.
4365
8.40k
  if (!Outs.empty()) {
4366
7.44k
    // Check if stack adjustment is needed. For now, do not do this if any
4367
7.44k
    // argument is passed on the stack.
4368
7.44k
    SmallVector<CCValAssign, 16> ArgLocs;
4369
7.44k
    CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
4370
7.44k
4371
7.44k
    // Allocate shadow area for Win64
4372
7.44k
    if (IsCalleeWin64)
4373
58
      CCInfo.AllocateStack(32, 8);
4374
7.44k
4375
7.44k
    CCInfo.AnalyzeCallOperands(Outs, CC_X86);
4376
7.44k
    StackArgsSize = CCInfo.getNextStackOffset();
4377
7.44k
4378
7.44k
    if (CCInfo.getNextStackOffset()) {
4379
1.28k
      // Check if the arguments are already laid out in the right way as
4380
1.28k
      // the caller's fixed stack objects.
4381
1.28k
      MachineFrameInfo &MFI = MF.getFrameInfo();
4382
1.28k
      const MachineRegisterInfo *MRI = &MF.getRegInfo();
4383
1.28k
      const X86InstrInfo *TII = Subtarget.getInstrInfo();
4384
3.04k
      for (unsigned i = 0, e = ArgLocs.size(); i != e; 
++i1.75k
) {
4385
2.55k
        CCValAssign &VA = ArgLocs[i];
4386
2.55k
        SDValue Arg = OutVals[i];
4387
2.55k
        ISD::ArgFlagsTy Flags = Outs[i].Flags;
4388
2.55k
        if (VA.getLocInfo() == CCValAssign::Indirect)
4389
0
          return false;
4390
2.55k
        if (!VA.isRegLoc()) {
4391
1.95k
          if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
4392
1.95k
                                   MFI, MRI, TII, VA))
4393
793
            return false;
4394
1.95k
        }
4395
2.55k
      }
4396
1.28k
    }
4397
7.44k
4398
7.44k
    bool PositionIndependent = isPositionIndependent();
4399
6.64k
    // If the tailcall address may be in a register, then make sure it's
4400
6.64k
    // possible to register allocate for it. In 32-bit, the call address can
4401
6.64k
    // only target EAX, EDX, or ECX since the tail call must be scheduled after
4402
6.64k
    // callee-saved registers are restored. These happen to be the same
4403
6.64k
    // registers used to pass 'inreg' arguments so watch out for those.
4404
6.64k
    if (!Subtarget.is64Bit() && 
(450
(450
!isa<GlobalAddressSDNode>(Callee)450
&&
4405
450
                                  
!isa<ExternalSymbolSDNode>(Callee)51
) ||
4406
450
                                 
PositionIndependent415
)) {
4407
357
      unsigned NumInRegs = 0;
4408
357
      // In PIC we need an extra register to formulate the address computation
4409
357
      // for the callee.
4410
357
      unsigned MaxInRegs = PositionIndependent ? 
2341
:
316
;
4411
357
4412
1.12k
      for (unsigned i = 0, e = ArgLocs.size(); i != e; 
++i764
) {
4413
772
        CCValAssign &VA = ArgLocs[i];
4414
772
        if (!VA.isRegLoc())
4415
743
          continue;
4416
29
        unsigned Reg = VA.getLocReg();
4417
29
        switch (Reg) {
4418
29
        
default: break4
;
4419
29
        
case X86::EAX: 25
case X86::EDX: 25
case X86::ECX:
4420
25
          if (++NumInRegs == MaxInRegs)
4421
8
            return false;
4422
17
          break;
4423
29
        }
4424
29
      }
4425
357
    }
4426
6.64k
4427
6.64k
    const MachineRegisterInfo &MRI = MF.getRegInfo();
4428
6.64k
    if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
4429
2
      return false;
4430
7.60k
  }
4431
7.60k
4432
7.60k
  bool CalleeWillPop =
4433
7.60k
      X86::isCalleePop(CalleeCC, Subtarget.is64Bit(), isVarArg,
4434
7.60k
                       MF.getTarget().Options.GuaranteedTailCallOpt);
4435
7.60k
4436
7.60k
  if (unsigned BytesToPop =
4437
24
          MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn()) {
4438
24
    // If we have bytes to pop, the callee must pop them.
4439
24
    bool CalleePopMatches = CalleeWillPop && 
BytesToPop == StackArgsSize12
;
4440
24
    if (!CalleePopMatches)
4441
17
      return false;
4442
7.57k
  } else if (CalleeWillPop && 
StackArgsSize > 011
) {
4443
0
    // If we don't have bytes to pop, make sure the callee doesn't pop any.
4444
0
    return false;
4445
0
  }
4446
7.58k
4447
7.58k
  return true;
4448
7.58k
}
4449
4450
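// Editorial note (not part of the original source): the checks above are
// easiest to see on a concrete call. A sketch of IR that would typically pass
// the sibcall screen on x86-64 -- matching calling conventions, no struct
// return, no vararg stack arguments, and no callee-pop mismatch:
//
//   define i32 @caller(i32 %x) {
//     %r = tail call i32 @callee(i32 %x)
//     ret i32 %r
//   }
//
// By contrast, a vararg call that has to pass an argument on the stack, or a
// caller/callee pair that mixes Win64 and SysV conventions, falls into one of
// the early "return false" paths.
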
FastISel *
X86TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
                                  const TargetLibraryInfo *libInfo) const {
  return X86::createFastISel(funcInfo, libInfo);
}

//===----------------------------------------------------------------------===//
//                           Other Lowering Hooks
//===----------------------------------------------------------------------===//

static bool MayFoldLoad(SDValue Op) {
  return Op.hasOneUse() && ISD::isNormalLoad(Op.getNode());
}

static bool MayFoldIntoStore(SDValue Op) {
  return Op.hasOneUse() && ISD::isNormalStore(*Op.getNode()->use_begin());
}

static bool MayFoldIntoZeroExtend(SDValue Op) {
  if (Op.hasOneUse()) {
    unsigned Opcode = Op.getNode()->use_begin()->getOpcode();
    return (ISD::ZERO_EXTEND == Opcode);
  }
  return false;
}

static bool isTargetShuffle(unsigned Opcode) {
  switch(Opcode) {
  default: return false;
  case X86ISD::BLENDI:
  case X86ISD::PSHUFB:
  case X86ISD::PSHUFD:
  case X86ISD::PSHUFHW:
  case X86ISD::PSHUFLW:
  case X86ISD::SHUFP:
  case X86ISD::INSERTPS:
  case X86ISD::EXTRQI:
  case X86ISD::INSERTQI:
  case X86ISD::PALIGNR:
  case X86ISD::VSHLDQ:
  case X86ISD::VSRLDQ:
  case X86ISD::MOVLHPS:
  case X86ISD::MOVHLPS:
  case X86ISD::MOVSHDUP:
  case X86ISD::MOVSLDUP:
  case X86ISD::MOVDDUP:
  case X86ISD::MOVSS:
  case X86ISD::MOVSD:
  case X86ISD::UNPCKL:
  case X86ISD::UNPCKH:
  case X86ISD::VBROADCAST:
  case X86ISD::VPERMILPI:
  case X86ISD::VPERMILPV:
  case X86ISD::VPERM2X128:
  case X86ISD::SHUF128:
  case X86ISD::VPERMIL2:
  case X86ISD::VPERMI:
  case X86ISD::VPPERM:
  case X86ISD::VPERMV:
  case X86ISD::VPERMV3:
  case X86ISD::VZEXT_MOVL:
    return true;
  }
}

static bool isTargetShuffleVariableMask(unsigned Opcode) {
  switch (Opcode) {
  default: return false;
  // Target Shuffles.
  case X86ISD::PSHUFB:
  case X86ISD::VPERMILPV:
  case X86ISD::VPERMIL2:
  case X86ISD::VPPERM:
  case X86ISD::VPERMV:
  case X86ISD::VPERMV3:
    return true;
  // 'Faux' Target Shuffles.
  case ISD::OR:
  case ISD::AND:
  case X86ISD::ANDNP:
    return true;
  }
}

SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
  int ReturnAddrIndex = FuncInfo->getRAIndex();

  if (ReturnAddrIndex == 0) {
    // Set up a frame object for the return address.
    unsigned SlotSize = RegInfo->getSlotSize();
    ReturnAddrIndex = MF.getFrameInfo().CreateFixedObject(SlotSize,
                                                          -(int64_t)SlotSize,
                                                          false);
    FuncInfo->setRAIndex(ReturnAddrIndex);
  }

  return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy(DAG.getDataLayout()));
}

bool X86::isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
                                       bool hasSymbolicDisplacement) {
  // Offset should fit into a 32-bit immediate field.
  if (!isInt<32>(Offset))
    return false;

  // If we don't have a symbolic displacement, we don't have any extra
  // restrictions.
  if (!hasSymbolicDisplacement)
    return true;

  // FIXME: Some tweaks might be needed for the medium code model.
  if (M != CodeModel::Small && M != CodeModel::Kernel)
    return false;

  // For the small code model we assume that the last object ends at most 16MB
  // before the 31-bit boundary. We may also accept pretty large negative
  // constants, knowing that all objects are in the positive half of the
  // address space.
  if (M == CodeModel::Small && Offset < 16*1024*1024)
    return true;

  // For the kernel code model we know that all objects reside in the negative
  // half of the 32-bit address space. We may not accept negative offsets,
  // since they may be just off, and we may accept pretty large positive ones.
  if (M == CodeModel::Kernel && Offset >= 0)
    return true;

  return false;
}

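// Editorial note (not part of the original source): as an illustration of the
// checks above, with a symbolic displacement under the small code model an
// offset of 8*1024*1024 (8MB) is accepted because it stays below the 16MB
// guard band, while 32*1024*1024 is rejected; under the kernel code model any
// non-negative 32-bit offset is accepted and negative offsets are not.
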
/// Determines whether the callee is required to pop its own arguments.
/// Callee pop is necessary to support tail calls.
bool X86::isCalleePop(CallingConv::ID CallingConv,
                      bool is64Bit, bool IsVarArg, bool GuaranteeTCO) {
  // If GuaranteeTCO is true, we force some calls to be callee pop so that we
  // can guarantee TCO.
  if (!IsVarArg && shouldGuaranteeTCO(CallingConv, GuaranteeTCO))
    return true;

  switch (CallingConv) {
  default:
    return false;
  case CallingConv::X86_StdCall:
  case CallingConv::X86_FastCall:
  case CallingConv::X86_ThisCall:
  case CallingConv::X86_VectorCall:
    return !is64Bit;
  }
}

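// Editorial note (not part of the original source): as a quick example of the
// switch above, a 32-bit stdcall declaration such as
//
//   declare x86_stdcallcc void @f(i32)
//
// makes isCalleePop() return true (the callee pops its own arguments, e.g.
// with "ret 4"), while the same convention on a 64-bit target returns false
// because the switch ends with "return !is64Bit".
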
/// Return true if the condition is an unsigned comparison operation.
static bool isX86CCUnsigned(unsigned X86CC) {
  switch (X86CC) {
  default:
    llvm_unreachable("Invalid integer condition!");
  case X86::COND_E:
  case X86::COND_NE:
  case X86::COND_B:
  case X86::COND_A:
  case X86::COND_BE:
  case X86::COND_AE:
    return true;
  case X86::COND_G:
  case X86::COND_GE:
  case X86::COND_L:
  case X86::COND_LE:
    return false;
  }
}

static X86::CondCode TranslateIntegerX86CC(ISD::CondCode SetCCOpcode) {
  switch (SetCCOpcode) {
  default: llvm_unreachable("Invalid integer condition!");
  case ISD::SETEQ:  return X86::COND_E;
  case ISD::SETGT:  return X86::COND_G;
  case ISD::SETGE:  return X86::COND_GE;
  case ISD::SETLT:  return X86::COND_L;
  case ISD::SETLE:  return X86::COND_LE;
  case ISD::SETNE:  return X86::COND_NE;
  case ISD::SETULT: return X86::COND_B;
  case ISD::SETUGT: return X86::COND_A;
  case ISD::SETULE: return X86::COND_BE;
  case ISD::SETUGE: return X86::COND_AE;
  }
}

/// Do a one-to-one translation of an ISD::CondCode to the X86-specific
/// condition code, returning the condition code and the LHS/RHS of the
/// comparison to make.
static X86::CondCode TranslateX86CC(ISD::CondCode SetCCOpcode, const SDLoc &DL,
                               bool isFP, SDValue &LHS, SDValue &RHS,
                               SelectionDAG &DAG) {
  if (!isFP) {
    if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
      if (SetCCOpcode == ISD::SETGT && RHSC->isAllOnesValue()) {
        // X > -1   -> X == 0, jump !sign.
        RHS = DAG.getConstant(0, DL, RHS.getValueType());
        return X86::COND_NS;
      }
      if (SetCCOpcode == ISD::SETLT && RHSC->isNullValue()) {
        // X < 0   -> X == 0, jump on sign.
        return X86::COND_S;
      }
      if (SetCCOpcode == ISD::SETLT && RHSC->getZExtValue() == 1) {
        // X < 1   -> X <= 0
        RHS = DAG.getConstant(0, DL, RHS.getValueType());
        return X86::COND_LE;
      }
    }

    return TranslateIntegerX86CC(SetCCOpcode);
  }

  // First determine if it is required or is profitable to flip the operands.

  // If LHS is a foldable load, but RHS is not, flip the condition.
  if (ISD::isNON_EXTLoad(LHS.getNode()) &&
      !ISD::isNON_EXTLoad(RHS.getNode())) {
    SetCCOpcode = getSetCCSwappedOperands(SetCCOpcode);
    std::swap(LHS, RHS);
  }

  switch (SetCCOpcode) {
  default: break;
  case ISD::SETOLT:
  case ISD::SETOLE:
  case ISD::SETUGT:
  case ISD::SETUGE:
    std::swap(LHS, RHS);
    break;
  }

  // On a floating point condition, the flags are set as follows:
  // ZF  PF  CF   op
  //  0 | 0 | 0 | X > Y
  //  0 | 0 | 1 | X < Y
  //  1 | 0 | 0 | X == Y
  //  1 | 1 | 1 | unordered
  switch (SetCCOpcode) {
  default: llvm_unreachable("Condcode should be pre-legalized away");
  case ISD::SETUEQ:
  case ISD::SETEQ:   return X86::COND_E;
  case ISD::SETOLT:              // flipped
  case ISD::SETOGT:
  case ISD::SETGT:   return X86::COND_A;
  case ISD::SETOLE:              // flipped
  case ISD::SETOGE:
  case ISD::SETGE:   return X86::COND_AE;
  case ISD::SETUGT:              // flipped
  case ISD::SETULT:
  case ISD::SETLT:   return X86::COND_B;
  case ISD::SETUGE:              // flipped
  case ISD::SETULE:
  case ISD::SETLE:   return X86::COND_BE;
  case ISD::SETONE:
  case ISD::SETNE:   return X86::COND_NE;
  case ISD::SETUO:   return X86::COND_P;
  case ISD::SETO:    return X86::COND_NP;
  case ISD::SETOEQ:
  case ISD::SETUNE:  return X86::COND_INVALID;
  }
}

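// Editorial note (not part of the original source): for example, an ordered
// floating-point "X < Y" (ISD::SETOLT) first has its operands swapped by the
// switch above and is then mapped to X86::COND_A, which is why a scalar
// "fcmp olt" typically lowers to a ucomiss/ucomisd with the operands reversed
// followed by a seta/ja.
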
/// Is there a floating point cmov for the specific X86 condition code?
/// The current x86 ISA includes the following FP cmov instructions:
/// fcmovb, fcmovbe, fcmove, fcmovu, fcmovae, fcmova, fcmovne, fcmovnu.
static bool hasFPCMov(unsigned X86CC) {
  switch (X86CC) {
  default:
    return false;
  case X86::COND_B:
  case X86::COND_BE:
  case X86::COND_E:
  case X86::COND_P:
  case X86::COND_A:
  case X86::COND_AE:
  case X86::COND_NE:
  case X86::COND_NP:
    return true;
  }
}


bool X86TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
                                           const CallInst &I,
                                           MachineFunction &MF,
                                           unsigned Intrinsic) const {

  const IntrinsicData* IntrData = getIntrinsicWithChain(Intrinsic);
  if (!IntrData)
    return false;

  Info.flags = MachineMemOperand::MONone;
  Info.offset = 0;

  switch (IntrData->Type) {
  case TRUNCATE_TO_MEM_VI8:
  case TRUNCATE_TO_MEM_VI16:
  case TRUNCATE_TO_MEM_VI32: {
    Info.opc = ISD::INTRINSIC_VOID;
    Info.ptrVal = I.getArgOperand(0);
    MVT VT = MVT::getVT(I.getArgOperand(1)->getType());
    MVT ScalarVT = MVT::INVALID_SIMPLE_VALUE_TYPE;
    if (IntrData->Type == TRUNCATE_TO_MEM_VI8)
      ScalarVT = MVT::i8;
    else if (IntrData->Type == TRUNCATE_TO_MEM_VI16)
      ScalarVT = MVT::i16;
    else if (IntrData->Type == TRUNCATE_TO_MEM_VI32)
      ScalarVT = MVT::i32;

    Info.memVT = MVT::getVectorVT(ScalarVT, VT.getVectorNumElements());
    Info.align = 1;
    Info.flags |= MachineMemOperand::MOStore;
    break;
  }
  case GATHER:
  case GATHER_AVX2: {
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.ptrVal = nullptr;
    MVT DataVT = MVT::getVT(I.getType());
    MVT IndexVT = MVT::getVT(I.getArgOperand(2)->getType());
    unsigned NumElts = std::min(DataVT.getVectorNumElements(),
                                IndexVT.getVectorNumElements());
    Info.memVT = MVT::getVectorVT(DataVT.getVectorElementType(), NumElts);
    Info.align = 1;
    Info.flags |= MachineMemOperand::MOLoad;
    break;
  }
  case SCATTER: {
    Info.opc = ISD::INTRINSIC_VOID;
    Info.ptrVal = nullptr;
    MVT DataVT = MVT::getVT(I.getArgOperand(3)->getType());
    MVT IndexVT = MVT::getVT(I.getArgOperand(2)->getType());
    unsigned NumElts = std::min(DataVT.getVectorNumElements(),
                                IndexVT.getVectorNumElements());
    Info.memVT = MVT::getVectorVT(DataVT.getVectorElementType(), NumElts);
    Info.align = 1;
    Info.flags |= MachineMemOperand::MOStore;
    break;
  }
  default:
    return false;
  }

  return true;
}

/// Returns true if the target can instruction select the
/// specified FP immediate natively. If false, the legalizer will
/// materialize the FP immediate as a load from a constant pool.
bool X86TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
                                     bool ForCodeSize) const {
  for (unsigned i = 0, e = LegalFPImmediates.size(); i != e; ++i) {
    if (Imm.bitwiseIsEqual(LegalFPImmediates[i]))
      return true;
  }
  return false;
}

bool X86TargetLowering::shouldReduceLoadWidth(SDNode *Load,
                                              ISD::LoadExtType ExtTy,
                                              EVT NewVT) const {
  // "ELF Handling for Thread-Local Storage" specifies that R_X86_64_GOTTPOFF
  // relocations target a movq or addq instruction: don't let the load shrink.
  SDValue BasePtr = cast<LoadSDNode>(Load)->getBasePtr();
  if (BasePtr.getOpcode() == X86ISD::WrapperRIP)
    if (const auto *GA = dyn_cast<GlobalAddressSDNode>(BasePtr.getOperand(0)))
      return GA->getTargetFlags() != X86II::MO_GOTTPOFF;

  // If this is (1) an AVX vector load with (2) multiple uses and (3) all of
  // those uses are extracted directly into a store, then the extract + store
  // can be store-folded. Therefore, it's probably not worth splitting the load.
  EVT VT = Load->getValueType(0);
  if ((VT.is256BitVector() || VT.is512BitVector()) && !Load->hasOneUse()) {
    for (auto UI = Load->use_begin(), UE = Load->use_end(); UI != UE; ++UI) {
      // Skip uses of the chain value. Result 0 of the node is the load value.
      if (UI.getUse().getResNo() != 0)
        continue;

      // If this use is not an extract + store, it's probably worth splitting.
      if (UI->getOpcode() != ISD::EXTRACT_SUBVECTOR || !UI->hasOneUse() ||
          UI->use_begin()->getOpcode() != ISD::STORE)
        return true;
    }
    // All non-chain uses are extract + store.
    return false;
  }

  return true;
}

/// Returns true if it is beneficial to convert a load of a constant
/// to just the constant itself.
bool X86TargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
                                                          Type *Ty) const {
  assert(Ty->isIntegerTy());

  unsigned BitSize = Ty->getPrimitiveSizeInBits();
  if (BitSize == 0 || BitSize > 64)
    return false;
  return true;
}

bool X86TargetLowering::reduceSelectOfFPConstantLoads(bool IsFPSetCC) const {
  // If we are using XMM registers in the ABI and the condition of the select is
  // a floating-point compare and we have blendv or conditional move, then it is
  // cheaper to select instead of doing a cross-register move and creating a
  // load that depends on the compare result.
  return !IsFPSetCC || !Subtarget.isTarget64BitLP64() || !Subtarget.hasAVX();
}

bool X86TargetLowering::convertSelectOfConstantsToMath(EVT VT) const {
  // TODO: It might be a win to ease or lift this restriction, but the generic
  // folds in DAGCombiner conflict with vector folds for an AVX512 target.
  if (VT.isVector() && Subtarget.hasAVX512())
    return false;

  return true;
}

bool X86TargetLowering::decomposeMulByConstant(EVT VT, SDValue C) const {
  // TODO: We handle scalars using custom code, but generic combining could make
  // that unnecessary.
  APInt MulC;
  if (!ISD::isConstantSplatVector(C.getNode(), MulC))
    return false;

  // If vector multiply is legal, assume that's faster than shl + add/sub.
  // TODO: Multiply is a complex op with higher latency and lower throughput in
  //       most implementations, so this check could be loosened based on type
  //       and/or a CPU attribute.
  if (isOperationLegal(ISD::MUL, VT))
    return false;

  // shl+add, shl+sub, shl+add+neg
  return (MulC + 1).isPowerOf2() || (MulC - 1).isPowerOf2() ||
         (1 - MulC).isPowerOf2() || (-(MulC + 1)).isPowerOf2();
}

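// Editorial note (not part of the original source): as an example of the
// power-of-two tests above, a splatted multiply by 9 on a vector type without
// a legal ISD::MUL is accepted because 9 - 1 = 8 is a power of two, so the
// multiply can later be rewritten as a shift left by 3 plus an add of the
// original value; a multiply by 7 (7 + 1 = 8) is likewise accepted as a shift
// left by 3 minus the original value.
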
bool X86TargetLowering::shouldUseStrictFP_TO_INT(EVT FpVT, EVT IntVT,
                                                 bool IsSigned) const {
  // f80 UINT_TO_FP is more efficient using Strict code if FCMOV is available.
  return !IsSigned && FpVT == MVT::f80 && Subtarget.hasCMov();
}

bool X86TargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
                                                unsigned Index) const {
  if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT))
    return false;

  // Mask vectors support all subregister combinations and operations that
  // extract half of a vector.
  if (ResVT.getVectorElementType() == MVT::i1)
    return Index == 0 || ((ResVT.getSizeInBits() == SrcVT.getSizeInBits()*2) &&
                          (Index == ResVT.getVectorNumElements()));

  return (Index % ResVT.getVectorNumElements()) == 0;
}

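// Editorial note (not part of the original source): for non-mask vectors the
// final test above means an extract is considered cheap only when it starts on
// a result-sized boundary, e.g. pulling a v4i32 out of a v8i32 at index 0 or 4
// is cheap, while an extract starting at index 2 is not.
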
bool X86TargetLowering::shouldScalarizeBinop(SDValue VecOp) const {
  unsigned Opc = VecOp.getOpcode();

  // Assume target opcodes can't be scalarized.
  // TODO - do we have any exceptions?
  if (Opc >= ISD::BUILTIN_OP_END)
    return false;

  // If the vector op is not supported, try to convert to scalar.
  EVT VecVT = VecOp.getValueType();
  if (!isOperationLegalOrCustomOrPromote(Opc, VecVT))
    return true;

  // If the vector op is supported, but the scalar op is not, the transform may
  // not be worthwhile.
  EVT ScalarVT = VecVT.getScalarType();
  return isOperationLegalOrCustomOrPromote(Opc, ScalarVT);
}

bool X86TargetLowering::shouldFormOverflowOp(unsigned Opcode, EVT VT) const {
  // TODO: Allow vectors?
  if (VT.isVector())