Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Target/X86/X86ISelLowering.h
Line | Count | Source
1
//===-- X86ISelLowering.h - X86 DAG Lowering Interface ----------*- C++ -*-===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This file defines the interfaces that X86 uses to lower LLVM code into a
10
// selection DAG.
11
//
12
//===----------------------------------------------------------------------===//
13
14
#ifndef LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
15
#define LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
16
17
#include "llvm/CodeGen/CallingConvLower.h"
18
#include "llvm/CodeGen/SelectionDAG.h"
19
#include "llvm/CodeGen/TargetLowering.h"
20
#include "llvm/Target/TargetOptions.h"
21
22
namespace llvm {
23
  class X86Subtarget;
24
  class X86TargetMachine;
25
26
  namespace X86ISD {
27
    // X86 Specific DAG Nodes
28
    enum NodeType : unsigned {
29
      // Start the numbering where the builtin ops leave off.
30
      FIRST_NUMBER = ISD::BUILTIN_OP_END,
31
32
      /// Bit scan forward.
33
      BSF,
34
      /// Bit scan reverse.
35
      BSR,
36
37
      /// Double shift instructions. These correspond to
38
      /// X86::SHLDxx and X86::SHRDxx instructions.
39
      SHLD,
40
      SHRD,
41
42
      /// Bitwise logical AND of floating point values. This corresponds
43
      /// to X86::ANDPS or X86::ANDPD.
44
      FAND,
45
46
      /// Bitwise logical OR of floating point values. This corresponds
47
      /// to X86::ORPS or X86::ORPD.
48
      FOR,
49
50
      /// Bitwise logical XOR of floating point values. This corresponds
51
      /// to X86::XORPS or X86::XORPD.
52
      FXOR,
53
54
      ///  Bitwise logical ANDNOT of floating point values. This
55
      /// corresponds to X86::ANDNPS or X86::ANDNPD.
56
      FANDN,
57
58
      /// These operations represent an abstract X86 call
59
      /// instruction, which includes a bunch of information.  In particular the
60
      /// operands of these nodes are:
61
      ///
62
      ///     #0 - The incoming token chain
63
      ///     #1 - The callee
64
      ///     #2 - The number of arg bytes the caller pushes on the stack.
65
      ///     #3 - The number of arg bytes the callee pops off the stack.
66
      ///     #4 - The value to pass in AL/AX/EAX (optional)
67
      ///     #5 - The value to pass in DL/DX/EDX (optional)
68
      ///
69
      /// The result values of these nodes are:
70
      ///
71
      ///     #0 - The outgoing token chain
72
      ///     #1 - The first register result value (optional)
73
      ///     #2 - The second register result value (optional)
74
      ///
75
      CALL,
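      // A minimal sketch of how lowering code typically builds such a call
      // node, assuming `Chain`, `Callee`, `dl`, and `NodeTys` have already
      // been prepared by the surrounding code:
      //
      //   SmallVector<SDValue, 8> Ops;
      //   Ops.push_back(Chain);
      //   Ops.push_back(Callee);
      //   Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, Ops);
      //   SDValue InFlag = Chain.getValue(1);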
76
77
      /// Same as call except it adds the NoTrack prefix.
78
      NT_CALL,
79
80
      /// X86 compare and logical compare instructions.
81
      CMP, COMI, UCOMI,
82
83
      /// X86 bit-test instructions.
84
      BT,
85
86
      /// X86 SetCC. Operand 0 is condition code, and operand 1 is the EFLAGS
87
      /// operand, usually produced by a CMP instruction.
88
      SETCC,
89
90
      /// X86 Select
91
      SELECTS,
92
93
      // Same as SETCC except it's materialized with an SBB and the value is all
94
      // ones or all zeros.
95
      SETCC_CARRY,  // R = carry_bit ? ~0 : 0
96
97
      /// X86 FP SETCC, implemented with CMP{cc}SS/CMP{cc}SD.
98
      /// Operands are two FP values to compare; result is a mask of
99
      /// 0s or 1s.  Generally DTRT for C/C++ with NaNs.
100
      FSETCC,
101
102
      /// X86 FP SETCC, similar to above, but with output as an i1 mask
103
      /// and a version with SAE.
104
      FSETCCM, FSETCCM_SAE,
105
106
      /// X86 conditional moves. Operand 0 and operand 1 are the two values
107
      /// to select from. Operand 2 is the condition code, and operand 3 is the
108
      /// flag operand produced by a CMP or TEST instruction.
109
      CMOV,
110
111
      /// X86 conditional branches. Operand 0 is the chain operand, operand 1
112
      /// is the block to branch if condition is true, operand 2 is the
113
      /// condition code, and operand 3 is the flag operand produced by a CMP
114
      /// or TEST instruction.
115
      BRCOND,
116
117
      /// BRIND node with NoTrack prefix. Operand 0 is the chain operand and
118
      /// operand 1 is the target address.
119
      NT_BRIND,
120
121
      /// Return with a flag operand. Operand 0 is the chain operand, operand
122
      /// 1 is the number of bytes of stack to pop.
123
      RET_FLAG,
124
125
      /// Return from interrupt. Operand 0 is the number of bytes to pop.
126
      IRET,
127
128
      /// Repeat fill, corresponds to X86::REP_STOSx.
129
      REP_STOS,
130
131
      /// Repeat move, corresponds to X86::REP_MOVSx.
132
      REP_MOVS,
133
134
      /// On Darwin, this node represents the result of the popl
135
      /// at function entry, used for PIC code.
136
      GlobalBaseReg,
137
138
      /// A wrapper node for TargetConstantPool, TargetJumpTable,
139
      /// TargetExternalSymbol, TargetGlobalAddress, TargetGlobalTLSAddress,
140
      /// MCSymbol and TargetBlockAddress.
141
      Wrapper,
142
143
      /// Special wrapper used under X86-64 PIC mode for RIP
144
      /// relative displacements.
145
      WrapperRIP,
146
147
      /// Copies a 64-bit value from the low word of an XMM vector
148
      /// to an MMX vector.
149
      MOVDQ2Q,
150
151
      /// Copies a 32-bit value from the low word of an MMX
152
      /// vector to a GPR.
153
      MMX_MOVD2W,
154
155
      /// Copies a GPR into the low 32-bit word of an MMX vector
156
      /// and zeroes out the high word.
157
      MMX_MOVW2D,
158
159
      /// Extract an 8-bit value from a vector and zero extend it to
160
      /// i32, corresponds to X86::PEXTRB.
161
      PEXTRB,
162
163
      /// Extract a 16-bit value from a vector and zero extend it to
164
      /// i32, corresponds to X86::PEXTRW.
165
      PEXTRW,
166
167
      /// Insert any element of a 4 x float vector into any element
168
      /// of a destination 4 x float vector.
169
      INSERTPS,
170
171
      /// Insert the lower 8-bits of a 32-bit value to a vector,
172
      /// corresponds to X86::PINSRB.
173
      PINSRB,
174
175
      /// Insert the lower 16-bits of a 32-bit value to a vector,
176
      /// corresponds to X86::PINSRW.
177
      PINSRW,
178
179
      /// Shuffle 16 8-bit values within a vector.
180
      PSHUFB,
181
182
      /// Compute Sum of Absolute Differences.
183
      PSADBW,
184
      /// Compute Double Block Packed Sum-Absolute-Differences
185
      DBPSADBW,
186
187
      /// Bitwise Logical AND NOT of Packed FP values.
188
      ANDNP,
189
190
      /// Blend where the selector is an immediate.
191
      BLENDI,
192
193
      /// Dynamic (non-constant condition) vector blend where only the sign bits
194
      /// of the condition elements are used. This is used to enforce that the
195
      /// condition mask is not valid for generic VSELECT optimizations. This
196
      /// is also used to implement the intrinsics.
197
      /// Operands are in VSELECT order: MASK, TRUE, FALSE
198
      BLENDV,
199
200
      /// Combined add and sub on an FP vector.
201
      ADDSUB,
202
203
      //  FP vector ops with rounding mode.
204
      FADD_RND, FADDS, FADDS_RND,
205
      FSUB_RND, FSUBS, FSUBS_RND,
206
      FMUL_RND, FMULS, FMULS_RND,
207
      FDIV_RND, FDIVS, FDIVS_RND,
208
      FMAX_SAE, FMAXS_SAE,
209
      FMIN_SAE, FMINS_SAE,
210
      FSQRT_RND, FSQRTS, FSQRTS_RND,
211
212
      // FP vector get exponent.
213
      FGETEXP, FGETEXP_SAE, FGETEXPS, FGETEXPS_SAE,
214
      // Extract Normalized Mantissas.
215
      VGETMANT, VGETMANT_SAE, VGETMANTS, VGETMANTS_SAE,
216
      // FP Scale.
217
      SCALEF, SCALEF_RND,
218
      SCALEFS, SCALEFS_RND,
219
220
      // Unsigned Integer average.
221
      AVG,
222
223
      /// Integer horizontal add/sub.
224
      HADD,
225
      HSUB,
226
227
      /// Floating point horizontal add/sub.
228
      FHADD,
229
      FHSUB,
230
231
      // Detect Conflicts Within a Vector
232
      CONFLICT,
233
234
      /// Floating point max and min.
235
      FMAX, FMIN,
236
237
      /// Commutative FMIN and FMAX.
238
      FMAXC, FMINC,
239
240
      /// Scalar intrinsic floating point max and min.
241
      FMAXS, FMINS,
242
243
      /// Floating point reciprocal-sqrt and reciprocal approximation.
244
      /// Note that these typically require refinement
245
      /// in order to obtain suitable precision.
246
      FRSQRT, FRCP,
247
248
      // AVX-512 reciprocal approximations with a little more precision.
249
      RSQRT14, RSQRT14S, RCP14, RCP14S,
250
251
      // Thread Local Storage.
252
      TLSADDR,
253
254
      // Thread Local Storage. A call to get the start address
255
      // of the TLS block for the current module.
256
      TLSBASEADDR,
257
258
      // Thread Local Storage. Used when calling an OS-provided
259
      // thunk at the address from an earlier relocation.
260
      TLSCALL,
261
262
      // Exception Handling helpers.
263
      EH_RETURN,
264
265
      // SjLj exception handling setjmp.
266
      EH_SJLJ_SETJMP,
267
268
      // SjLj exception handling longjmp.
269
      EH_SJLJ_LONGJMP,
270
271
      // SjLj exception handling dispatch.
272
      EH_SJLJ_SETUP_DISPATCH,
273
274
      /// Tail call return. See X86TargetLowering::LowerCall for
275
      /// the list of operands.
276
      TC_RETURN,
277
278
      // Vector move to low scalar and zero higher vector elements.
279
      VZEXT_MOVL,
280
281
      // Vector integer truncate.
282
      VTRUNC,
283
      // Vector integer truncate with unsigned/signed saturation.
284
      VTRUNCUS, VTRUNCS,
285
286
      // Masked version of the above. Used when less than a 128-bit result is
287
      // produced since the mask only applies to the lower elements and can't
288
      // be represented by a select.
289
      // SRC, PASSTHRU, MASK
290
      VMTRUNC, VMTRUNCUS, VMTRUNCS,
291
292
      // Vector FP extend.
293
      VFPEXT, VFPEXT_SAE, VFPEXTS, VFPEXTS_SAE,
294
295
      // Vector FP round.
296
      VFPROUND, VFPROUND_RND, VFPROUNDS, VFPROUNDS_RND,
297
298
      // Masked version of above. Used for v2f64->v4f32.
299
      // SRC, PASSTHRU, MASK
300
      VMFPROUND,
301
302
      // 128-bit vector logical left / right shift
303
      VSHLDQ, VSRLDQ,
304
305
      // Vector shift elements
306
      VSHL, VSRL, VSRA,
307
308
      // Vector variable shift
309
      VSHLV, VSRLV, VSRAV,
310
311
      // Vector shift elements by immediate
312
      VSHLI, VSRLI, VSRAI,
313
314
      // Shifts of mask registers.
315
      KSHIFTL, KSHIFTR,
316
317
      // Bit rotate by immediate
318
      VROTLI, VROTRI,
319
320
      // Vector packed double/float comparison.
321
      CMPP,
322
323
      // Vector integer comparisons.
324
      PCMPEQ, PCMPGT,
325
326
      // v8i16 Horizontal minimum and position.
327
      PHMINPOS,
328
329
      MULTISHIFT,
330
331
      /// Vector comparison generating mask bits for fp and
332
      /// integer signed and unsigned data types.
333
      CMPM,
334
      // Vector comparison with SAE for FP values
335
      CMPM_SAE,
336
337
      // Arithmetic operations with FLAGS results.
338
      ADD, SUB, ADC, SBB, SMUL, UMUL,
339
      OR, XOR, AND,
340
341
      // Bit field extract.
342
      BEXTR,
343
344
      // Zero High Bits Starting with Specified Bit Position.
345
      BZHI,
346
347
      // X86-specific multiply by immediate.
348
      MUL_IMM,
349
350
      // Vector sign bit extraction.
351
      MOVMSK,
352
353
      // Vector bitwise comparisons.
354
      PTEST,
355
356
      // Vector packed fp sign bitwise comparisons.
357
      TESTP,
358
359
      // OR/AND test for masks.
360
      KORTEST,
361
      KTEST,
362
363
      // ADD for masks.
364
      KADD,
365
366
      // Several flavors of instructions with vector shuffle behaviors.
367
      // Saturated signed/unsigned packing.
368
      PACKSS,
369
      PACKUS,
370
      // Intra-lane alignr.
371
      PALIGNR,
372
      // AVX512 inter-lane alignr.
373
      VALIGN,
374
      PSHUFD,
375
      PSHUFHW,
376
      PSHUFLW,
377
      SHUFP,
378
      // VBMI2 Concat & Shift.
379
      VSHLD,
380
      VSHRD,
381
      VSHLDV,
382
      VSHRDV,
383
      // Shuffle Packed Values at 128-bit granularity.
384
      SHUF128,
385
      MOVDDUP,
386
      MOVSHDUP,
387
      MOVSLDUP,
388
      MOVLHPS,
389
      MOVHLPS,
390
      MOVSD,
391
      MOVSS,
392
      UNPCKL,
393
      UNPCKH,
394
      VPERMILPV,
395
      VPERMILPI,
396
      VPERMI,
397
      VPERM2X128,
398
399
      // Variable Permute (VPERM).
400
      // Res = VPERMV MaskV, V0
401
      VPERMV,
402
403
      // 3-op Variable Permute (VPERMT2).
404
      // Res = VPERMV3 V0, MaskV, V1
405
      VPERMV3,
406
407
      // Bitwise ternary logic.
408
      VPTERNLOG,
409
      // Fix Up Special Packed Float32/64 values.
410
      VFIXUPIMM, VFIXUPIMM_SAE,
411
      VFIXUPIMMS, VFIXUPIMMS_SAE,
412
      // Range Restriction Calculation For Packed Pairs of Float32/64 values.
413
      VRANGE, VRANGE_SAE, VRANGES, VRANGES_SAE,
414
      // Reduce - Perform Reduction Transformation on scalar/packed FP.
415
      VREDUCE, VREDUCE_SAE, VREDUCES, VREDUCES_SAE,
416
      // RndScale - Round FP Values To Include A Given Number Of Fraction Bits.
417
      // Also used by the legacy (V)ROUND intrinsics where we mask out the
418
      // scaling part of the immediate.
419
      VRNDSCALE, VRNDSCALE_SAE, VRNDSCALES, VRNDSCALES_SAE,
420
      // Tests Types Of a FP Values for packed types.
421
      VFPCLASS,
422
      // Tests Types Of a FP Values for scalar types.
423
      VFPCLASSS,
424
425
      // Broadcast scalar to vector.
426
      VBROADCAST,
427
      // Broadcast mask to vector.
428
      VBROADCASTM,
429
      // Broadcast subvector to vector.
430
      SUBV_BROADCAST,
431
432
      /// SSE4A Extraction and Insertion.
433
      EXTRQI, INSERTQI,
434
435
      // XOP arithmetic/logical shifts.
436
      VPSHA, VPSHL,
437
      // XOP signed/unsigned integer comparisons.
438
      VPCOM, VPCOMU,
439
      // XOP packed permute bytes.
440
      VPPERM,
441
      // XOP two source permutation.
442
      VPERMIL2,
443
444
      // Vector multiply packed unsigned doubleword integers.
445
      PMULUDQ,
446
      // Vector multiply packed signed doubleword integers.
447
      PMULDQ,
448
      // Vector Multiply Packed Unsigned Integers with Round and Scale.
449
      MULHRS,
450
451
      // Multiply and Add Packed Integers.
452
      VPMADDUBSW, VPMADDWD,
453
454
      // AVX512IFMA multiply and add.
455
      // NOTE: These are different than the instruction and perform
456
      // op0 x op1 + op2.
457
      VPMADD52L, VPMADD52H,
458
459
      // VNNI
460
      VPDPBUSD,
461
      VPDPBUSDS,
462
      VPDPWSSD,
463
      VPDPWSSDS,
464
465
      // FMA nodes.
466
      // We use the target independent ISD::FMA for the non-inverted case.
467
      FNMADD,
468
      FMSUB,
469
      FNMSUB,
470
      FMADDSUB,
471
      FMSUBADD,
472
473
      // FMA with rounding mode.
474
      FMADD_RND,
475
      FNMADD_RND,
476
      FMSUB_RND,
477
      FNMSUB_RND,
478
      FMADDSUB_RND,
479
      FMSUBADD_RND,
480
481
      // Compress and expand.
482
      COMPRESS,
483
      EXPAND,
484
485
      // Bits shuffle
486
      VPSHUFBITQMB,
487
488
      // Convert Unsigned/Signed Integer to Floating-Point Value with rounding mode.
489
      SINT_TO_FP_RND, UINT_TO_FP_RND,
490
      SCALAR_SINT_TO_FP, SCALAR_UINT_TO_FP,
491
      SCALAR_SINT_TO_FP_RND, SCALAR_UINT_TO_FP_RND,
492
493
      // Vector float/double to signed/unsigned integer.
494
      CVTP2SI, CVTP2UI, CVTP2SI_RND, CVTP2UI_RND,
495
      // Scalar float/double to signed/unsigned integer.
496
      CVTS2SI, CVTS2UI, CVTS2SI_RND, CVTS2UI_RND,
497
498
      // Vector float/double to signed/unsigned integer with truncation.
499
      CVTTP2SI, CVTTP2UI, CVTTP2SI_SAE, CVTTP2UI_SAE,
500
      // Scalar float/double to signed/unsigned integer with truncation.
501
      CVTTS2SI, CVTTS2UI, CVTTS2SI_SAE, CVTTS2UI_SAE,
502
503
      // Vector signed/unsigned integer to float/double.
504
      CVTSI2P, CVTUI2P,
505
506
      // Masked versions of above. Used for v2f64->v4f32.
507
      // SRC, PASSTHRU, MASK
508
      MCVTP2SI, MCVTP2UI, MCVTTP2SI, MCVTTP2UI,
509
      MCVTSI2P, MCVTUI2P,
510
511
      // Vector float to bfloat16.
512
      // Convert TWO packed single data to one packed BF16 data
513
      CVTNE2PS2BF16, 
514
      // Convert packed single data to packed BF16 data
515
      CVTNEPS2BF16,
516
      // Masked version of above.
517
      // SRC, PASSTHRU, MASK
518
      MCVTNEPS2BF16,
519
520
      // Dot product of BF16 pairs accumulated into
521
      // packed single precision.
522
      DPBF16PS,
523
524
      // Save xmm argument registers to the stack, according to %al. An operator
525
      // is needed so that this can be expanded with control flow.
526
      VASTART_SAVE_XMM_REGS,
527
528
      // Windows's _chkstk call to do stack probing.
529
      WIN_ALLOCA,
530
531
      // For allocating variable amounts of stack space when using
532
      // segmented stacks. Checks whether the current stacklet has enough space, and
533
      // falls back to heap allocation if not.
534
      SEG_ALLOCA,
535
536
      // Memory barriers.
537
      MEMBARRIER,
538
      MFENCE,
539
540
      // Store FP status word into i16 register.
541
      FNSTSW16r,
542
543
      // Store contents of %ah into %eflags.
544
      SAHF,
545
546
      // Get a random integer and indicate whether it is valid in CF.
547
      RDRAND,
548
549
      // Get a NIST SP800-90B & C compliant random integer and
550
      // indicate whether it is valid in CF.
551
      RDSEED,
552
553
      // Protection keys
554
      // RDPKRU - Operand 0 is chain. Operand 1 is value for ECX.
555
      // WRPKRU - Operand 0 is chain. Operand 1 is value for EDX. Operand 2 is
556
      // value for ECX.
557
      RDPKRU, WRPKRU,
558
559
      // SSE42 string comparisons.
560
      // These nodes produce 3 results: index, mask, and flags. X86ISelDAGToDAG
561
      // will emit one or two instructions based on which results are used. If
562
      // both flags and index/mask are used, this allows a single instruction since
563
      // we won't have to pick an opcode for flags. Instead we can rely on the
564
      // DAG to CSE everything and decide at isel.
565
      PCMPISTR,
566
      PCMPESTR,
567
568
      // Test if in transactional execution.
569
      XTEST,
570
571
      // ERI instructions.
572
      RSQRT28, RSQRT28_SAE, RSQRT28S, RSQRT28S_SAE,
573
      RCP28, RCP28_SAE, RCP28S, RCP28S_SAE, EXP2, EXP2_SAE,
574
575
      // Conversions between float and half-float.
576
      CVTPS2PH, CVTPH2PS, CVTPH2PS_SAE,
577
578
      // Masked version of above.
579
      // SRC, RND, PASSTHRU, MASK
580
      MCVTPS2PH,
581
582
      // Galois Field Arithmetic Instructions
583
      GF2P8AFFINEINVQB, GF2P8AFFINEQB, GF2P8MULB,
584
585
      // LWP insert record.
586
      LWPINS,
587
588
      // User level wait
589
      UMWAIT, TPAUSE,
590
591
      // Enqueue Stores Instructions
592
      ENQCMD, ENQCMDS,
593
594
      // For avx512-vp2intersect
595
      VP2INTERSECT,
596
597
      // Compare and swap.
598
      LCMPXCHG_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE,
599
      LCMPXCHG8_DAG,
600
      LCMPXCHG16_DAG,
601
      LCMPXCHG8_SAVE_EBX_DAG,
602
      LCMPXCHG16_SAVE_RBX_DAG,
603
604
      /// LOCK-prefixed arithmetic read-modify-write instructions.
605
      /// EFLAGS, OUTCHAIN = LADD(INCHAIN, PTR, RHS)
606
      LADD, LSUB, LOR, LXOR, LAND,
607
608
      // Load, scalar_to_vector, and zero extend.
609
      VZEXT_LOAD,
610
611
      // extract_vector_elt, store.
612
      VEXTRACT_STORE,
613
614
      // Store FP control word into i16 memory.
615
      FNSTCW16m,
616
617
      /// This instruction implements FP_TO_SINT with the
618
      /// integer destination in memory and a FP reg source.  This corresponds
619
      /// to the X86::FIST*m instructions and the rounding mode change stuff. It
620
      /// has two inputs (token chain and address) and two outputs (int value
621
      /// and token chain). Memory VT specifies the type to store to.
622
      FP_TO_INT_IN_MEM,
623
624
      /// This instruction implements SINT_TO_FP with the
625
      /// integer source in memory and FP reg result.  This corresponds to the
626
      /// X86::FILD*m instructions. It has two inputs (token chain and address)
627
      /// and two outputs (FP value and token chain). FILD_FLAG also produces a
628
      /// flag. The integer source type is specified by the memory VT.
629
      FILD,
630
      FILD_FLAG,
631
632
      /// This instruction implements a fp->int store from FP stack
633
      /// slots. This corresponds to the fist instruction. It takes a
634
      /// chain operand, value to store, address, and glue. The memory VT
635
      /// specifies the type to store as.
636
      FIST,
637
638
      /// This instruction implements an extending load to FP stack slots.
639
      /// This corresponds to the X86::FLD32m / X86::FLD64m. It takes a chain
640
      /// operand, and ptr to load from. The memory VT specifies the type to
641
      /// load from.
642
      FLD,
643
644
      /// This instruction implements a truncating store from FP stack
645
      /// slots. This corresponds to the X86::FST32m / X86::FST64m. It takes a
646
      /// chain operand, value to store, address, and glue. The memory VT
647
      /// specifies the type to store as.
648
      FST,
649
650
      /// This instruction grabs the address of the next argument
651
      /// from a va_list. (reads and modifies the va_list in memory)
652
      VAARG_64,
653
654
      // Vector truncating store with unsigned/signed saturation
655
      VTRUNCSTOREUS, VTRUNCSTORES,
656
      // Vector truncating masked store with unsigned/signed saturation
657
      VMTRUNCSTOREUS, VMTRUNCSTORES,
658
659
      // X86 specific gather and scatter
660
      MGATHER, MSCATTER,
661
662
      // WARNING: Do not add anything at the end unless you want the node to
663
      // have a memop! In fact, starting from FIRST_TARGET_MEMORY_OPCODE all
664
      // opcodes will be treated as target memory ops!
665
    };
666
  } // end namespace X86ISD
667
668
  /// Define some predicates that are used for node matching.
669
  namespace X86 {
670
    /// Returns true if Elt is a constant zero or floating point constant +0.0.
671
    bool isZeroNode(SDValue Elt);
672
673
    /// Returns true if the given offset can
674
    /// fit into the displacement field of the instruction.
675
    bool isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
676
                                      bool hasSymbolicDisplacement = true);
677
678
    /// Determines whether the callee is required to pop its
679
    /// own arguments. Callee pop is necessary to support tail calls.
680
    bool isCalleePop(CallingConv::ID CallingConv,
681
                     bool is64Bit, bool IsVarArg, bool GuaranteeTCO);
682
683
  } // end namespace X86
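  // A brief usage sketch for the predicates above, assuming `Op`, `CallConv`,
  // `Is64Bit`, `IsVarArg`, and `GuaranteeTCO` come from the surrounding
  // lowering code:
  //
  //   if (X86::isZeroNode(Op.getOperand(1)))
  //     ...; // the operand is an integer 0 or FP +0.0
  //   bool CalleePops =
  //       X86::isCalleePop(CallConv, Is64Bit, IsVarArg, GuaranteeTCO);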
684
685
  //===--------------------------------------------------------------------===//
686
  //  X86 Implementation of the TargetLowering interface
687
  class X86TargetLowering final : public TargetLowering {
688
  public:
689
    explicit X86TargetLowering(const X86TargetMachine &TM,
690
                               const X86Subtarget &STI);
691
692
    unsigned getJumpTableEncoding() const override;
693
    bool useSoftFloat() const override;
694
695
    void markLibCallAttributes(MachineFunction *MF, unsigned CC,
696
                               ArgListTy &Args) const override;
697
698
194k
    MVT getScalarShiftAmountTy(const DataLayout &, EVT VT) const override {
699
194k
      return MVT::i8;
700
194k
    }
701
702
    const MCExpr *
703
    LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
704
                              const MachineBasicBlock *MBB, unsigned uid,
705
                              MCContext &Ctx) const override;
706
707
    /// Returns relocation base for the given PIC jumptable.
708
    SDValue getPICJumpTableRelocBase(SDValue Table,
709
                                     SelectionDAG &DAG) const override;
710
    const MCExpr *
711
    getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
712
                                 unsigned JTI, MCContext &Ctx) const override;
713
714
    /// Return the desired alignment for ByVal aggregate
715
    /// function arguments in the caller parameter area. For X86, aggregates
716
    /// that contain SSE vectors are placed at 16-byte boundaries while the rest are at
717
    /// 4-byte boundaries.
718
    unsigned getByValTypeAlignment(Type *Ty,
719
                                   const DataLayout &DL) const override;
720
721
    /// Returns the target specific optimal type for load
722
    /// and store operations as a result of memset, memcpy, and memmove
723
    /// lowering. If DstAlign is zero that means the destination
724
    /// alignment can satisfy any constraint. Similarly if SrcAlign is zero it
725
    /// means there isn't a need to check it against alignment requirement,
726
    /// probably because the source does not need to be loaded. If 'IsMemset' is
727
    /// true, that means it's expanding a memset. If 'ZeroMemset' is true, that
728
    /// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy
729
    /// source is constant so it does not need to be loaded.
730
    /// It returns EVT::Other if the type should be determined using generic
731
    /// target-independent logic.
732
    EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign,
733
                            bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
734
                            const AttributeList &FuncAttributes) const override;
735
736
    /// Returns true if it's safe to use load / store of the
737
    /// specified type to expand memcpy / memset inline. This is mostly true
738
    /// for all types except for some special cases. For example, on X86
739
    /// targets without SSE2 f64 load / store are done with fldl / fstpl which
740
    /// also does type conversion. Note the specified type doesn't have to be
741
    /// legal as the hook is used before type legalization.
742
    bool isSafeMemOpType(MVT VT) const override;
743
744
    /// Returns true if the target allows unaligned memory accesses of the
745
    /// specified type. Returns whether it is "fast" in the last argument.
746
    bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, unsigned Align,
747
                                        MachineMemOperand::Flags Flags,
748
                                        bool *Fast) const override;
749
750
    /// Provide custom lowering hooks for some operations.
751
    ///
752
    SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
753
754
    /// Places new result values for the node in Results (their number
755
    /// and types must exactly match those of the original return values of
756
    /// the node), or leaves Results empty, which indicates that the node is not
757
    /// to be custom lowered after all.
758
    void LowerOperationWrapper(SDNode *N,
759
                               SmallVectorImpl<SDValue> &Results,
760
                               SelectionDAG &DAG) const override;
761
762
    /// Replace the results of a node with an illegal result
763
    /// type with new values built out of custom code.
764
    ///
765
    void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
766
                            SelectionDAG &DAG) const override;
767
768
    SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
769
770
    // Return true if it is profitable to combine a BUILD_VECTOR with a
771
    // stride-pattern to a shuffle and a truncate.
772
    // Example of such a combine:
773
    // v4i32 build_vector((extract_elt V, 1),
774
    //                    (extract_elt V, 3),
775
    //                    (extract_elt V, 5),
776
    //                    (extract_elt V, 7))
777
    //  -->
778
    // v4i32 truncate (bitcast (shuffle<1,u,3,u,4,u,5,u,6,u,7,u> V, u) to
779
    // v4i64)
780
    bool isDesirableToCombineBuildVectorToShuffleTruncate(
781
        ArrayRef<int> ShuffleMask, EVT SrcVT, EVT TruncVT) const override;
782
783
    /// Return true if the target has native support for
784
    /// the specified value type and it is 'desirable' to use the type for the
785
    /// given node type. e.g. On x86 i16 is legal, but undesirable since i16
786
    /// instruction encodings are longer and some i16 instructions are slow.
787
    bool isTypeDesirableForOp(unsigned Opc, EVT VT) const override;
788
789
    /// Return true if the target has native support for the
790
    /// specified value type and it is 'desirable' to use the type. e.g. On x86
791
    /// i16 is legal, but undesirable since i16 instruction encodings are longer
792
    /// and some i16 instructions are slow.
793
    bool IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const override;
794
795
    MachineBasicBlock *
796
    EmitInstrWithCustomInserter(MachineInstr &MI,
797
                                MachineBasicBlock *MBB) const override;
798
799
    /// This method returns the name of a target specific DAG node.
800
    const char *getTargetNodeName(unsigned Opcode) const override;
801
802
    /// Do not merge vector stores after legalization because that may conflict
803
    /// with x86-specific store splitting optimizations.
804
402k
    bool mergeStoresAfterLegalization(EVT MemVT) const override {
805
402k
      return !MemVT.isVector();
806
402k
    }
807
808
    bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
809
                          const SelectionDAG &DAG) const override;
810
811
    bool isCheapToSpeculateCttz() const override;
812
813
    bool isCheapToSpeculateCtlz() const override;
814
815
    bool isCtlzFast() const override;
816
817
27.6k
    bool hasBitPreservingFPLogic(EVT VT) const override {
818
27.6k
      return VT == MVT::f32 || VT == MVT::f64 || VT.isVector();
819
27.6k
    }
820
821
468
    bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const override {
822
468
      // If the pair to store is a mixture of float and int values, we will
823
468
      // save two bitwise instructions and one float-to-int instruction and
824
468
      // increase one store instruction. There is potentially a more
825
468
      // significant benefit because it avoids the float->int domain switch
826
468
      // for the input value. So it is more likely a win.
827
468
      if ((LTy.isFloatingPoint() && HTy.isInteger()) ||
828
468
          (LTy.isInteger() && HTy.isFloatingPoint()))
829
0
        return true;
830
468
      // If the pair only contains int values, we will save two bitwise
831
468
      // instructions and increase one store instruction (costing one more
832
468
      // store buffer). Since the benefit is less clear, we leave
833
468
      // such pairs out until we have a test case proving it is a win.
834
468
      return false;
835
468
    }
836
837
    bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;
838
839
    bool hasAndNotCompare(SDValue Y) const override;
840
841
    bool hasAndNot(SDValue Y) const override;
842
843
    bool shouldFoldConstantShiftPairToMask(const SDNode *N,
844
                                           CombineLevel Level) const override;
845
846
    bool shouldFoldMaskToVariableShiftPair(SDValue Y) const override;
847
848
    bool
849
    shouldTransformSignedTruncationCheck(EVT XVT,
850
127
                                         unsigned KeptBits) const override {
851
127
      // For vectors, we don't have a preference.
852
127
      if (XVT.isVector())
853
0
        return false;
854
127
855
241
      auto VTIsOk = [](EVT VT) -> bool {
856
241
        return VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 ||
857
241
               VT == MVT::i64;
858
241
      };
859
127
860
127
      // We are ok with KeptBitsVT being byte/word/dword, what MOVS supports.
861
127
      // XVT will be larger than KeptBitsVT.
862
127
      MVT KeptBitsVT = MVT::getIntegerVT(KeptBits);
863
127
      return VTIsOk(XVT) && VTIsOk(KeptBitsVT);
864
127
    }
865
866
1.14k
    bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const override {
867
1.14k
      if (DAG.getMachineFunction().getFunction().hasMinSize())
868
47
        return false;
869
1.09k
      return true;
870
1.09k
    }
871
872
    bool shouldSplatInsEltVarIndex(EVT VT) const override;
873
874
1.68k
    bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {
875
1.68k
      return VT.isScalarInteger();
876
1.68k
    }
877
878
    /// Vector-sized comparisons are fast using PCMPEQ + PMOVMSK or PTEST.
879
    MVT hasFastEqualityCompare(unsigned NumBits) const override;
880
881
    /// Return the value type to use for ISD::SETCC.
882
    EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
883
                           EVT VT) const override;
884
885
    bool targetShrinkDemandedConstant(SDValue Op, const APInt &Demanded,
886
                                      TargetLoweringOpt &TLO) const override;
887
888
    /// Determine which of the bits specified in Mask are known to be either
889
    /// zero or one and return them in the KnownZero/KnownOne bitsets.
890
    void computeKnownBitsForTargetNode(const SDValue Op,
891
                                       KnownBits &Known,
892
                                       const APInt &DemandedElts,
893
                                       const SelectionDAG &DAG,
894
                                       unsigned Depth = 0) const override;
895
896
    /// Determine the number of bits in the operation that are sign bits.
897
    unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
898
                                             const APInt &DemandedElts,
899
                                             const SelectionDAG &DAG,
900
                                             unsigned Depth) const override;
901
902
    bool SimplifyDemandedVectorEltsForTargetNode(SDValue Op,
903
                                                 const APInt &DemandedElts,
904
                                                 APInt &KnownUndef,
905
                                                 APInt &KnownZero,
906
                                                 TargetLoweringOpt &TLO,
907
                                                 unsigned Depth) const override;
908
909
    bool SimplifyDemandedBitsForTargetNode(SDValue Op,
910
                                           const APInt &DemandedBits,
911
                                           const APInt &DemandedElts,
912
                                           KnownBits &Known,
913
                                           TargetLoweringOpt &TLO,
914
                                           unsigned Depth) const override;
915
916
    const Constant *getTargetConstantFromLoad(LoadSDNode *LD) const override;
917
918
    SDValue unwrapAddress(SDValue N) const override;
919
920
    SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const;
921
922
    bool ExpandInlineAsm(CallInst *CI) const override;
923
924
    ConstraintType getConstraintType(StringRef Constraint) const override;
925
926
    /// Examine constraint string and operand type and determine a weight value.
927
    /// The operand object must already have been set up with the operand type.
928
    ConstraintWeight
929
      getSingleConstraintMatchWeight(AsmOperandInfo &info,
930
                                     const char *constraint) const override;
931
932
    const char *LowerXConstraint(EVT ConstraintVT) const override;
933
934
    /// Lower the specified operand into the Ops vector. If it is invalid, don't
935
    /// add anything to Ops. If hasMemory is true it means one of the asm
936
    /// constraints of the inline asm instruction being processed is 'm'.
937
    void LowerAsmOperandForConstraint(SDValue Op,
938
                                      std::string &Constraint,
939
                                      std::vector<SDValue> &Ops,
940
                                      SelectionDAG &DAG) const override;
941
942
    unsigned
943
266
    getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
944
266
      if (ConstraintCode == "i")
945
3
        return InlineAsm::Constraint_i;
946
263
      else if (ConstraintCode == "o")
947
4
        return InlineAsm::Constraint_o;
948
259
      else if (ConstraintCode == "v")
949
0
        return InlineAsm::Constraint_v;
950
259
      else if (ConstraintCode == "X")
951
1
        return InlineAsm::Constraint_X;
952
258
      return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
953
258
    }
954
955
    /// Handle Lowering flag assembly outputs.
956
    SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Flag, SDLoc DL,
957
                                        const AsmOperandInfo &Constraint,
958
                                        SelectionDAG &DAG) const override;
959
960
    /// Given a physical register constraint
961
    /// (e.g. {edx}), return the register number and the register class for the
962
    /// register.  This should only be used for C_Register constraints.  On
963
    /// error, this returns a register number of 0.
964
    std::pair<unsigned, const TargetRegisterClass *>
965
    getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
966
                                 StringRef Constraint, MVT VT) const override;
967
968
    /// Return true if the addressing mode represented
969
    /// by AM is legal for this target, for a load/store of the specified type.
970
    bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
971
                               Type *Ty, unsigned AS,
972
                               Instruction *I = nullptr) const override;
973
974
    /// Return true if the specified immediate is a legal
975
    /// icmp immediate, that is the target has icmp instructions which can
976
    /// compare a register against the immediate without having to materialize
977
    /// the immediate into a register.
978
    bool isLegalICmpImmediate(int64_t Imm) const override;
979
980
    /// Return true if the specified immediate is a legal
981
    /// add immediate, that is the target has add instructions which can
982
    /// add a register and the immediate without having to materialize
983
    /// the immediate into a register.
984
    bool isLegalAddImmediate(int64_t Imm) const override;
985
986
    bool isLegalStoreImmediate(int64_t Imm) const override;
987
988
    /// Return the cost of the scaling factor used in the addressing
989
    /// mode represented by AM for this target, for a load/store
990
    /// of the specified type.
991
    /// If the AM is supported, the return value must be >= 0.
992
    /// If the AM is not supported, it returns a negative value.
993
    int getScalingFactorCost(const DataLayout &DL, const AddrMode &AM, Type *Ty,
994
                             unsigned AS) const override;
995
996
    bool isVectorShiftByScalarCheap(Type *Ty) const override;
997
998
    /// Add x86-specific opcodes to the default list.
999
    bool isBinOp(unsigned Opcode) const override;
1000
1001
    /// Returns true if the opcode is a commutative binary operation.
1002
    bool isCommutativeBinOp(unsigned Opcode) const override;
1003
1004
    /// Return true if it's free to truncate a value of
1005
    /// type Ty1 to type Ty2. e.g. On x86 it's free to truncate a i32 value in
1006
    /// register EAX to i16 by referencing its sub-register AX.
1007
    bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
1008
    bool isTruncateFree(EVT VT1, EVT VT2) const override;
1009
1010
    bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override;
1011
1012
    /// Return true if any actual instruction that defines a
1013
    /// value of type Ty1 implicit zero-extends the value to Ty2 in the result
1014
    /// register. This does not necessarily include registers defined in
1015
    /// unknown ways, such as incoming arguments, or copies from unknown
1016
    /// virtual registers. Also, if isTruncateFree(Ty2, Ty1) is true, this
1017
    /// does not necessarily apply to truncate instructions. e.g. on x86-64,
1018
    /// all instructions that define 32-bit values implicit zero-extend the
1019
    /// result out to 64 bits.
1020
    bool isZExtFree(Type *Ty1, Type *Ty2) const override;
1021
    bool isZExtFree(EVT VT1, EVT VT2) const override;
1022
    bool isZExtFree(SDValue Val, EVT VT2) const override;
1023
1024
    /// Return true if folding a vector load into ExtVal (a sign, zero, or any
1025
    /// extend node) is profitable.
1026
    bool isVectorLoadExtDesirable(SDValue) const override;
1027
1028
    /// Return true if an FMA operation is faster than a pair of fmul and fadd
1029
    /// instructions. fmuladd intrinsics will be expanded to FMAs when this
1030
    /// method returns true, otherwise fmuladd is expanded to fmul + fadd.
1031
    bool isFMAFasterThanFMulAndFAdd(EVT VT) const override;
1032
1033
    /// Return true if it's profitable to narrow
1034
    /// operations of type VT1 to VT2. e.g. on x86, it's profitable to narrow
1035
    /// from i32 to i8 but not from i32 to i16.
1036
    bool isNarrowingProfitable(EVT VT1, EVT VT2) const override;
1037
1038
    /// Given an intrinsic, checks if on the target the intrinsic will need to map
1039
    /// to a MemIntrinsicNode (touches memory). If this is the case, it returns
1040
    /// true and stores the intrinsic information into the IntrinsicInfo that was
1041
    /// passed to the function.
1042
    bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
1043
                            MachineFunction &MF,
1044
                            unsigned Intrinsic) const override;
1045
1046
    /// Returns true if the target can instruction select the
1047
    /// specified FP immediate natively. If false, the legalizer will
1048
    /// materialize the FP immediate as a load from a constant pool.
1049
    bool isFPImmLegal(const APFloat &Imm, EVT VT,
1050
                      bool ForCodeSize) const override;
1051
1052
    /// Targets can use this to indicate that they only support *some*
1053
    /// VECTOR_SHUFFLE operations, those with specific masks. By default, if a
1054
    /// target supports the VECTOR_SHUFFLE node, all mask values are assumed to
1055
    /// be legal.
1056
    bool isShuffleMaskLegal(ArrayRef<int> Mask, EVT VT) const override;
1057
1058
    /// Similar to isShuffleMaskLegal. Targets can use this to indicate if there
1059
    /// is a suitable VECTOR_SHUFFLE that can be used to replace a VAND with a
1060
    /// constant pool entry.
1061
    bool isVectorClearMaskLegal(ArrayRef<int> Mask, EVT VT) const override;
1062
1063
    /// Returns true if lowering to a jump table is allowed.
1064
    bool areJTsAllowed(const Function *Fn) const override;
1065
1066
    /// If true, then instruction selection should
1067
    /// seek to shrink the FP constant of the specified type to a smaller type
1068
    /// in order to save space and / or reduce runtime.
1069
843
    bool ShouldShrinkFPConstant(EVT VT) const override {
1070
843
      // Don't shrink FP constpool if SSE2 is available since cvtss2sd is more
1071
843
      // expensive than a straight movsd. On the other hand, it's important to
1072
843
      // shrink long double fp constant since fldt is very slow.
1073
843
      return !X86ScalarSSEf64 || VT == MVT::f80;
1074
843
    }
1075
1076
    /// Return true if we believe it is correct and profitable to reduce the
1077
    /// load node to a smaller type.
1078
    bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
1079
                               EVT NewVT) const override;
1080
1081
    /// Return true if the specified scalar FP type is computed in an SSE
1082
    /// register, not on the X87 floating point stack.
1083
17.9k
    bool isScalarFPTypeInSSEReg(EVT VT) const {
1084
17.9k
      return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 is when SSE2
1085
17.9k
             (VT == MVT::f32 && X86ScalarSSEf32); // f32 is when SSE1
1086
17.9k
    }
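    // A hedged usage sketch: callers typically branch between SSE and x87
    // lowering on this predicate, e.g. (LowerWithSSE is a hypothetical helper,
    // not part of this interface):
    //
    //   if (isScalarFPTypeInSSEReg(Op.getValueType()))
    //     return LowerWithSSE(Op, DAG);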
1087
1088
    /// Returns true if it is beneficial to convert a load of a constant
1089
    /// to just the constant itself.
1090
    bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
1091
                                           Type *Ty) const override;
1092
1093
    bool reduceSelectOfFPConstantLoads(bool IsFPSetCC) const override;
1094
1095
    bool convertSelectOfConstantsToMath(EVT VT) const override;
1096
1097
    bool decomposeMulByConstant(EVT VT, SDValue C) const override;
1098
1099
    bool shouldUseStrictFP_TO_INT(EVT FpVT, EVT IntVT,
1100
                                  bool IsSigned) const override;
1101
1102
    /// Return true if EXTRACT_SUBVECTOR is cheap for this result type
1103
    /// with this index.
1104
    bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
1105
                                 unsigned Index) const override;
1106
1107
    /// Scalar ops always have equal or better analysis/performance/power than
1108
    /// the vector equivalent, so this always makes sense if the scalar op is
1109
    /// supported.
1110
    bool shouldScalarizeBinop(SDValue) const override;
1111
1112
    /// Extract of a scalar FP value from index 0 of a vector is free.
1113
21.8k
    bool isExtractVecEltCheap(EVT VT, unsigned Index) const override {
1114
21.8k
      EVT EltVT = VT.getScalarType();
1115
21.8k
      return (EltVT == MVT::f32 || EltVT == MVT::f64) && Index == 0;
1116
21.8k
    }
1117
1118
    /// Overflow nodes should get combined/lowered to optimal instructions
1119
    /// (they should allow eliminating explicit compares by getting flags from
1120
    /// math ops).
1121
    bool shouldFormOverflowOp(unsigned Opcode, EVT VT) const override;
1122
1123
    bool storeOfVectorConstantIsCheap(EVT MemVT, unsigned NumElem,
1124
46.6k
                                      unsigned AddrSpace) const override {
1125
46.6k
      // If we can replace more than 2 scalar stores, there will be a reduction
1126
46.6k
      // in instructions even after we add a vector constant load.
1127
46.6k
      return NumElem > 2;
1128
46.6k
    }
1129
1130
    bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT,
1131
                                 const SelectionDAG &DAG,
1132
                                 const MachineMemOperand &MMO) const override;
1133
1134
    /// Intel processors have a unified instruction and data cache
1135
1
    const char * getClearCacheBuiltinName() const override {
1136
1
      return nullptr; // nothing to do, move along.
1137
1
    }
1138
1139
    unsigned getRegisterByName(const char* RegName, EVT VT,
1140
                               SelectionDAG &DAG) const override;
1141
1142
    /// If a physical register, this returns the register that receives the
1143
    /// exception address on entry to an EH pad.
1144
    unsigned
1145
    getExceptionPointerRegister(const Constant *PersonalityFn) const override;
1146
1147
    /// If a physical register, this returns the register that receives the
1148
    /// exception typeid on entry to a landing pad.
1149
    unsigned
1150
    getExceptionSelectorRegister(const Constant *PersonalityFn) const override;
1151
1152
    virtual bool needsFixedCatchObjects() const override;
1153
1154
    /// This method returns a target specific FastISel object,
1155
    /// or null if the target does not support "fast" ISel.
1156
    FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
1157
                             const TargetLibraryInfo *libInfo) const override;
1158
1159
    /// If the target has a standard location for the stack protector cookie,
1160
    /// returns the address of that location. Otherwise, returns nullptr.
1161
    Value *getIRStackGuard(IRBuilder<> &IRB) const override;
1162
1163
    bool useLoadStackGuardNode() const override;
1164
    bool useStackGuardXorFP() const override;
1165
    void insertSSPDeclarations(Module &M) const override;
1166
    Value *getSDagStackGuard(const Module &M) const override;
1167
    Function *getSSPStackGuardCheck(const Module &M) const override;
1168
    SDValue emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val,
1169
                                const SDLoc &DL) const override;
1170
1171
1172
    /// Return true if the target stores SafeStack pointer at a fixed offset in
1173
    /// some non-standard address space, and populates the address space and
1174
    /// offset as appropriate.
1175
    Value *getSafeStackPointerLocation(IRBuilder<> &IRB) const override;
1176
1177
    SDValue BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, SDValue StackSlot,
1178
                      SelectionDAG &DAG) const;
1179
1180
    bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override;
1181
1182
    /// Customize the preferred legalization strategy for certain types.
1183
    LegalizeTypeAction getPreferredVectorAction(MVT VT) const override;
1184
1185
    MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC,
1186
                                      EVT VT) const override;
1187
1188
    unsigned getNumRegistersForCallingConv(LLVMContext &Context,
1189
                                           CallingConv::ID CC,
1190
                                           EVT VT) const override;
1191
1192
    bool isIntDivCheap(EVT VT, AttributeList Attr) const override;
1193
1194
    bool supportSwiftError() const override;
1195
1196
    StringRef getStackProbeSymbolName(MachineFunction &MF) const override;
1197
1198
158k
    bool hasVectorBlend() const override { return true; }
1199
1200
135k
    unsigned getMaxSupportedInterleaveFactor() const override { return 4; }
1201
1202
    /// Lower interleaved load(s) into target specific
1203
    /// instructions/intrinsics.
1204
    bool lowerInterleavedLoad(LoadInst *LI,
1205
                              ArrayRef<ShuffleVectorInst *> Shuffles,
1206
                              ArrayRef<unsigned> Indices,
1207
                              unsigned Factor) const override;
1208
1209
    /// Lower interleaved store(s) into target specific
1210
    /// instructions/intrinsics.
1211
    bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
1212
                               unsigned Factor) const override;
1213
1214
    SDValue expandIndirectJTBranch(const SDLoc& dl, SDValue Value,
1215
                                   SDValue Addr, SelectionDAG &DAG)
1216
                                   const override;
1217
1218
  protected:
1219
    std::pair<const TargetRegisterClass *, uint8_t>
1220
    findRepresentativeClass(const TargetRegisterInfo *TRI,
1221
                            MVT VT) const override;
1222
1223
  private:
1224
    /// Keep a reference to the X86Subtarget around so that we can
1225
    /// make the right decision when generating code for different targets.
1226
    const X86Subtarget &Subtarget;
1227
1228
    /// Select between SSE or x87 floating point ops.
1229
    /// When SSE is available, use it for f32 operations.
1230
    /// When SSE2 is available, use it for f64 operations.
1231
    bool X86ScalarSSEf32;
1232
    bool X86ScalarSSEf64;
1233
1234
    /// A list of legal FP immediates.
1235
    std::vector<APFloat> LegalFPImmediates;
1236
1237
    /// Indicate that this x86 target can instruction
1238
    /// select the specified FP immediate natively.
1239
101k
    void addLegalFPImmediate(const APFloat& Imm) {
1240
101k
      LegalFPImmediates.push_back(Imm);
1241
101k
    }
1242
1243
    SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
1244
                            CallingConv::ID CallConv, bool isVarArg,
1245
                            const SmallVectorImpl<ISD::InputArg> &Ins,
1246
                            const SDLoc &dl, SelectionDAG &DAG,
1247
                            SmallVectorImpl<SDValue> &InVals,
1248
                            uint32_t *RegMask) const;
1249
    SDValue LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
1250
                             const SmallVectorImpl<ISD::InputArg> &ArgInfo,
1251
                             const SDLoc &dl, SelectionDAG &DAG,
1252
                             const CCValAssign &VA, MachineFrameInfo &MFI,
1253
                             unsigned i) const;
1254
    SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg,
1255
                             const SDLoc &dl, SelectionDAG &DAG,
1256
                             const CCValAssign &VA,
1257
                             ISD::ArgFlagsTy Flags) const;
1258
1259
    // Call lowering helpers.
1260
1261
    /// Check whether the call is eligible for tail call optimization. Targets
1262
    /// that want to do tail call optimization should implement this function.
1263
    bool IsEligibleForTailCallOptimization(SDValue Callee,
1264
                                           CallingConv::ID CalleeCC,
1265
                                           bool isVarArg,
1266
                                           bool isCalleeStructRet,
1267
                                           bool isCallerStructRet,
1268
                                           Type *RetTy,
1269
                                    const SmallVectorImpl<ISD::OutputArg> &Outs,
1270
                                    const SmallVectorImpl<SDValue> &OutVals,
1271
                                    const SmallVectorImpl<ISD::InputArg> &Ins,
1272
                                           SelectionDAG& DAG) const;
1273
    SDValue EmitTailCallLoadRetAddr(SelectionDAG &DAG, SDValue &OutRetAddr,
1274
                                    SDValue Chain, bool IsTailCall,
1275
                                    bool Is64Bit, int FPDiff,
1276
                                    const SDLoc &dl) const;
1277
1278
    unsigned GetAlignedArgumentStackSize(unsigned StackSize,
1279
                                         SelectionDAG &DAG) const;
1280
1281
    unsigned getAddressSpace(void) const;
1282
1283
    SDValue FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool isSigned) const;
1284
1285
    SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
1286
    SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const;
1287
    SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
1288
    SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
1289
1290
    unsigned getGlobalWrapperKind(const GlobalValue *GV = nullptr,
1291
                                  const unsigned char OpFlags = 0) const;
1292
    SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
1293
    SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
1294
    SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
1295
    SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
1296
    SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const;
1297
1298
    /// Creates target global address or external symbol nodes for calls or
1299
    /// other uses.
1300
    SDValue LowerGlobalOrExternal(SDValue Op, SelectionDAG &DAG,
1301
                                  bool ForCall) const;
1302
1303
    SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerADDROFRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const;
    SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
    SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
    SDValue lowerEH_SJLJ_SETUP_DISPATCH(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGC_TRANSITION_START(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGC_TRANSITION_END(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;

    SDValue
    LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
                         const SmallVectorImpl<ISD::InputArg> &Ins,
                         const SDLoc &dl, SelectionDAG &DAG,
                         SmallVectorImpl<SDValue> &InVals) const override;
    SDValue LowerCall(CallLoweringInfo &CLI,
                      SmallVectorImpl<SDValue> &InVals) const override;

    SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
                        const SmallVectorImpl<ISD::OutputArg> &Outs,
                        const SmallVectorImpl<SDValue> &OutVals,
                        const SDLoc &dl, SelectionDAG &DAG) const override;

    bool supportSplitCSR(MachineFunction *MF) const override {
      return MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
          MF->getFunction().hasFnAttribute(Attribute::NoUnwind);
    }
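    // Illustrative note (not from the original header): split CSR only kicks
    // in for TLS wrapper-style functions, i.e. a hypothetical IR function such
    // as
    //
    //   define cxx_fast_tlscc i8* @_ZTW5thing() nounwind { ... }
    //
    // which uses the cxx_fast_tlscc calling convention and is marked nounwind.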
    void initializeSplitCSR(MachineBasicBlock *Entry) const override;
    void insertCopiesSplitCSR(
      MachineBasicBlock *Entry,
      const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;

    bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;

    bool mayBeEmittedAsTailCall(const CallInst *CI) const override;

    EVT getTypeForExtReturn(LLVMContext &Context, EVT VT,
                            ISD::NodeType ExtendKind) const override;

    bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
                        bool isVarArg,
                        const SmallVectorImpl<ISD::OutputArg> &Outs,
                        LLVMContext &Context) const override;

    const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;

    TargetLoweringBase::AtomicExpansionKind
    shouldExpandAtomicLoadInIR(LoadInst *SI) const override;
    bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
    TargetLoweringBase::AtomicExpansionKind
    shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;

    LoadInst *
    lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const override;

    bool needsCmpXchgNb(Type *MemType) const;

    void SetupEntryBlockForSjLj(MachineInstr &MI, MachineBasicBlock *MBB,
                                MachineBasicBlock *DispatchBB, int FI) const;

    // Utility function to emit the low-level va_arg code for X86-64.
    MachineBasicBlock *
    EmitVAARG64WithCustomInserter(MachineInstr &MI,
                                  MachineBasicBlock *MBB) const;

    /// Utility function to emit the xmm reg save portion of va_start.
    MachineBasicBlock *
    EmitVAStartSaveXMMRegsWithCustomInserter(MachineInstr &BInstr,
                                             MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredCascadedSelect(MachineInstr &MI1,
                                                 MachineInstr &MI2,
                                                 MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredSelect(MachineInstr &I,
                                         MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredAtomicFP(MachineInstr &I,
                                           MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI,
                                           MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredCatchPad(MachineInstr &MI,
                                           MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredSegAlloca(MachineInstr &MI,
                                            MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredTLSAddr(MachineInstr &MI,
                                          MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredTLSCall(MachineInstr &MI,
                                          MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredRetpoline(MachineInstr &MI,
                                            MachineBasicBlock *BB) const;

    MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr &MI,
                                        MachineBasicBlock *MBB) const;

    void emitSetJmpShadowStackFix(MachineInstr &MI,
                                  MachineBasicBlock *MBB) const;

    MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr &MI,
                                         MachineBasicBlock *MBB) const;

    MachineBasicBlock *emitLongJmpShadowStackFix(MachineInstr &MI,
                                                 MachineBasicBlock *MBB) const;

    MachineBasicBlock *emitFMA3Instr(MachineInstr &MI,
                                     MachineBasicBlock *MBB) const;

    MachineBasicBlock *EmitSjLjDispatchBlock(MachineInstr &MI,
                                             MachineBasicBlock *MBB) const;

    /// Emit nodes that will be selected as "cmp Op0,Op1", or something
    /// equivalent, for use with the given x86 condition code.
    SDValue EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC, const SDLoc &dl,
                    SelectionDAG &DAG) const;

    /// Convert a comparison if required by the subtarget.
    SDValue ConvertCmpIfNecessary(SDValue Cmp, SelectionDAG &DAG) const;

    /// Emit flags for the given setcc condition and operands. Also returns the
    /// corresponding X86 condition code constant in X86CC.
    SDValue emitFlagsForSetcc(SDValue Op0, SDValue Op1,
                              ISD::CondCode CC, const SDLoc &dl,
                              SelectionDAG &DAG,
                              SDValue &X86CC) const;

    /// Check if replacement of SQRT with RSQRT should be disabled.
    bool isFsqrtCheap(SDValue Operand, SelectionDAG &DAG) const override;

    /// Use rsqrt* to speed up sqrt calculations.
    SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
                            int &RefinementSteps, bool &UseOneConstNR,
                            bool Reciprocal) const override;

    /// Use rcp* to speed up fdiv calculations.
    SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
                             int &RefinementSteps) const override;

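    // Illustrative note (not from the original header): the hardware estimates
    // returned by the hooks above (RSQRTPS/RCPPS and friends) are typically
    // refined with standard Newton-Raphson steps, where 'a' is the original
    // operand and x_n the current approximation:
    //
    //   reciprocal square root:  x_{n+1} = x_n * (1.5 - 0.5 * a * x_n^2)
    //   reciprocal:              x_{n+1} = x_n * (2.0 - a * x_n)
    //
    // RefinementSteps controls how many such iterations are emitted.
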
    /// Reassociate floating point divisions into multiply by reciprocal.
    unsigned combineRepeatedFPDivisors() const override;
  };

  namespace X86 {
    FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
                             const TargetLibraryInfo *libInfo);
  } // end namespace X86

  // Base class for all X86 non-masked store operations.
  class X86StoreSDNode : public MemSDNode {
  public:
    X86StoreSDNode(unsigned Opcode, unsigned Order, const DebugLoc &dl,
                   SDVTList VTs, EVT MemVT,
                   MachineMemOperand *MMO)
      : MemSDNode(Opcode, Order, dl, VTs, MemVT, MMO) {}
    const SDValue &getValue() const { return getOperand(1); }
    const SDValue &getBasePtr() const { return getOperand(2); }

    static bool classof(const SDNode *N) {
      return N->getOpcode() == X86ISD::VTRUNCSTORES ||
        N->getOpcode() == X86ISD::VTRUNCSTOREUS;
    }
  };

  // Base class for all X86 masked store operations.
  // The class has the same order of operands as MaskedStoreSDNode for
  // convenience.
  class X86MaskedStoreSDNode : public MemSDNode {
  public:
    X86MaskedStoreSDNode(unsigned Opcode, unsigned Order,
                         const DebugLoc &dl, SDVTList VTs, EVT MemVT,
                         MachineMemOperand *MMO)
      : MemSDNode(Opcode, Order, dl, VTs, MemVT, MMO) {}

    const SDValue &getValue()   const { return getOperand(1); }
    const SDValue &getBasePtr() const { return getOperand(2); }
    const SDValue &getMask()    const { return getOperand(3); }

    static bool classof(const SDNode *N) {
      return N->getOpcode() == X86ISD::VMTRUNCSTORES ||
        N->getOpcode() == X86ISD::VMTRUNCSTOREUS;
    }
  };

  // X86 Truncating Store with Signed saturation.
  class TruncSStoreSDNode : public X86StoreSDNode {
  public:
    TruncSStoreSDNode(unsigned Order, const DebugLoc &dl,
                        SDVTList VTs, EVT MemVT, MachineMemOperand *MMO)
      : X86StoreSDNode(X86ISD::VTRUNCSTORES, Order, dl, VTs, MemVT, MMO) {}

    static bool classof(const SDNode *N) {
      return N->getOpcode() == X86ISD::VTRUNCSTORES;
    }
  };

  // X86 Truncating Store with Unsigned saturation.
  class TruncUSStoreSDNode : public X86StoreSDNode {
  public:
    TruncUSStoreSDNode(unsigned Order, const DebugLoc &dl,
                      SDVTList VTs, EVT MemVT, MachineMemOperand *MMO)
      : X86StoreSDNode(X86ISD::VTRUNCSTOREUS, Order, dl, VTs, MemVT, MMO) {}

    static bool classof(const SDNode *N) {
      return N->getOpcode() == X86ISD::VTRUNCSTOREUS;
    }
  };

  // X86 Truncating Masked Store with Signed saturation.
  class MaskedTruncSStoreSDNode : public X86MaskedStoreSDNode {
  public:
    MaskedTruncSStoreSDNode(unsigned Order,
                         const DebugLoc &dl, SDVTList VTs, EVT MemVT,
                         MachineMemOperand *MMO)
      : X86MaskedStoreSDNode(X86ISD::VMTRUNCSTORES, Order, dl, VTs, MemVT, MMO) {}

    static bool classof(const SDNode *N) {
      return N->getOpcode() == X86ISD::VMTRUNCSTORES;
    }
  };

  // X86 Truncating Masked Store with Unsigned saturation.
  class MaskedTruncUSStoreSDNode : public X86MaskedStoreSDNode {
  public:
    MaskedTruncUSStoreSDNode(unsigned Order,
                            const DebugLoc &dl, SDVTList VTs, EVT MemVT,
                            MachineMemOperand *MMO)
      : X86MaskedStoreSDNode(X86ISD::VMTRUNCSTOREUS, Order, dl, VTs, MemVT, MMO) {}

    static bool classof(const SDNode *N) {
      return N->getOpcode() == X86ISD::VMTRUNCSTOREUS;
    }
  };
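
  // Illustrative sketch (not part of the original header): the classof()
  // overloads above are what let LLVM's RTTI (isa<>/dyn_cast<>) recognise
  // these custom node classes. A hypothetical helper dispatching on them:
  inline bool isTruncatingSatStore(const SDNode *N, bool &IsSigned) {
    // Signed-saturation truncating stores (plain or masked).
    if (isa<TruncSStoreSDNode>(N) || isa<MaskedTruncSStoreSDNode>(N)) {
      IsSigned = true;
      return true;
    }
    // Unsigned-saturation truncating stores (plain or masked).
    if (isa<TruncUSStoreSDNode>(N) || isa<MaskedTruncUSStoreSDNode>(N)) {
      IsSigned = false;
      return true;
    }
    return false;
  }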

  // X86 specific Gather/Scatter nodes.
  // The class has the same order of operands as MaskedGatherScatterSDNode for
  // convenience.
  class X86MaskedGatherScatterSDNode : public MemSDNode {
  public:
    X86MaskedGatherScatterSDNode(unsigned Opc, unsigned Order,
                                 const DebugLoc &dl, SDVTList VTs, EVT MemVT,
                                 MachineMemOperand *MMO)
        : MemSDNode(Opc, Order, dl, VTs, MemVT, MMO) {}

    const SDValue &getBasePtr() const { return getOperand(3); }
    const SDValue &getIndex()   const { return getOperand(4); }
    const SDValue &getMask()    const { return getOperand(2); }
    const SDValue &getScale()   const { return getOperand(5); }

    static bool classof(const SDNode *N) {
      return N->getOpcode() == X86ISD::MGATHER ||
             N->getOpcode() == X86ISD::MSCATTER;
    }
  };

  class X86MaskedGatherSDNode : public X86MaskedGatherScatterSDNode {
  public:
    X86MaskedGatherSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
                          EVT MemVT, MachineMemOperand *MMO)
        : X86MaskedGatherScatterSDNode(X86ISD::MGATHER, Order, dl, VTs, MemVT,
                                       MMO) {}

    const SDValue &getPassThru() const { return getOperand(1); }

    static bool classof(const SDNode *N) {
      return N->getOpcode() == X86ISD::MGATHER;
    }
  };

  class X86MaskedScatterSDNode : public X86MaskedGatherScatterSDNode {
  public:
    X86MaskedScatterSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
                           EVT MemVT, MachineMemOperand *MMO)
        : X86MaskedGatherScatterSDNode(X86ISD::MSCATTER, Order, dl, VTs, MemVT,
                                       MMO) {}

    const SDValue &getValue() const { return getOperand(1); }

    static bool classof(const SDNode *N) {
      return N->getOpcode() == X86ISD::MSCATTER;
    }
  };
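
  // Illustrative sketch (not part of the original header): a hypothetical
  // helper reading the gather/scatter operands through the accessors above,
  // e.g. to check that the scale is one of the encodable values 1, 2, 4 or 8.
  inline bool hasValidGatherScatterScale(const SDNode *N) {
    const auto *GS = dyn_cast<X86MaskedGatherScatterSDNode>(N);
    if (!GS)
      return false;
    // The scale operand is expected to be a constant for these nodes.
    const auto *C = dyn_cast<ConstantSDNode>(GS->getScale());
    if (!C)
      return false;
    uint64_t Scale = C->getZExtValue();
    return Scale == 1 || Scale == 2 || Scale == 4 || Scale == 8;
  }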

  /// Generate unpacklo/unpackhi shuffle mask.
  template <typename T = int>
  void createUnpackShuffleMask(MVT VT, SmallVectorImpl<T> &Mask, bool Lo,
                               bool Unary) {
    assert(Mask.empty() && "Expected an empty shuffle mask vector");
    int NumElts = VT.getVectorNumElements();
    int NumEltsInLane = 128 / VT.getScalarSizeInBits();
    for (int i = 0; i < NumElts; ++i) {
      unsigned LaneStart = (i / NumEltsInLane) * NumEltsInLane;
      int Pos = (i % NumEltsInLane) / 2 + LaneStart;
      Pos += (Unary ? 0 : NumElts * (i % 2));
      Pos += (Lo ? 0 : NumEltsInLane / 2);
      Mask.push_back(Pos);
    }
  }
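
  // Illustrative sketch (not from the original header): a hypothetical helper
  // showing what createUnpackShuffleMask produces for a concrete type. For
  // MVT::v8i16 with Lo = true and Unary = false, NumEltsInLane is 128/16 = 8,
  // so the mask interleaves the low halves of both sources (PUNPCKLWD order);
  // indices >= 8 select elements from the second source vector.
  inline bool checkUnpackLoV8I16MaskExample() {
    SmallVector<int, 8> Mask;
    createUnpackShuffleMask(MVT::v8i16, Mask, /*Lo=*/true, /*Unary=*/false);
    return makeArrayRef(Mask).equals({0, 8, 1, 9, 2, 10, 3, 11});
  }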

  /// Helper function to scale a shuffle or target shuffle mask, replacing each
  /// mask index with the scaled sequential indices for an equivalent narrowed
  /// mask. This is the reverse process to canWidenShuffleElements, but can
  /// always succeed.
  template <typename T>
  void scaleShuffleMask(int Scale, ArrayRef<T> Mask,
                        SmallVectorImpl<T> &ScaledMask) {
    assert(0 < Scale && "Unexpected scaling factor");
    size_t NumElts = Mask.size();
    ScaledMask.assign(NumElts * Scale, -1);

    for (int i = 0; i != (int)NumElts; ++i) {
      int M = Mask[i];

      // Repeat sentinel values in every mask element.
      if (M < 0) {
        for (int s = 0; s != Scale; ++s)
          ScaledMask[(Scale * i) + s] = M;
        continue;
      }

      // Scale mask element and increment across each mask element.
      for (int s = 0; s != Scale; ++s)
        ScaledMask[(Scale * i) + s] = (Scale * M) + s;
    }
  }
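
  // Illustrative sketch (not from the original header): a hypothetical helper
  // showing the scaling above on a small mask. Each index M expands to the
  // run {Scale*M, ..., Scale*M + Scale - 1}; sentinel (negative) entries are
  // simply repeated Scale times.
  inline bool checkScaleShuffleMaskExample() {
    SmallVector<int, 8> Scaled;
    int Mask[] = {0, -1, 3};
    scaleShuffleMask<int>(2, makeArrayRef(Mask), Scaled);
    return makeArrayRef(Scaled).equals({0, 1, -1, -1, 6, 7});
  }
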
} // end namespace llvm

#endif // LLVM_LIB_TARGET_X86_X86ISELLOWERING_H