Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Target/PowerPC/PPCISelLowering.h
Line | Count | Source
1
//===-- PPCISelLowering.h - PPC32 DAG Lowering Interface --------*- C++ -*-===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This file defines the interfaces that PPC uses to lower LLVM code into a
10
// selection DAG.
11
//
12
//===----------------------------------------------------------------------===//
13
14
#ifndef LLVM_LIB_TARGET_POWERPC_PPCISELLOWERING_H
15
#define LLVM_LIB_TARGET_POWERPC_PPCISELLOWERING_H
16
17
#include "PPCInstrInfo.h"
18
#include "llvm/CodeGen/CallingConvLower.h"
19
#include "llvm/CodeGen/MachineFunction.h"
20
#include "llvm/CodeGen/MachineMemOperand.h"
21
#include "llvm/CodeGen/SelectionDAG.h"
22
#include "llvm/CodeGen/SelectionDAGNodes.h"
23
#include "llvm/CodeGen/TargetLowering.h"
24
#include "llvm/CodeGen/ValueTypes.h"
25
#include "llvm/IR/Attributes.h"
26
#include "llvm/IR/CallingConv.h"
27
#include "llvm/IR/Function.h"
28
#include "llvm/IR/InlineAsm.h"
29
#include "llvm/IR/Metadata.h"
30
#include "llvm/IR/Type.h"
31
#include "llvm/Support/MachineValueType.h"
32
#include <utility>
33
34
namespace llvm {
35
36
  namespace PPCISD {
37
38
    // When adding a NEW PPCISD node please add it to the correct position in
39
    // the enum. The order of elements in this enum matters!
40
    // Values that are added after this entry:
41
    //     STBRX = ISD::FIRST_TARGET_MEMORY_OPCODE
42
    // are considered memory opcodes and are treated differently than entries
43
    // that come before it. For example, ADD or MUL should be placed before
44
    // the ISD::FIRST_TARGET_MEMORY_OPCODE while a LOAD or STORE should come
45
    // after it. (A sketch of this opcode-range check follows the namespace.)
46
    enum NodeType : unsigned {
47
      // Start the numbering where the builtin ops and target ops leave off.
48
      FIRST_NUMBER = ISD::BUILTIN_OP_END,
49
50
      /// FSEL - Traditional three-operand fsel node.
51
      ///
52
      FSEL,
53
54
      /// FCFID - The FCFID instruction, taking an f64 operand and producing
55
      /// an f64 value containing the FP representation of the integer that
56
      /// was temporarily in the f64 operand.
57
      FCFID,
58
59
      /// Newer FCFID[US] integer-to-floating-point conversion instructions for
60
      /// unsigned integers and single-precision outputs.
61
      FCFIDU, FCFIDS, FCFIDUS,
62
63
      /// FCTI[D,W]Z - The FCTIDZ and FCTIWZ instructions, taking an f32 or f64
64
      /// operand, producing an f64 value containing the integer representation
65
      /// of that FP value.
66
      FCTIDZ, FCTIWZ,
67
68
      /// Newer FCTI[D,W]UZ floating-point-to-integer conversion instructions for
69
      /// unsigned integers with round toward zero.
70
      FCTIDUZ, FCTIWUZ,
71
72
      /// Floating-point-to-integer conversion instructions
73
      FP_TO_UINT_IN_VSR, FP_TO_SINT_IN_VSR,
74
75
      /// VEXTS, ByteWidth - takes an input in VSFRC and produces an output in
76
      /// VSFRC that is sign-extended from ByteWidth to a 64-bit integer.
77
      VEXTS,
78
79
      /// SExtVElems, takes an input vector of a smaller type and sign
80
      /// extends to an output vector of a larger type.
81
      SExtVElems,
82
83
      /// Reciprocal estimate instructions (unary FP ops).
84
      FRE, FRSQRTE,
85
86
      // VMADDFP, VNMSUBFP - The VMADDFP and VNMSUBFP instructions, taking
87
      // three v4f32 operands and producing a v4f32 result.
88
      VMADDFP, VNMSUBFP,
89
90
      /// VPERM - The PPC VPERM Instruction.
91
      ///
92
      VPERM,
93
94
      /// XXSPLT - The PPC VSX splat instructions
95
      ///
96
      XXSPLT,
97
98
      /// VECINSERT - The PPC vector insert instruction
99
      ///
100
      VECINSERT,
101
102
      /// XXREVERSE - The PPC VSX reverse instruction
103
      ///
104
      XXREVERSE,
105
106
      /// VECSHL - The PPC vector shift left instruction
107
      ///
108
      VECSHL,
109
110
      /// XXPERMDI - The PPC XXPERMDI instruction
111
      ///
112
      XXPERMDI,
113
114
      /// The CMPB instruction (takes two operands of i32 or i64).
115
      CMPB,
116
117
      /// Hi/Lo - These represent the high and low 16-bit parts of a global
118
      /// address respectively.  These nodes have two operands, the first of
119
      /// which must be a TargetGlobalAddress, and the second of which must be a
120
      /// Constant.  Selected naively, these turn into 'lis G+C' and 'li G+C',
121
      /// though these are usually folded into other nodes.
122
      Hi, Lo,
123
124
      /// The following two target-specific nodes are used for calls through
125
      /// function pointers in the 64-bit SVR4 ABI.
126
127
      /// OPRC, CHAIN = DYNALLOC(CHAIN, NEGSIZE, FRAME_INDEX)
128
      /// This instruction is lowered in PPCRegisterInfo::eliminateFrameIndex to
129
      /// compute an allocation on the stack.
130
      DYNALLOC,
131
132
      /// This instruction is lowered in PPCRegisterInfo::eliminateFrameIndex to
133
      /// compute an offset from native SP to the address of the most recent
134
      /// dynamic alloca.
135
      DYNAREAOFFSET,
136
137
      /// GlobalBaseReg - On Darwin, this node represents the result of the mflr
138
      /// at function entry, used for PIC code.
139
      GlobalBaseReg,
140
141
      /// These nodes represent PPC shifts.
142
      ///
143
      /// For scalar types, only the last `n + 1` bits of the shift amounts
144
      /// are used, where n is log2(sizeof(element) * 8). See sld/slw, etc.
145
      /// for exact behaviors.
146
      ///
147
      /// For vector types, only the last n bits are used. See vsld.
148
      SRL, SRA, SHL,
149
150
      /// EXTSWSLI = The PPC extswsli instruction, which does an extend-sign
151
      /// word and shift left immediate.
152
      EXTSWSLI,
153
154
      /// The combination of sra[wd]i and addze used to implement signed
155
      /// integer division by a power of 2. The first operand is the dividend,
156
      /// and the second is the constant shift amount (representing the
157
      /// divisor).
158
      SRA_ADDZE,
159
160
      /// CALL - A direct function call.
161
      /// CALL_NOP is a call with the special NOP which follows 64-bit
162
      /// SVR4 calls and 32-bit/64-bit AIX calls.
163
      CALL, CALL_NOP,
164
165
      /// CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a
166
      /// MTCTR instruction.
167
      MTCTR,
168
169
      /// CHAIN,FLAG = BCTRL(CHAIN, INFLAG) - Directly corresponds to a
170
      /// BCTRL instruction.
171
      BCTRL,
172
173
      /// CHAIN,FLAG = BCTRL(CHAIN, ADDR, INFLAG) - The combination of a bctrl
174
      /// instruction and the TOC reload required on SVR4 PPC64.
175
      BCTRL_LOAD_TOC,
176
177
      /// Return with a flag operand, matched by 'blr'
178
      RET_FLAG,
179
180
      /// R32 = MFOCRF(CRREG, INFLAG) - Represents the MFOCRF instruction.
181
      /// This copies the bits corresponding to the specified CRREG into the
182
      /// resultant GPR.  Bits corresponding to other CR regs are undefined.
183
      MFOCRF,
184
185
      /// Direct move from a VSX register to a GPR
186
      MFVSR,
187
188
      /// Direct move from a GPR to a VSX register (algebraic)
189
      MTVSRA,
190
191
      /// Direct move from a GPR to a VSX register (zero)
192
      MTVSRZ,
193
194
      /// Direct move of 2 consecutive GPRs to a VSX register.
195
      BUILD_FP128,
196
197
      /// BUILD_SPE64 and EXTRACT_SPE are analogous to BUILD_PAIR and
198
      /// EXTRACT_ELEMENT but take f64 arguments instead of i64, as i64 is
199
      /// unsupported for this target.
200
      /// Merge 2 GPRs to a single SPE register.
201
      BUILD_SPE64,
202
203
      /// Extract SPE register component, second argument is high or low.
204
      EXTRACT_SPE,
205
206
      /// Extract a subvector from signed integer vector and convert to FP.
207
      /// It is primarily used to convert a (widened) illegal integer vector
208
      /// type to a legal floating point vector type.
209
      /// For example v2i32 -> widened to v4i32 -> v2f64
210
      SINT_VEC_TO_FP,
211
212
      /// Extract a subvector from unsigned integer vector and convert to FP.
213
      /// As with SINT_VEC_TO_FP, used for converting illegal types.
214
      UINT_VEC_TO_FP,
215
216
      // FIXME: Remove these once the ANDI glue bug is fixed:
217
      /// i1 = ANDIo_1_[EQ|GT]_BIT(i32 or i64 x) - Represents the result of the
218
      /// eq or gt bit of CR0 after executing andi. x, 1. This is used to
219
      /// implement truncation of i32 or i64 to i1.
220
      ANDIo_1_EQ_BIT, ANDIo_1_GT_BIT,
221
222
      // READ_TIME_BASE - A read of the 64-bit time-base register on a 32-bit
223
      // target (returns (Lo, Hi)). It takes a chain operand.
224
      READ_TIME_BASE,
225
226
      // EH_SJLJ_SETJMP - SjLj exception handling setjmp.
227
      EH_SJLJ_SETJMP,
228
229
      // EH_SJLJ_LONGJMP - SjLj exception handling longjmp.
230
      EH_SJLJ_LONGJMP,
231
232
      /// RESVEC = VCMP(LHS, RHS, OPC) - Represents one of the altivec VCMP*
233
      /// instructions.  For lack of a better number, we use the opcode number
234
      /// encoding for the OPC field to identify the compare.  For example, 838
235
      /// is VCMPGTSH.
236
      VCMP,
237
238
      /// RESVEC, OUTFLAG = VCMPo(LHS, RHS, OPC) - Represents one of the
239
      /// altivec VCMP*o instructions.  For lack of a better number, we use the
240
      /// opcode number encoding for the OPC field to identify the compare.  For
241
      /// example, 838 is VCMPGTSH.
242
      VCMPo,
243
244
      /// CHAIN = COND_BRANCH CHAIN, CRRC, OPC, DESTBB [, INFLAG] - This
245
      /// corresponds to the COND_BRANCH pseudo instruction.  CRRC is the
246
      /// condition register to branch on, OPC is the branch opcode to use (e.g.
247
      /// PPC::BLE), DESTBB is the destination block to branch to, and INFLAG is
248
      /// an optional input flag argument.
249
      COND_BRANCH,
250
251
      /// CHAIN = BDNZ CHAIN, DESTBB - These are used to create counter-based
252
      /// loops.
253
      BDNZ, BDZ,
254
255
      /// F8RC = FADDRTZ F8RC, F8RC - This is an FADD done with rounding
256
      /// towards zero.  Used only as part of the long double-to-int
257
      /// conversion sequence.
258
      FADDRTZ,
259
260
      /// F8RC = MFFS - This moves the FPSCR (not modeled) into the register.
261
      MFFS,
262
263
      /// TC_RETURN - A tail call return.
264
      ///   operand #0 chain
265
      ///   operand #1 callee (register or absolute)
266
      ///   operand #2 stack adjustment
267
      ///   operand #3 optional in flag
268
      TC_RETURN,
269
270
      /// ch, gl = CR6[UN]SET ch, inglue - Toggle CR bit 6 for SVR4 vararg calls
271
      CR6SET,
272
      CR6UNSET,
273
274
      /// GPRC = address of _GLOBAL_OFFSET_TABLE_. Used by initial-exec TLS
275
      /// for non-position independent code on PPC32.
276
      PPC32_GOT,
277
278
      /// GPRC = address of _GLOBAL_OFFSET_TABLE_. Used by general dynamic and
279
      /// local dynamic TLS and position independent code on PPC32.
280
      PPC32_PICGOT,
281
282
      /// G8RC = ADDIS_GOT_TPREL_HA %x2, Symbol - Used by the initial-exec
283
      /// TLS model, produces an ADDIS8 instruction that adds the GOT
284
      /// base to sym\@got\@tprel\@ha.
285
      ADDIS_GOT_TPREL_HA,
286
287
      /// G8RC = LD_GOT_TPREL_L Symbol, G8RReg - Used by the initial-exec
288
      /// TLS model, produces a LD instruction with base register G8RReg
289
      /// and offset sym\@got\@tprel\@l.  This completes the addition that
290
      /// finds the offset of "sym" relative to the thread pointer.
291
      LD_GOT_TPREL_L,
292
293
      /// G8RC = ADD_TLS G8RReg, Symbol - Used by the initial-exec TLS
294
      /// model, produces an ADD instruction that adds the contents of
295
      /// G8RReg to the thread pointer.  Symbol contains a relocation
296
      /// sym\@tls which is to be replaced by the thread pointer and
297
      /// identifies to the linker that the instruction is part of a
298
      /// TLS sequence.
299
      ADD_TLS,
300
301
      /// G8RC = ADDIS_TLSGD_HA %x2, Symbol - For the general-dynamic TLS
302
      /// model, produces an ADDIS8 instruction that adds the GOT base
303
      /// register to sym\@got\@tlsgd\@ha.
304
      ADDIS_TLSGD_HA,
305
306
      /// %x3 = ADDI_TLSGD_L G8RReg, Symbol - For the general-dynamic TLS
307
      /// model, produces an ADDI8 instruction that adds G8RReg to
308
      /// sym\@got\@tlsgd\@l and stores the result in X3.  Hidden by
309
      /// ADDIS_TLSGD_L_ADDR until after register assignment.
310
      ADDI_TLSGD_L,
311
312
      /// %x3 = GET_TLS_ADDR %x3, Symbol - For the general-dynamic TLS
313
      /// model, produces a call to __tls_get_addr(sym\@tlsgd).  Hidden by
314
      /// ADDIS_TLSGD_L_ADDR until after register assignment.
315
      GET_TLS_ADDR,
316
317
      /// G8RC = ADDI_TLSGD_L_ADDR G8RReg, Symbol, Symbol - Op that
318
      /// combines ADDI_TLSGD_L and GET_TLS_ADDR until expansion following
319
      /// register assignment.
320
      ADDI_TLSGD_L_ADDR,
321
322
      /// G8RC = ADDIS_TLSLD_HA %x2, Symbol - For the local-dynamic TLS
323
      /// model, produces an ADDIS8 instruction that adds the GOT base
324
      /// register to sym\@got\@tlsld\@ha.
325
      ADDIS_TLSLD_HA,
326
327
      /// %x3 = ADDI_TLSLD_L G8RReg, Symbol - For the local-dynamic TLS
328
      /// model, produces an ADDI8 instruction that adds G8RReg to
329
      /// sym\@got\@tlsld\@l and stores the result in X3.  Hidden by
330
      /// ADDIS_TLSLD_L_ADDR until after register assignment.
331
      ADDI_TLSLD_L,
332
333
      /// %x3 = GET_TLSLD_ADDR %x3, Symbol - For the local-dynamic TLS
334
      /// model, produces a call to __tls_get_addr(sym\@tlsld).  Hidden by
335
      /// ADDIS_TLSLD_L_ADDR until after register assignment.
336
      GET_TLSLD_ADDR,
337
338
      /// G8RC = ADDI_TLSLD_L_ADDR G8RReg, Symbol, Symbol - Op that
339
      /// combines ADDI_TLSLD_L and GET_TLSLD_ADDR until expansion
340
      /// following register assignment.
341
      ADDI_TLSLD_L_ADDR,
342
343
      /// G8RC = ADDIS_DTPREL_HA %x3, Symbol - For the local-dynamic TLS
344
      /// model, produces an ADDIS8 instruction that adds X3 to
345
      /// sym\@dtprel\@ha.
346
      ADDIS_DTPREL_HA,
347
348
      /// G8RC = ADDI_DTPREL_L G8RReg, Symbol - For the local-dynamic TLS
349
      /// model, produces an ADDI8 instruction that adds G8RReg to
350
      /// sym\@got\@dtprel\@l.
351
      ADDI_DTPREL_L,
352
353
      /// VRRC = VADD_SPLAT Elt, EltSize - Temporary node to be expanded
354
      /// during instruction selection to optimize a BUILD_VECTOR into
355
      /// operations on splats.  This is necessary to avoid losing these
356
      /// optimizations due to constant folding.
357
      VADD_SPLAT,
358
359
      /// CHAIN = SC CHAIN, Imm128 - System call.  The 7-bit unsigned
360
      /// operand identifies the operating system entry point.
361
      SC,
362
363
      /// CHAIN = CLRBHRB CHAIN - Clear branch history rolling buffer.
364
      CLRBHRB,
365
366
      /// GPRC, CHAIN = MFBHRBE CHAIN, Entry, Dummy - Move from branch
367
      /// history rolling buffer entry.
368
      MFBHRBE,
369
370
      /// CHAIN = RFEBB CHAIN, State - Return from event-based branch.
371
      RFEBB,
372
373
      /// VSRC, CHAIN = XXSWAPD CHAIN, VSRC - Occurs only for little
374
      /// endian.  Maps to an xxswapd instruction that corrects an lxvd2x
375
      /// or stxvd2x instruction.  The chain is necessary because the
376
      /// sequence replaces a load and needs to provide the same number
377
      /// of outputs.
378
      XXSWAPD,
379
380
      /// An SDNode for swaps that are not associated with any loads/stores
381
      /// and thereby have no chain.
382
      SWAP_NO_CHAIN,
383
      
384
      /// An SDNode for Power9 vector absolute value difference.
385
      /// operand #0 vector
386
      /// operand #1 vector
387
      /// operand #2 constant i32 0 or 1, to indicate whether it needs to patch
388
      /// the most significant bit for signed i32
389
      ///
390
      /// Power9 VABSD* instructions are designed to support unsigned integer
391
      /// vectors (byte/halfword/word); to use them for signed integer
392
      /// vectors, we have to flip their sign bits first. Flipping the sign
393
      /// bit of a byte/halfword integer vector would be inefficient, but for
394
      /// a word integer vector we can leverage XVNEGSP to do it efficiently, e.g.:
395
      /// abs(sub(a,b)) => VABSDUW(a+0x80000000, b+0x80000000) 
396
      ///               => VABSDUW((XVNEGSP a), (XVNEGSP b))
397
      VABSD,
398
399
      /// QVFPERM = This corresponds to the QPX qvfperm instruction.
400
      QVFPERM,
401
402
      /// QVGPCI = This corresponds to the QPX qvgpci instruction.
403
      QVGPCI,
404
405
      /// QVALIGNI = This corresponds to the QPX qvaligni instruction.
406
      QVALIGNI,
407
408
      /// QVESPLATI = This corresponds to the QPX qvesplati instruction.
409
      QVESPLATI,
410
411
      /// QBFLT = Access the underlying QPX floating-point boolean
412
      /// representation.
413
      QBFLT,
414
415
      /// Custom extend v4f32 to v2f64.
416
      FP_EXTEND_LH,
417
418
      /// CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a
419
      /// byte-swapping store instruction.  It byte-swaps the low "Type" bits of
420
      /// the GPRC input, then stores it through Ptr.  Type can be either i16 or
421
      /// i32.
422
      STBRX = ISD::FIRST_TARGET_MEMORY_OPCODE,
423
424
      /// GPRC, CHAIN = LBRX CHAIN, Ptr, Type - This is a
425
      /// byte-swapping load instruction.  It loads "Type" bits, byte swaps it,
426
      /// then puts it in the bottom bits of the GPRC.  TYPE can be either i16
427
      /// or i32.
428
      LBRX,
429
430
      /// STFIWX - The STFIWX instruction.  The first operand is an input token
431
      /// chain, then an f64 value to store, then an address to store it to.
432
      STFIWX,
433
434
      /// GPRC, CHAIN = LFIWAX CHAIN, Ptr - This is a floating-point
435
      /// load which sign-extends from a 32-bit integer value into the
436
      /// destination 64-bit register.
437
      LFIWAX,
438
439
      /// GPRC, CHAIN = LFIWZX CHAIN, Ptr - This is a floating-point
440
      /// load which zero-extends from a 32-bit integer value into the
441
      /// destination 64-bit register.
442
      LFIWZX,
443
444
      /// GPRC, CHAIN = LXSIZX, CHAIN, Ptr, ByteWidth - This is a load of an
445
      /// integer smaller than 64 bits into a VSR. The integer is zero-extended.
446
      /// This can be used for converting loaded integers to floating point.
447
      LXSIZX,
448
449
      /// STXSIX - The STXSI[bh]X instruction. The first operand is an input
450
      /// chain, then an f64 value to store, then an address to store it to,
451
      /// followed by a byte-width for the store.
452
      STXSIX,
453
454
      /// VSRC, CHAIN = LXVD2X_LE CHAIN, Ptr - Occurs only for little endian.
455
      /// Maps directly to an lxvd2x instruction that will be followed by
456
      /// an xxswapd.
457
      LXVD2X,
458
459
      /// VSRC, CHAIN = LD_VSX_LH CHAIN, Ptr - This is a floating-point load of a
460
      /// v2f32 value into the lower half of a VSR register.
461
      LD_VSX_LH,
462
463
      /// CHAIN = STXVD2X CHAIN, VSRC, Ptr - Occurs only for little endian.
464
      /// Maps directly to an stxvd2x instruction that will be preceded by
465
      /// an xxswapd.
466
      STXVD2X,
467
468
      /// Store scalar integers from VSR.
469
      ST_VSR_SCAL_INT,
470
471
      /// QBRC, CHAIN = QVLFSb CHAIN, Ptr
472
      /// The 4xf32 load used for v4i1 constants.
473
      QVLFSb,
474
475
      /// ATOMIC_CMP_SWAP - the exact same as the target-independent nodes
476
      /// except they ensure that the compare input is zero-extended for
477
      /// sub-word versions because the atomic loads zero-extend.
478
      ATOMIC_CMP_SWAP_8, ATOMIC_CMP_SWAP_16,
479
480
      /// GPRC = TOC_ENTRY GA, TOC
481
      /// Loads the entry for GA from the TOC, where the TOC base is given by
482
      /// the last operand.
483
      TOC_ENTRY
484
    };
485
486
  } // end namespace PPCISD
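
A minimal sketch of why the ordering rule at the top of the enum matters: SelectionDAG classifies a target node as a memory operation purely by comparing its opcode against ISD::FIRST_TARGET_MEMORY_OPCODE (this is what SDNode::isTargetMemoryOpcode() does), so every PPCISD node from STBRX onward must genuinely be a load or store.

    // Sketch of the range check the enum layout above must satisfy.
    static bool isTargetMemoryOpcode(unsigned Opcode) {
      return Opcode >= ISD::FIRST_TARGET_MEMORY_OPCODE;
    }
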
487
488
  /// Define some predicates that are used for node matching; a usage sketch follows this namespace.
489
  namespace PPC {
490
491
    /// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
492
    /// VPKUHUM instruction.
493
    bool isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
494
                              SelectionDAG &DAG);
495
496
    /// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
497
    /// VPKUWUM instruction.
498
    bool isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
499
                              SelectionDAG &DAG);
500
501
    /// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a
502
    /// VPKUDUM instruction.
503
    bool isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
504
                              SelectionDAG &DAG);
505
506
    /// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
507
    /// a VMRGL* instruction with the specified unit size (1, 2 or 4 bytes).
508
    bool isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
509
                            unsigned ShuffleKind, SelectionDAG &DAG);
510
511
    /// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
512
    /// a VMRGH* instruction with the specified unit size (1, 2 or 4 bytes).
513
    bool isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
514
                            unsigned ShuffleKind, SelectionDAG &DAG);
515
516
    /// isVMRGEOShuffleMask - Return true if this is a shuffle mask suitable for
517
    /// a VMRGEW or VMRGOW instruction.
518
    bool isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven,
519
                             unsigned ShuffleKind, SelectionDAG &DAG);
520
    /// isXXSLDWIShuffleMask - Return true if this is a shuffle mask suitable
521
    /// for a XXSLDWI instruction.
522
    bool isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
523
                              bool &Swap, bool IsLE);
524
525
    /// isXXBRHShuffleMask - Return true if this is a shuffle mask suitable
526
    /// for a XXBRH instruction.
527
    bool isXXBRHShuffleMask(ShuffleVectorSDNode *N);
528
529
    /// isXXBRWShuffleMask - Return true if this is a shuffle mask suitable
530
    /// for a XXBRW instruction.
531
    bool isXXBRWShuffleMask(ShuffleVectorSDNode *N);
532
533
    /// isXXBRDShuffleMask - Return true if this is a shuffle mask suitable
534
    /// for a XXBRD instruction.
535
    bool isXXBRDShuffleMask(ShuffleVectorSDNode *N);
536
537
    /// isXXBRQShuffleMask - Return true if this is a shuffle mask suitable
538
    /// for a XXBRQ instruction.
539
    bool isXXBRQShuffleMask(ShuffleVectorSDNode *N);
540
541
    /// isXXPERMDIShuffleMask - Return true if this is a shuffle mask suitable
542
    /// for a XXPERMDI instruction.
543
    bool isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
544
                              bool &Swap, bool IsLE);
545
546
    /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the
547
    /// shift amount, otherwise return -1.
548
    int isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
549
                            SelectionDAG &DAG);
550
551
    /// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
552
    /// specifies a splat of a single element that is suitable for input to
553
    /// VSPLTB/VSPLTH/VSPLTW.
554
    bool isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize);
555
556
    /// isXXINSERTWMask - Return true if this VECTOR_SHUFFLE can be handled by
557
    /// the XXINSERTW instruction introduced in ISA 3.0. This is essentially any
558
    /// shuffle of v4f32/v4i32 vectors that just inserts one element from one
559
    /// vector into the other. This function will also set a couple of
560
    /// output parameters for how much the source vector needs to be shifted and
561
    /// what byte number needs to be specified for the instruction to put the
562
    /// element in the desired location of the target vector.
563
    bool isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
564
                         unsigned &InsertAtByte, bool &Swap, bool IsLE);
565
566
    /// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
567
    /// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
568
    unsigned getVSPLTImmediate(SDNode *N, unsigned EltSize, SelectionDAG &DAG);
569
570
    /// get_VSPLTI_elt - If this is a build_vector of constants which can be
571
    /// formed by using a vspltis[bhw] instruction of the specified element
572
    /// size, return the constant being splatted.  The ByteSize field indicates
573
    /// the number of bytes of each element [124] -> [bhw].
574
    SDValue get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG);
575
576
    /// If this is a qvaligni shuffle mask, return the shift
577
    /// amount, otherwise return -1.
578
    int isQVALIGNIShuffleMask(SDNode *N);
579
580
  } // end namespace PPC
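
A hedged sketch of how the predicates above are typically consumed during shuffle lowering; the function and its control flow are illustrative, not part of this header:

    // Illustrative only: match a 4-byte splat and query its lane.
    static SDValue tryLowerSplat(ShuffleVectorSDNode *SVN, SelectionDAG &DAG) {
      if (PPC::isSplatShuffleMask(SVN, /*EltSize=*/4)) {
        unsigned Lane = PPC::getVSPLTImmediate(SVN, /*EltSize=*/4, DAG);
        (void)Lane; // a real lowering would emit a VSPLTW using this lane
      }
      return SDValue();
    }
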
581
582
  class PPCTargetLowering : public TargetLowering {
583
    const PPCSubtarget &Subtarget;
584
585
  public:
586
    explicit PPCTargetLowering(const PPCTargetMachine &TM,
587
                               const PPCSubtarget &STI);
588
589
    /// getTargetNodeName() - This method returns the name of a target specific
590
    /// DAG node.
591
    const char *getTargetNodeName(unsigned Opcode) const override;
592
593
862
    bool isSelectSupported(SelectSupportKind Kind) const override {
594
862
      // PowerPC does not support scalar condition selects on vectors.
595
862
      return (Kind != SelectSupportKind::ScalarCondVectorVal);
596
862
    }
597
598
    /// getPreferredVectorAction - The code we generate when vector types are
599
    /// legalized by promoting the integer element type is often much worse
600
    /// than code we generate if we widen the type for applicable vector types.
601
    /// The issue with promoting is that the vector is scalarized, individual
602
    /// elements promoted and then the vector is rebuilt. So say we load a pair
603
    /// of v4i8's and shuffle them. This will turn into a mess of 8 extending
604
    /// loads, moves back into VSR's (or memory ops if we don't have moves) and
605
    /// then the VPERM for the shuffle. All in all a very slow sequence.
606
    TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT)
607
197k
      const override {
608
197k
      if (VT.getScalarSizeInBits() % 8 == 0)
609
168k
        return TypeWidenVector;
610
29.5k
      return TargetLoweringBase::getPreferredVectorAction(VT);
611
29.5k
    }
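
To make the policy above concrete: v4i8 has 8-bit elements, so the hook asks for widening (toward v16i8) rather than the default promotion that would scalarize and extend each lane. A tiny sketch, assuming only the MVT API already included by this header:

    // Mirrors the hook: true for v4i8 (8 % 8 == 0), false for v4i1.
    inline bool prefersWidening(MVT VT) {
      return VT.getScalarSizeInBits() % 8 == 0;
    }
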
612
613
    bool useSoftFloat() const override;
614
615
    bool hasSPE() const;
616
617
3.92k
    MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override {
618
3.92k
      return MVT::i32;
619
3.92k
    }
620
621
9
    bool isCheapToSpeculateCttz() const override {
622
9
      return true;
623
9
    }
624
625
12
    bool isCheapToSpeculateCtlz() const override {
626
12
      return true;
627
12
    }
628
629
23
    bool isCtlzFast() const override {
630
23
      return true;
631
23
    }
632
633
183
    bool hasAndNotCompare(SDValue) const override {
634
183
      return true;
635
183
    }
636
637
    bool preferIncOfAddToSubOfNot(EVT VT) const override;
638
639
82
    bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {
640
82
      return VT.isScalarInteger();
641
82
    }
642
643
11.0k
    bool supportSplitCSR(MachineFunction *MF) const override {
644
11.0k
      return
645
11.0k
        MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
646
11.0k
        MF->getFunction().hasFnAttribute(Attribute::NoUnwind);
647
11.0k
    }
648
649
    void initializeSplitCSR(MachineBasicBlock *Entry) const override;
650
651
    void insertCopiesSplitCSR(
652
      MachineBasicBlock *Entry,
653
      const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;
654
655
    /// getSetCCResultType - Return the ISD::SETCC ValueType
656
    EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
657
                           EVT VT) const override;
658
659
    /// Return true if the target always benefits from combining into FMA for a
660
    /// given value type. This must typically return false on targets where FMA
661
    /// takes more cycles to execute than FADD.
662
    bool enableAggressiveFMAFusion(EVT VT) const override;
663
664
    /// getPreIndexedAddressParts - returns true, and sets the base pointer,
666
    /// offset pointer, and addressing mode by reference, if the node's address
667
    /// can be legally represented as a pre-indexed load / store address.
667
    bool getPreIndexedAddressParts(SDNode *N, SDValue &Base,
668
                                   SDValue &Offset,
669
                                   ISD::MemIndexedMode &AM,
670
                                   SelectionDAG &DAG) const override;
671
672
    /// SelectAddressEVXRegReg - Given the specified address, check to see if
673
    /// it can be more efficiently represented as [r+imm].
674
    bool SelectAddressEVXRegReg(SDValue N, SDValue &Base, SDValue &Index,
675
                                SelectionDAG &DAG) const;
676
677
    /// SelectAddressRegReg - Given the specified address, check to see if it
678
    /// can be more efficiently represented as [r+imm]. If \p EncodingAlignment
679
    /// is non-zero, only accept displacement which is not suitable for [r+imm].
680
    /// Returns false if it can be represented by [r+imm], which are preferred.
681
    bool SelectAddressRegReg(SDValue N, SDValue &Base, SDValue &Index,
682
                             SelectionDAG &DAG,
683
                             unsigned EncodingAlignment = 0) const;
684
685
    /// SelectAddressRegImm - Returns true if the address N can be represented
686
    /// by a base register plus a signed 16-bit displacement [r+imm], and if it
687
    /// is not better represented as reg+reg. If \p EncodingAlignment is
688
    /// non-zero, only accept displacements suitable for instruction encoding
689
    /// requirement, i.e. multiples of 4 for DS form.
690
    bool SelectAddressRegImm(SDValue N, SDValue &Disp, SDValue &Base,
691
                             SelectionDAG &DAG,
692
                             unsigned EncodingAlignment) const;
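
For the DS-form requirement mentioned above, a minimal sketch of the displacement test involved; the helper name is hypothetical, and isInt<> is the usual llvm/Support/MathExtras.h template:

    // DS-form memory ops (ld/std/lwa) encode a 16-bit displacement whose
    // low two bits must be zero, hence "multiples of 4" above.
    static bool isValidDSFormDisp(int64_t Imm) {
      return isInt<16>(Imm) && (Imm % 4 == 0);
    }
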
693
694
    /// SelectAddressRegRegOnly - Given the specified address, force it to be
695
    /// represented as an indexed [r+r] operation.
696
    bool SelectAddressRegRegOnly(SDValue N, SDValue &Base, SDValue &Index,
697
                                 SelectionDAG &DAG) const;
698
699
    Sched::Preference getSchedulingPreference(SDNode *N) const override;
700
701
    /// LowerOperation - Provide custom lowering hooks for some operations.
702
    ///
703
    SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
704
705
    /// ReplaceNodeResults - Replace the results of node with an illegal result
706
    /// type with new values built out of custom code.
707
    ///
708
    void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
709
                            SelectionDAG &DAG) const override;
710
711
    SDValue expandVSXLoadForLE(SDNode *N, DAGCombinerInfo &DCI) const;
712
    SDValue expandVSXStoreForLE(SDNode *N, DAGCombinerInfo &DCI) const;
713
714
    SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
715
716
    SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
717
                          SmallVectorImpl<SDNode *> &Created) const override;
718
719
    unsigned getRegisterByName(const char* RegName, EVT VT,
720
                               SelectionDAG &DAG) const override;
721
722
    void computeKnownBitsForTargetNode(const SDValue Op,
723
                                       KnownBits &Known,
724
                                       const APInt &DemandedElts,
725
                                       const SelectionDAG &DAG,
726
                                       unsigned Depth = 0) const override;
727
728
    unsigned getPrefLoopAlignment(MachineLoop *ML) const override;
729
730
687
    bool shouldInsertFencesForAtomic(const Instruction *I) const override {
731
687
      return true;
732
687
    }
733
734
    Instruction *emitLeadingFence(IRBuilder<> &Builder, Instruction *Inst,
735
                                  AtomicOrdering Ord) const override;
736
    Instruction *emitTrailingFence(IRBuilder<> &Builder, Instruction *Inst,
737
                                   AtomicOrdering Ord) const override;
738
739
    MachineBasicBlock *
740
    EmitInstrWithCustomInserter(MachineInstr &MI,
741
                                MachineBasicBlock *MBB) const override;
742
    MachineBasicBlock *EmitAtomicBinary(MachineInstr &MI,
743
                                        MachineBasicBlock *MBB,
744
                                        unsigned AtomicSize,
745
                                        unsigned BinOpcode,
746
                                        unsigned CmpOpcode = 0,
747
                                        unsigned CmpPred = 0) const;
748
    MachineBasicBlock *EmitPartwordAtomicBinary(MachineInstr &MI,
749
                                                MachineBasicBlock *MBB,
750
                                                bool is8bit,
751
                                                unsigned Opcode,
752
                                                unsigned CmpOpcode = 0,
753
                                                unsigned CmpPred = 0) const;
754
755
    MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr &MI,
756
                                        MachineBasicBlock *MBB) const;
757
758
    MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr &MI,
759
                                         MachineBasicBlock *MBB) const;
760
761
    ConstraintType getConstraintType(StringRef Constraint) const override;
762
763
    /// Examine constraint string and operand type and determine a weight value.
764
    /// The operand object must already have been set up with the operand type.
765
    ConstraintWeight getSingleConstraintMatchWeight(
766
      AsmOperandInfo &info, const char *constraint) const override;
767
768
    std::pair<unsigned, const TargetRegisterClass *>
769
    getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
770
                                 StringRef Constraint, MVT VT) const override;
771
772
    /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
773
    /// function arguments in the caller parameter area.  This is the actual
774
    /// alignment, not its logarithm.
775
    unsigned getByValTypeAlignment(Type *Ty,
776
                                   const DataLayout &DL) const override;
777
778
    /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
779
    /// vector.  If it is invalid, don't add anything to Ops.
780
    void LowerAsmOperandForConstraint(SDValue Op,
781
                                      std::string &Constraint,
782
                                      std::vector<SDValue> &Ops,
783
                                      SelectionDAG &DAG) const override;
784
785
    unsigned
786
59
    getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
787
59
      if (ConstraintCode == "es")
788
0
        return InlineAsm::Constraint_es;
789
59
      else if (ConstraintCode == "o")
790
1
        return InlineAsm::Constraint_o;
791
58
      else if (ConstraintCode == "Q")
792
0
        return InlineAsm::Constraint_Q;
793
58
      else if (ConstraintCode == "Z")
794
1
        return InlineAsm::Constraint_Z;
795
57
      else if (ConstraintCode == "Zy")
796
0
        return InlineAsm::Constraint_Zy;
797
57
      return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
798
57
    }
799
800
    /// isLegalAddressingMode - Return true if the addressing mode represented
801
    /// by AM is legal for this target, for a load/store of the specified type.
802
    bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
803
                               Type *Ty, unsigned AS,
804
                               Instruction *I = nullptr) const override;
805
806
    /// isLegalICmpImmediate - Return true if the specified immediate is a legal
807
    /// icmp immediate, that is the target has icmp instructions which can
808
    /// compare a register against the immediate without having to materialize
809
    /// the immediate into a register.
810
    bool isLegalICmpImmediate(int64_t Imm) const override;
811
812
    /// isLegalAddImmediate - Return true if the specified immediate is a legal
813
    /// add immediate, that is the target has add instructions which can
814
    /// add a register and the immediate without having to materialize
815
    /// the immediate into a register.
816
    bool isLegalAddImmediate(int64_t Imm) const override;
817
818
    /// isTruncateFree - Return true if it's free to truncate a value of
819
    /// type Ty1 to type Ty2. e.g. On PPC it's free to truncate an i64 value in
820
    /// register X1 to i32 by referencing its sub-register R1.
821
    bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
822
    bool isTruncateFree(EVT VT1, EVT VT2) const override;
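
A small caller-level illustration of the claim above, assuming a PPC64 target; the function is purely illustrative:

    // On PPC64 this cast needs no instruction: the i32 result is simply the
    // low word (sub-register) of the i64 register holding Wide.
    static int truncateToInt(long long Wide) { return (int)Wide; }
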
823
824
    bool isZExtFree(SDValue Val, EVT VT2) const override;
825
826
    bool isFPExtFree(EVT DestVT, EVT SrcVT) const override;
827
828
    /// Returns true if it is beneficial to convert a load of a constant
829
    /// to just the constant itself.
830
    bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
831
                                           Type *Ty) const override;
832
833
1.71k
    bool convertSelectOfConstantsToMath(EVT VT) const override {
834
1.71k
      return true;
835
1.71k
    }
836
837
    bool isDesirableToTransformToIntegerOp(unsigned Opc,
838
84
                                           EVT VT) const override {
839
84
      // Only handle float load/store pair because float(fpr) load/store
840
84
      // instructions take more cycles than integer(gpr) load/store on PPC.
841
84
      if (Opc != ISD::LOAD && Opc != ISD::STORE)
842
0
        return false;
843
84
      if (VT != MVT::f32 && VT != MVT::f64)
844
0
        return false;
845
84
846
84
      return true; 
847
84
    }
848
849
    // Returns true if the address of the global is stored in a TOC entry.
850
    bool isAccessedAsGotIndirect(SDValue N) const;
851
852
    bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
853
854
    bool getTgtMemIntrinsic(IntrinsicInfo &Info,
855
                            const CallInst &I,
856
                            MachineFunction &MF,
857
                            unsigned Intrinsic) const override;
858
859
    /// getOptimalMemOpType - Returns the target specific optimal type for load
860
    /// and store operations as a result of memset, memcpy, and memmove
861
    /// lowering. If DstAlign is zero that means it's safe to assume the destination
862
    /// alignment can satisfy any constraint. Similarly if SrcAlign is zero it
863
    /// means there isn't a need to check it against alignment requirement,
864
    /// probably because the source does not need to be loaded. If 'IsMemset' is
865
    /// true, that means it's expanding a memset. If 'ZeroMemset' is true, that
866
    /// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy
867
    /// source is constant so it does not need to be loaded.
868
    /// It returns EVT::Other if the type should be determined using generic
869
    /// target-independent logic.
870
    EVT
871
    getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign,
872
                        bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
873
                        const AttributeList &FuncAttributes) const override;
874
875
    /// Is unaligned memory access allowed for the given type, and is it fast
876
    /// relative to software emulation.
877
    bool allowsMisalignedMemoryAccesses(
878
        EVT VT, unsigned AddrSpace, unsigned Align = 1,
879
        MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
880
        bool *Fast = nullptr) const override;
881
882
    /// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster
883
    /// than a pair of fmul and fadd instructions. fmuladd intrinsics will be
884
    /// expanded to FMAs when this method returns true, otherwise fmuladd is
885
    /// expanded to fmul + fadd.
886
    bool isFMAFasterThanFMulAndFAdd(EVT VT) const override;
887
888
    const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
889
890
    // Should we expand the build vector with shuffles?
891
    bool
892
    shouldExpandBuildVectorWithShuffles(EVT VT,
893
                                        unsigned DefinedValues) const override;
894
895
    /// createFastISel - This method returns a target-specific FastISel object,
896
    /// or null if the target does not support "fast" instruction selection.
897
    FastISel *createFastISel(FunctionLoweringInfo &FuncInfo,
898
                             const TargetLibraryInfo *LibInfo) const override;
899
900
    /// Returns true if an argument of type Ty needs to be passed in a
901
    /// contiguous block of registers in calling convention CallConv.
902
    bool functionArgumentNeedsConsecutiveRegisters(
903
34.4k
      Type *Ty, CallingConv::ID CallConv, bool isVarArg) const override {
904
34.4k
      // We support any array type as "consecutive" block in the parameter
905
34.4k
      // save area.  The element type defines the alignment requirement and
906
34.4k
      // whether the argument should go in GPRs, FPRs, or VRs if available.
907
34.4k
      //
908
34.4k
      // Note that clang uses this capability both to implement the ELFv2
909
34.4k
      // homogeneous float/vector aggregate ABI, and to avoid having to use
910
34.4k
      // "byval" when passing aggregates that might fully fit in registers.
911
34.4k
      return Ty->isArrayTy();
912
34.4k
    }
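
As a hedged illustration of the capability described above: under the ELFv2 homogeneous-aggregate rules, clang can pass a struct like the following as an array of floats in consecutive FPRs instead of using "byval" (the struct itself is illustrative):

    // Coerced by clang to [4 x float], so the Ty seen by the hook above
    // satisfies Ty->isArrayTy() and the values land in consecutive FPRs.
    struct Vec4 { float X, Y, Z, W; };
    static float Sum(Vec4 V) { return V.X + V.Y + V.Z + V.W; }
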
913
914
    /// If a physical register, this returns the register that receives the
915
    /// exception address on entry to an EH pad.
916
    unsigned
917
    getExceptionPointerRegister(const Constant *PersonalityFn) const override;
918
919
    /// If a physical register, this returns the register that receives the
920
    /// exception typeid on entry to a landing pad.
921
    unsigned
922
    getExceptionSelectorRegister(const Constant *PersonalityFn) const override;
923
924
    /// Override to support customized stack guard loading.
925
    bool useLoadStackGuardNode() const override;
926
    void insertSSPDeclarations(Module &M) const override;
927
928
    bool isFPImmLegal(const APFloat &Imm, EVT VT,
929
                      bool ForCodeSize) const override;
930
931
    unsigned getJumpTableEncoding() const override;
932
    bool isJumpTableRelative() const override;
933
    SDValue getPICJumpTableRelocBase(SDValue Table,
934
                                     SelectionDAG &DAG) const override;
935
    const MCExpr *getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
936
                                               unsigned JTI,
937
                                               MCContext &Ctx) const override;
938
939
  private:
940
    struct ReuseLoadInfo {
941
      SDValue Ptr;
942
      SDValue Chain;
943
      SDValue ResChain;
944
      MachinePointerInfo MPI;
945
      bool IsDereferenceable = false;
946
      bool IsInvariant = false;
947
      unsigned Alignment = 0;
948
      AAMDNodes AAInfo;
949
      const MDNode *Ranges = nullptr;
950
951
146
      ReuseLoadInfo() = default;
952
953
75
      MachineMemOperand::Flags MMOFlags() const {
954
75
        MachineMemOperand::Flags F = MachineMemOperand::MONone;
955
75
        if (IsDereferenceable)
956
0
          F |= MachineMemOperand::MODereferenceable;
957
75
        if (IsInvariant)
958
0
          F |= MachineMemOperand::MOInvariant;
959
75
        return F;
960
75
      }
961
    };
962
963
5
    bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override {
964
5
      // Addrspacecasts are always noops.
965
5
      return true;
966
5
    }
967
968
    bool canReuseLoadAddress(SDValue Op, EVT MemVT, ReuseLoadInfo &RLI,
969
                             SelectionDAG &DAG,
970
                             ISD::LoadExtType ET = ISD::NON_EXTLOAD) const;
971
    void spliceIntoChain(SDValue ResChain, SDValue NewResChain,
972
                         SelectionDAG &DAG) const;
973
974
    void LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
975
                                SelectionDAG &DAG, const SDLoc &dl) const;
976
    SDValue LowerFP_TO_INTDirectMove(SDValue Op, SelectionDAG &DAG,
977
                                     const SDLoc &dl) const;
978
979
    bool directMoveIsProfitable(const SDValue &Op) const;
980
    SDValue LowerINT_TO_FPDirectMove(SDValue Op, SelectionDAG &DAG,
981
                                     const SDLoc &dl) const;
982
983
    SDValue LowerINT_TO_FPVector(SDValue Op, SelectionDAG &DAG,
984
                                 const SDLoc &dl) const;
985
986
    SDValue LowerTRUNCATEVector(SDValue Op, SelectionDAG &DAG) const;
987
988
    SDValue getFramePointerFrameIndex(SelectionDAG & DAG) const;
989
    SDValue getReturnAddrFrameIndex(SelectionDAG & DAG) const;
990
991
    bool
992
    IsEligibleForTailCallOptimization(SDValue Callee,
993
                                      CallingConv::ID CalleeCC,
994
                                      bool isVarArg,
995
                                      const SmallVectorImpl<ISD::InputArg> &Ins,
996
                                      SelectionDAG& DAG) const;
997
998
    bool
999
    IsEligibleForTailCallOptimization_64SVR4(
1000
                                    SDValue Callee,
1001
                                    CallingConv::ID CalleeCC,
1002
                                    ImmutableCallSite CS,
1003
                                    bool isVarArg,
1004
                                    const SmallVectorImpl<ISD::OutputArg> &Outs,
1005
                                    const SmallVectorImpl<ISD::InputArg> &Ins,
1006
                                    SelectionDAG& DAG) const;
1007
1008
    SDValue EmitTailCallLoadFPAndRetAddr(SelectionDAG &DAG, int SPDiff,
1009
                                         SDValue Chain, SDValue &LROpOut,
1010
                                         SDValue &FPOpOut,
1011
                                         const SDLoc &dl) const;
1012
1013
    SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
1014
    SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
1015
    SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
1016
    SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
1017
    SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
1018
    SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
1019
    SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
1020
    SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
1021
    SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
1022
    SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
1023
    SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
1024
    SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
1025
    SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const;
1026
    SDValue LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG) const;
1027
    SDValue LowerGET_DYNAMIC_AREA_OFFSET(SDValue Op, SelectionDAG &DAG) const;
1028
    SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
1029
    SDValue LowerEH_DWARF_CFA(SDValue Op, SelectionDAG &DAG) const;
1030
    SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
1031
    SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
1032
    SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
1033
    SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
1034
    SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
1035
                           const SDLoc &dl) const;
1036
    SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
1037
    SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
1038
    SDValue LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const;
1039
    SDValue LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const;
1040
    SDValue LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const;
1041
    SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
1042
    SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
1043
    SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
1044
    SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
1045
    SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
1046
    SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const;
1047
    SDValue LowerREM(SDValue Op, SelectionDAG &DAG) const;
1048
    SDValue LowerBSWAP(SDValue Op, SelectionDAG &DAG) const;
1049
    SDValue LowerATOMIC_CMP_SWAP(SDValue Op, SelectionDAG &DAG) const;
1050
    SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
1051
    SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
1052
    SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;
1053
    SDValue LowerABS(SDValue Op, SelectionDAG &DAG) const;
1054
    SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
1055
1056
    SDValue LowerVectorLoad(SDValue Op, SelectionDAG &DAG) const;
1057
    SDValue LowerVectorStore(SDValue Op, SelectionDAG &DAG) const;
1058
1059
    SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
1060
                            CallingConv::ID CallConv, bool isVarArg,
1061
                            const SmallVectorImpl<ISD::InputArg> &Ins,
1062
                            const SDLoc &dl, SelectionDAG &DAG,
1063
                            SmallVectorImpl<SDValue> &InVals) const;
1064
    SDValue FinishCall(CallingConv::ID CallConv, const SDLoc &dl,
1065
                       bool isTailCall, bool isVarArg, bool isPatchPoint,
1066
                       bool hasNest, SelectionDAG &DAG,
1067
                       SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass,
1068
                       SDValue InFlag, SDValue Chain, SDValue CallSeqStart,
1069
                       SDValue &Callee, int SPDiff, unsigned NumBytes,
1070
                       const SmallVectorImpl<ISD::InputArg> &Ins,
1071
                       SmallVectorImpl<SDValue> &InVals,
1072
                       ImmutableCallSite CS) const;
1073
1074
    SDValue
1075
    LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
1076
                         const SmallVectorImpl<ISD::InputArg> &Ins,
1077
                         const SDLoc &dl, SelectionDAG &DAG,
1078
                         SmallVectorImpl<SDValue> &InVals) const override;
1079
1080
    SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI,
1081
                      SmallVectorImpl<SDValue> &InVals) const override;
1082
1083
    bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
1084
                        bool isVarArg,
1085
                        const SmallVectorImpl<ISD::OutputArg> &Outs,
1086
                        LLVMContext &Context) const override;
1087
1088
    SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
1089
                        const SmallVectorImpl<ISD::OutputArg> &Outs,
1090
                        const SmallVectorImpl<SDValue> &OutVals,
1091
                        const SDLoc &dl, SelectionDAG &DAG) const override;
1092
1093
    SDValue extendArgForPPC64(ISD::ArgFlagsTy Flags, EVT ObjectVT,
1094
                              SelectionDAG &DAG, SDValue ArgVal,
1095
                              const SDLoc &dl) const;
1096
1097
    SDValue LowerFormalArguments_Darwin(
1098
        SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
1099
        const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
1100
        SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const;
1101
    SDValue LowerFormalArguments_64SVR4(
1102
        SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
1103
        const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
1104
        SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const;
1105
    SDValue LowerFormalArguments_32SVR4(
1106
        SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
1107
        const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
1108
        SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const;
1109
1110
    SDValue createMemcpyOutsideCallSeq(SDValue Arg, SDValue PtrOff,
1111
                                       SDValue CallSeqStart,
1112
                                       ISD::ArgFlagsTy Flags, SelectionDAG &DAG,
1113
                                       const SDLoc &dl) const;
1114
1115
    SDValue LowerCall_Darwin(SDValue Chain, SDValue Callee,
1116
                             CallingConv::ID CallConv, bool isVarArg,
1117
                             bool isTailCall, bool isPatchPoint,
1118
                             const SmallVectorImpl<ISD::OutputArg> &Outs,
1119
                             const SmallVectorImpl<SDValue> &OutVals,
1120
                             const SmallVectorImpl<ISD::InputArg> &Ins,
1121
                             const SDLoc &dl, SelectionDAG &DAG,
1122
                             SmallVectorImpl<SDValue> &InVals,
1123
                             ImmutableCallSite CS) const;
1124
    SDValue LowerCall_64SVR4(SDValue Chain, SDValue Callee,
1125
                             CallingConv::ID CallConv, bool isVarArg,
1126
                             bool isTailCall, bool isPatchPoint,
1127
                             const SmallVectorImpl<ISD::OutputArg> &Outs,
1128
                             const SmallVectorImpl<SDValue> &OutVals,
1129
                             const SmallVectorImpl<ISD::InputArg> &Ins,
1130
                             const SDLoc &dl, SelectionDAG &DAG,
1131
                             SmallVectorImpl<SDValue> &InVals,
1132
                             ImmutableCallSite CS) const;
1133
    SDValue LowerCall_32SVR4(SDValue Chain, SDValue Callee,
1134
                             CallingConv::ID CallConv, bool isVarArg,
1135
                             bool isTailCall, bool isPatchPoint,
1136
                             const SmallVectorImpl<ISD::OutputArg> &Outs,
1137
                             const SmallVectorImpl<SDValue> &OutVals,
1138
                             const SmallVectorImpl<ISD::InputArg> &Ins,
1139
                             const SDLoc &dl, SelectionDAG &DAG,
1140
                             SmallVectorImpl<SDValue> &InVals,
1141
                             ImmutableCallSite CS) const;
1142
    SDValue LowerCall_AIX(SDValue Chain, SDValue Callee,
1143
                          CallingConv::ID CallConv, bool isVarArg,
1144
                          bool isTailCall, bool isPatchPoint,
1145
                          const SmallVectorImpl<ISD::OutputArg> &Outs,
1146
                          const SmallVectorImpl<SDValue> &OutVals,
1147
                          const SmallVectorImpl<ISD::InputArg> &Ins,
1148
                          const SDLoc &dl, SelectionDAG &DAG,
1149
                          SmallVectorImpl<SDValue> &InVals,
1150
                          ImmutableCallSite CS) const;
1151
1152
    SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
1153
    SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
1154
    SDValue LowerBITCAST(SDValue Op, SelectionDAG &DAG) const;
1155
1156
    SDValue DAGCombineExtBoolTrunc(SDNode *N, DAGCombinerInfo &DCI) const;
1157
    SDValue DAGCombineBuildVector(SDNode *N, DAGCombinerInfo &DCI) const;
1158
    SDValue DAGCombineTruncBoolExt(SDNode *N, DAGCombinerInfo &DCI) const;
1159
    SDValue combineStoreFPToInt(SDNode *N, DAGCombinerInfo &DCI) const;
1160
    SDValue combineFPToIntToFP(SDNode *N, DAGCombinerInfo &DCI) const;
1161
    SDValue combineSHL(SDNode *N, DAGCombinerInfo &DCI) const;
1162
    SDValue combineSRA(SDNode *N, DAGCombinerInfo &DCI) const;
1163
    SDValue combineSRL(SDNode *N, DAGCombinerInfo &DCI) const;
1164
    SDValue combineMUL(SDNode *N, DAGCombinerInfo &DCI) const;
1165
    SDValue combineADD(SDNode *N, DAGCombinerInfo &DCI) const;
1166
    SDValue combineTRUNCATE(SDNode *N, DAGCombinerInfo &DCI) const;
1167
    SDValue combineSetCC(SDNode *N, DAGCombinerInfo &DCI) const;
1168
    SDValue combineABS(SDNode *N, DAGCombinerInfo &DCI) const;
1169
    SDValue combineVSelect(SDNode *N, DAGCombinerInfo &DCI) const;
1170
1171
    /// ConvertSETCCToSubtract - looks at SETCC that compares ints. It replaces
1172
    /// SETCC with integer subtraction when (1) there is a legal way of doing it
1173
    /// and (2) keeping the result of the comparison in a GPR has a performance benefit.
1174
    SDValue ConvertSETCCToSubtract(SDNode *N, DAGCombinerInfo &DCI) const;
1175
1176
    SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
1177
                            int &RefinementSteps, bool &UseOneConstNR,
1178
                            bool Reciprocal) const override;
1179
    SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
1180
                             int &RefinementSteps) const override;
1181
    unsigned combineRepeatedFPDivisors() const override;
1182
1183
    SDValue
1184
    combineElementTruncationToVectorTruncation(SDNode *N,
1185
                                               DAGCombinerInfo &DCI) const;
1186
1187
    /// lowerToVINSERTH - Return the SDValue if this VECTOR_SHUFFLE can be
1188
    /// handled by the VINSERTH instruction introduced in ISA 3.0. This is
1189
    /// essentially any shuffle of v8i16 vectors that just inserts one element
1190
    /// from one vector into the other.
1191
    SDValue lowerToVINSERTH(ShuffleVectorSDNode *N, SelectionDAG &DAG) const;
1192
1193
    /// lowerToVINSERTB - Return the SDValue if this VECTOR_SHUFFLE can be
1194
    /// handled by the VINSERTB instruction introduced in ISA 3.0. This is
1195
    /// essentially v16i8 vector version of VINSERTH.
1196
    SDValue lowerToVINSERTB(ShuffleVectorSDNode *N, SelectionDAG &DAG) const;
1197
1198
    // Return whether the call instruction can potentially be optimized to a
1199
    // tail call. This will cause the optimizers to attempt to move or
1200
    // duplicate return instructions to help enable tail call optimizations.
1201
    bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
1202
    bool hasBitPreservingFPLogic(EVT VT) const override;
1203
    bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;
1204
  }; // end class PPCTargetLowering
1205
1206
  namespace PPC {
1207
1208
    FastISel *createFastISel(FunctionLoweringInfo &FuncInfo,
1209
                             const TargetLibraryInfo *LibInfo);
1210
1211
  } // end namespace PPC
1212
1213
  bool isIntS16Immediate(SDNode *N, int16_t &Imm);
1214
  bool isIntS16Immediate(SDValue Op, int16_t &Imm);
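
Finally, a hedged usage sketch for these free functions, as an instruction-selection helper might use them; the wrapper is hypothetical:

    // True when Op is a constant that fits the signed 16-bit immediate
    // field of D-form instructions such as ADDI.
    static bool fitsDFormImm(SDValue Op) {
      int16_t Imm;
      return isIntS16Immediate(Op, Imm);
    }
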
1215
1216
} // end namespace llvm
1217
1218
#endif // LLVM_LIB_TARGET_POWERPC_PPCISELLOWERING_H