Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Target/PowerPC/PPCISelLowering.cpp

//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the PPCISelLowering class.
//
//===----------------------------------------------------------------------===//

#include "PPCISelLowering.h"
#include "MCTargetDesc/PPCPredicates.h"
#include "PPC.h"
#include "PPCCCState.h"
#include "PPCCallingConv.h"
#include "PPCFrameLowering.h"
#include "PPCInstrInfo.h"
#include "PPCMachineFunctionInfo.h"
#include "PPCPerfectShuffle.h"
#include "PPCRegisterInfo.h"
#include "PPCSubtarget.h"
#include "PPCTargetMachine.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/Value.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSymbolXCOFF.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <iterator>
#include <list>
#include <utility>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "ppc-lowering"

static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);

static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden);

static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);

static cl::opt<bool> DisableSCO("disable-ppc-sco",
cl::desc("disable sibling call optimization on ppc"), cl::Hidden);

static cl::opt<bool> DisableInnermostLoopAlign32("disable-ppc-innermost-loop-align32",
cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden);

static cl::opt<bool> EnableQuadPrecision("enable-ppc-quad-precision",
cl::desc("enable quad precision float support on ppc"), cl::Hidden);

STATISTIC(NumTailCalls, "Number of tail calls");
STATISTIC(NumSiblingCalls, "Number of sibling calls");

static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int);

static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl);

// FIXME: Remove this once the bug has been fixed!
extern cl::opt<bool> ANDIGlueBug;

PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
                                     const PPCSubtarget &STI)
    : TargetLowering(TM), Subtarget(STI) {
  // Use _setjmp/_longjmp instead of setjmp/longjmp.
  setUseUnderscoreSetJmp(true);
  setUseUnderscoreLongJmp(true);

  // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
  // arguments are at least 4/8 bytes aligned.
  bool isPPC64 = Subtarget.isPPC64();
  setMinStackArgumentAlignment(isPPC64 ? 8 : 4);
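  // The argument is in bytes: 8-byte doubleword argument slots under the
  // 64-bit ABI, 4-byte word slots under the 32-bit ABI.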

  // Set up the register classes.
  addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
  if (!useSoftFloat()) {
    if (hasSPE()) {
      addRegisterClass(MVT::f32, &PPC::SPE4RCRegClass);
      addRegisterClass(MVT::f64, &PPC::SPERCRegClass);
    } else {
      addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
      addRegisterClass(MVT::f64, &PPC::F8RCRegClass);
    }
  }

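  // A legend for the legalization actions used throughout this constructor:
  // Legal keeps the node for instruction selection as-is; Expand has the
  // legalizer rewrite it in terms of other operations (or a libcall);
  // Custom routes it through this target's LowerOperation hook; Promote
  // performs the operation in a different type, recorded via
  // AddPromotedToType where the type is not implied.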
  // Match BITREVERSE to customized fast code sequence in the td file.
  setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
  setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);

  // Sub-word ATOMIC_CMP_SWAP needs to ensure that the input is zero-extended.
  setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Custom);

  // PowerPC has an i16 but no i8 (or i1) SEXTLOAD.
  for (MVT VT : MVT::integer_valuetypes()) {
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand);
  }
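  // There is a zero-extending byte load (lbz) but no sign-extending one, so
  // an i8 SEXTLOAD is expanded into a zero-extending load followed by an
  // explicit sign extension; i1 loads are promoted and handled in a wider
  // type.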

  setTruncStoreAction(MVT::f64, MVT::f32, Expand);

  // PowerPC has pre-inc loads and stores.
  setIndexedLoadAction(ISD::PRE_INC, MVT::i1, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i8, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i16, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i32, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i64, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i1, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i8, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i16, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal);
  if (!Subtarget.hasSPE()) {
    setIndexedLoadAction(ISD::PRE_INC, MVT::f32, Legal);
    setIndexedLoadAction(ISD::PRE_INC, MVT::f64, Legal);
    setIndexedStoreAction(ISD::PRE_INC, MVT::f32, Legal);
    setIndexedStoreAction(ISD::PRE_INC, MVT::f64, Legal);
  }

  // PowerPC uses ADDC/ADDE/SUBC/SUBE to propagate carry.
  const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
  for (MVT VT : ScalarIntVTs) {
    setOperationAction(ISD::ADDC, VT, Legal);
    setOperationAction(ISD::ADDE, VT, Legal);
    setOperationAction(ISD::SUBC, VT, Legal);
    setOperationAction(ISD::SUBE, VT, Legal);
  }

  if (Subtarget.useCRBits()) {
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

    if (isPPC64 || Subtarget.hasFPCVT()) {
      setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote);
      AddPromotedToType (ISD::SINT_TO_FP, MVT::i1,
                         isPPC64 ? MVT::i64 : MVT::i32);
      setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote);
      AddPromotedToType(ISD::UINT_TO_FP, MVT::i1,
                        isPPC64 ? MVT::i64 : MVT::i32);
    } else {
      setOperationAction(ISD::SINT_TO_FP, MVT::i1, Custom);
      setOperationAction(ISD::UINT_TO_FP, MVT::i1, Custom);
    }

    // PowerPC does not support direct load/store of condition registers.
    setOperationAction(ISD::LOAD, MVT::i1, Custom);
    setOperationAction(ISD::STORE, MVT::i1, Custom);

    // FIXME: Remove this once the ANDI glue bug is fixed:
    if (ANDIGlueBug)
      setOperationAction(ISD::TRUNCATE, MVT::i1, Custom);

    for (MVT VT : MVT::integer_valuetypes()) {
      setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
      setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
      setTruncStoreAction(VT, MVT::i1, Expand);
    }

    addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass);
  }

  // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
  // PPC (the libcall is not available).
  setOperationAction(ISD::FP_TO_SINT, MVT::ppcf128, Custom);
  setOperationAction(ISD::FP_TO_UINT, MVT::ppcf128, Custom);

  // We do not currently implement these libm ops for PowerPC.
  setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand);
  setOperationAction(ISD::FCEIL,  MVT::ppcf128, Expand);
  setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand);
  setOperationAction(ISD::FRINT,  MVT::ppcf128, Expand);
  setOperationAction(ISD::FNEARBYINT, MVT::ppcf128, Expand);
  setOperationAction(ISD::FREM, MVT::ppcf128, Expand);

  // PowerPC has no SREM/UREM instructions unless we are on P9.
  // On P9 we may use a hardware instruction to compute the remainder.
  // The instructions are not legalized directly because in the cases where the
  // result of both the remainder and the division is required it is more
  // efficient to compute the remainder from the result of the division rather
  // than use the remainder instruction.
  if (Subtarget.isISA3_0()) {
    setOperationAction(ISD::SREM, MVT::i32, Custom);
    setOperationAction(ISD::UREM, MVT::i32, Custom);
    setOperationAction(ISD::SREM, MVT::i64, Custom);
    setOperationAction(ISD::UREM, MVT::i64, Custom);
  } else {
    setOperationAction(ISD::SREM, MVT::i32, Expand);
    setOperationAction(ISD::UREM, MVT::i32, Expand);
    setOperationAction(ISD::SREM, MVT::i64, Expand);
    setOperationAction(ISD::UREM, MVT::i64, Expand);
  }
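  // When both the quotient and the remainder are needed, the remainder can
  // be recovered from the division with one multiply and one subtract,
  // rem = a - (a / b) * b, which is why even the P9 modulo instructions are
  // reached only through custom lowering rather than being marked Legal.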

  // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
  setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i64, Expand);

  // We don't support sin/cos/sqrt/fmod/pow.
  setOperationAction(ISD::FSIN , MVT::f64, Expand);
  setOperationAction(ISD::FCOS , MVT::f64, Expand);
  setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
  setOperationAction(ISD::FREM , MVT::f64, Expand);
  setOperationAction(ISD::FPOW , MVT::f64, Expand);
  setOperationAction(ISD::FSIN , MVT::f32, Expand);
  setOperationAction(ISD::FCOS , MVT::f32, Expand);
  setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
  setOperationAction(ISD::FREM , MVT::f32, Expand);
  setOperationAction(ISD::FPOW , MVT::f32, Expand);
  if (Subtarget.hasSPE()) {
    setOperationAction(ISD::FMA  , MVT::f64, Expand);
    setOperationAction(ISD::FMA  , MVT::f32, Expand);
  } else {
    setOperationAction(ISD::FMA  , MVT::f64, Legal);
    setOperationAction(ISD::FMA  , MVT::f32, Legal);
  }

  setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);

  // If we're enabling GP optimizations, use hardware square root.
  if (!Subtarget.hasFSQRT() &&
      !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTE() &&
        Subtarget.hasFRE()))
    setOperationAction(ISD::FSQRT, MVT::f64, Expand);

  if (!Subtarget.hasFSQRT() &&
      !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTES() &&
        Subtarget.hasFRES()))
    setOperationAction(ISD::FSQRT, MVT::f32, Expand);
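  // FSQRT is kept legal either when the subtarget has a real fsqrt/fsqrts,
  // or when unsafe FP math is enabled and both estimate instructions
  // (frsqrte/fre and their single-precision forms) are present, in which
  // case square roots are computed by Newton-Raphson refinement of the
  // estimates; otherwise FSQRT expands to a sqrt libcall.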

  if (Subtarget.hasFCPSGN()) {
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Legal);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Legal);
  } else {
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
  }

  if (Subtarget.hasFPRND()) {
    setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
    setOperationAction(ISD::FCEIL,  MVT::f64, Legal);
    setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
    setOperationAction(ISD::FROUND, MVT::f64, Legal);

    setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
    setOperationAction(ISD::FCEIL,  MVT::f32, Legal);
    setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
    setOperationAction(ISD::FROUND, MVT::f32, Legal);
  }

  // PowerPC does not have BSWAP, but we can use the vector BSWAP instruction
  // xxbrd to speed up scalar BSWAP64.
  // CTPOP and CTTZ were introduced in P8 and P9, respectively.
  setOperationAction(ISD::BSWAP, MVT::i32  , Expand);
  if (Subtarget.hasP9Vector())
    setOperationAction(ISD::BSWAP, MVT::i64  , Custom);
  else
    setOperationAction(ISD::BSWAP, MVT::i64  , Expand);
  if (Subtarget.isISA3_0()) {
    setOperationAction(ISD::CTTZ , MVT::i32  , Legal);
    setOperationAction(ISD::CTTZ , MVT::i64  , Legal);
  } else {
    setOperationAction(ISD::CTTZ , MVT::i32  , Expand);
    setOperationAction(ISD::CTTZ , MVT::i64  , Expand);
  }

  if (Subtarget.hasPOPCNTD() == PPCSubtarget::POPCNTD_Fast) {
    setOperationAction(ISD::CTPOP, MVT::i32  , Legal);
    setOperationAction(ISD::CTPOP, MVT::i64  , Legal);
  } else {
    setOperationAction(ISD::CTPOP, MVT::i32  , Expand);
    setOperationAction(ISD::CTPOP, MVT::i64  , Expand);
  }

  // PowerPC does not have ROTR.
  setOperationAction(ISD::ROTR, MVT::i32   , Expand);
  setOperationAction(ISD::ROTR, MVT::i64   , Expand);

  if (!Subtarget.useCRBits()) {
    // PowerPC does not have SELECT.
    setOperationAction(ISD::SELECT, MVT::i32, Expand);
    setOperationAction(ISD::SELECT, MVT::i64, Expand);
    setOperationAction(ISD::SELECT, MVT::f32, Expand);
    setOperationAction(ISD::SELECT, MVT::f64, Expand);
  }

  // PowerPC wants to turn select_cc of FP into fsel when possible.
  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);

  // PowerPC wants to optimize integer setcc a bit.
  if (!Subtarget.useCRBits())
    setOperationAction(ISD::SETCC, MVT::i32, Custom);

  // PowerPC does not have BRCOND, which requires SetCC.
  if (!Subtarget.useCRBits())
    setOperationAction(ISD::BRCOND, MVT::Other, Expand);

  setOperationAction(ISD::BR_JT,  MVT::Other, Expand);

  if (Subtarget.hasSPE()) {
    // SPE has built-in conversions.
    setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
    setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
    setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
  } else {
    // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
    setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);

    // PowerPC does not have [U|S]INT_TO_FP.
    setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
    setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
  }

  if (Subtarget.hasDirectMove() && isPPC64) {
    setOperationAction(ISD::BITCAST, MVT::f32, Legal);
    setOperationAction(ISD::BITCAST, MVT::i32, Legal);
    setOperationAction(ISD::BITCAST, MVT::i64, Legal);
    setOperationAction(ISD::BITCAST, MVT::f64, Legal);
  } else {
    setOperationAction(ISD::BITCAST, MVT::f32, Expand);
    setOperationAction(ISD::BITCAST, MVT::i32, Expand);
    setOperationAction(ISD::BITCAST, MVT::i64, Expand);
    setOperationAction(ISD::BITCAST, MVT::f64, Expand);
  }

  // We cannot sextinreg(i1). Expand to shifts.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
  // SjLj exception handling but a light-weight setjmp/longjmp replacement to
  // support continuation, user-level threading, etc. As a result, no other
  // SjLj exception interfaces are implemented; please don't build your own
  // exception handling based on them.
  // LLVM/Clang supports zero-cost DWARF exception handling.
  setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
  setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);

  // We want to legalize GlobalAddress and ConstantPool nodes into the
  // appropriate instructions to materialize the address.
  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
  setOperationAction(ISD::BlockAddress,  MVT::i32, Custom);
  setOperationAction(ISD::ConstantPool,  MVT::i32, Custom);
  setOperationAction(ISD::JumpTable,     MVT::i32, Custom);
  setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
  setOperationAction(ISD::BlockAddress,  MVT::i64, Custom);
  setOperationAction(ISD::ConstantPool,  MVT::i64, Custom);
  setOperationAction(ISD::JumpTable,     MVT::i64, Custom);

  // TRAP is legal.
  setOperationAction(ISD::TRAP, MVT::Other, Legal);

  // TRAMPOLINE is custom lowered.
  setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
  setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex.
  setOperationAction(ISD::VASTART           , MVT::Other, Custom);

  if (Subtarget.isSVR4ABI()) {
    if (isPPC64) {
      // VAARG always uses double-word chunks, so promote anything smaller.
      setOperationAction(ISD::VAARG, MVT::i1, Promote);
      AddPromotedToType (ISD::VAARG, MVT::i1, MVT::i64);
      setOperationAction(ISD::VAARG, MVT::i8, Promote);
      AddPromotedToType (ISD::VAARG, MVT::i8, MVT::i64);
      setOperationAction(ISD::VAARG, MVT::i16, Promote);
      AddPromotedToType (ISD::VAARG, MVT::i16, MVT::i64);
      setOperationAction(ISD::VAARG, MVT::i32, Promote);
      AddPromotedToType (ISD::VAARG, MVT::i32, MVT::i64);
      setOperationAction(ISD::VAARG, MVT::Other, Expand);
    } else {
      // VAARG is custom lowered with the 32-bit SVR4 ABI.
      setOperationAction(ISD::VAARG, MVT::Other, Custom);
      setOperationAction(ISD::VAARG, MVT::i64, Custom);
    }
  } else
    setOperationAction(ISD::VAARG, MVT::Other, Expand);

  if (Subtarget.isSVR4ABI() && !isPPC64)
    // VACOPY is custom lowered with the 32-bit SVR4 ABI.
    setOperationAction(ISD::VACOPY            , MVT::Other, Custom);
  else
    setOperationAction(ISD::VACOPY            , MVT::Other, Expand);

  // Use the default implementation.
  setOperationAction(ISD::VAEND             , MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE         , MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE      , MVT::Other, Custom);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32  , Custom);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64  , Custom);
  setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i32, Custom);
  setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i64, Custom);
  setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom);
  setOperationAction(ISD::EH_DWARF_CFA, MVT::i64, Custom);

  // We want to custom lower some of our intrinsics.
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  // To handle counter-based loop conditions.
  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i1, Custom);

  setOperationAction(ISD::INTRINSIC_VOID, MVT::i8, Custom);
  setOperationAction(ISD::INTRINSIC_VOID, MVT::i16, Custom);
  setOperationAction(ISD::INTRINSIC_VOID, MVT::i32, Custom);
  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);

  // Comparisons that require checking two conditions.
  if (Subtarget.hasSPE()) {
    setCondCodeAction(ISD::SETO, MVT::f32, Expand);
    setCondCodeAction(ISD::SETO, MVT::f64, Expand);
    setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
    setCondCodeAction(ISD::SETUO, MVT::f64, Expand);
  }
  setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETULT, MVT::f64, Expand);
  setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUGT, MVT::f64, Expand);
  setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUEQ, MVT::f64, Expand);
  setCondCodeAction(ISD::SETOGE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOGE, MVT::f64, Expand);
  setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOLE, MVT::f64, Expand);
  setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETONE, MVT::f64, Expand);

  if (Subtarget.has64BitSupport()) {
    // They also have instructions for converting between i64 and fp.
    setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
    setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
    setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
    setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
    // This is just the low 32 bits of a (signed) fp->i64 conversion.
    // We cannot do this with Promote because i64 is not a legal type.
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);

    if (Subtarget.hasLFIWAX() || Subtarget.isPPC64())
      setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
  } else {
    // PowerPC does not have FP_TO_UINT on 32-bit implementations.
    if (Subtarget.hasSPE())
      setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
    else
      setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
  }

  // With the instructions enabled under FPCVT, we can do everything.
  if (Subtarget.hasFPCVT()) {
    if (Subtarget.has64BitSupport()) {
      setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
      setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
      setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
      setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
    }

    setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
    setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
    setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
  }

  if (Subtarget.use64BitRegs()) {
    // 64-bit PowerPC implementations can support i64 types directly.
    addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
    // BUILD_PAIR can't be handled natively, and should be expanded to shl/or.
    setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
    // 64-bit PowerPC wants to expand i128 shifts itself.
    setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
    setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
    setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
  } else {
    // 32-bit PowerPC wants to expand i64 shifts itself.
    setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
    setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
    setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
  }

  if (Subtarget.hasAltivec()) {
    // First set operation action for all vector types to expand. Then we
    // will selectively turn on ones that can be effectively codegen'd.
    for (MVT VT : MVT::vector_valuetypes()) {
      // add/sub are legal for all supported vector VT's.
      setOperationAction(ISD::ADD, VT, Legal);
      setOperationAction(ISD::SUB, VT, Legal);

      // For v2i64, these are only valid with P8Vector. This is corrected after
      // the loop.
      setOperationAction(ISD::SMAX, VT, Legal);
      setOperationAction(ISD::SMIN, VT, Legal);
      setOperationAction(ISD::UMAX, VT, Legal);
      setOperationAction(ISD::UMIN, VT, Legal);

      if (Subtarget.hasVSX()) {
        setOperationAction(ISD::FMAXNUM, VT, Legal);
        setOperationAction(ISD::FMINNUM, VT, Legal);
      }

      // Vector instructions introduced in P8.
      if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) {
        setOperationAction(ISD::CTPOP, VT, Legal);
        setOperationAction(ISD::CTLZ, VT, Legal);
      }
      else {
        setOperationAction(ISD::CTPOP, VT, Expand);
        setOperationAction(ISD::CTLZ, VT, Expand);
      }

      // Vector instructions introduced in P9.
      if (Subtarget.hasP9Altivec() && (VT.SimpleTy != MVT::v1i128))
        setOperationAction(ISD::CTTZ, VT, Legal);
      else
        setOperationAction(ISD::CTTZ, VT, Expand);

      // We promote all shuffles to v16i8.
      setOperationAction(ISD::VECTOR_SHUFFLE, VT, Promote);
      AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8);

      // We promote all non-typed operations to v4i32.
      setOperationAction(ISD::AND   , VT, Promote);
      AddPromotedToType (ISD::AND   , VT, MVT::v4i32);
      setOperationAction(ISD::OR    , VT, Promote);
      AddPromotedToType (ISD::OR    , VT, MVT::v4i32);
      setOperationAction(ISD::XOR   , VT, Promote);
      AddPromotedToType (ISD::XOR   , VT, MVT::v4i32);
      setOperationAction(ISD::LOAD  , VT, Promote);
      AddPromotedToType (ISD::LOAD  , VT, MVT::v4i32);
      setOperationAction(ISD::SELECT, VT, Promote);
      AddPromotedToType (ISD::SELECT, VT, MVT::v4i32);
      setOperationAction(ISD::VSELECT, VT, Legal);
      setOperationAction(ISD::SELECT_CC, VT, Promote);
      AddPromotedToType (ISD::SELECT_CC, VT, MVT::v4i32);
      setOperationAction(ISD::STORE, VT, Promote);
      AddPromotedToType (ISD::STORE, VT, MVT::v4i32);

      // No other operations are legal.
      setOperationAction(ISD::MUL , VT, Expand);
      setOperationAction(ISD::SDIV, VT, Expand);
      setOperationAction(ISD::SREM, VT, Expand);
      setOperationAction(ISD::UDIV, VT, Expand);
      setOperationAction(ISD::UREM, VT, Expand);
      setOperationAction(ISD::FDIV, VT, Expand);
      setOperationAction(ISD::FREM, VT, Expand);
      setOperationAction(ISD::FNEG, VT, Expand);
      setOperationAction(ISD::FSQRT, VT, Expand);
      setOperationAction(ISD::FLOG, VT, Expand);
      setOperationAction(ISD::FLOG10, VT, Expand);
      setOperationAction(ISD::FLOG2, VT, Expand);
      setOperationAction(ISD::FEXP, VT, Expand);
      setOperationAction(ISD::FEXP2, VT, Expand);
      setOperationAction(ISD::FSIN, VT, Expand);
      setOperationAction(ISD::FCOS, VT, Expand);
      setOperationAction(ISD::FABS, VT, Expand);
      setOperationAction(ISD::FFLOOR, VT, Expand);
      setOperationAction(ISD::FCEIL,  VT, Expand);
      setOperationAction(ISD::FTRUNC, VT, Expand);
      setOperationAction(ISD::FRINT,  VT, Expand);
      setOperationAction(ISD::FNEARBYINT, VT, Expand);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand);
      setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
      setOperationAction(ISD::BUILD_VECTOR, VT, Expand);
      setOperationAction(ISD::MULHU, VT, Expand);
      setOperationAction(ISD::MULHS, VT, Expand);
      setOperationAction(ISD::UMUL_LOHI, VT, Expand);
      setOperationAction(ISD::SMUL_LOHI, VT, Expand);
      setOperationAction(ISD::UDIVREM, VT, Expand);
      setOperationAction(ISD::SDIVREM, VT, Expand);
      setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
      setOperationAction(ISD::FPOW, VT, Expand);
      setOperationAction(ISD::BSWAP, VT, Expand);
      setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
      setOperationAction(ISD::ROTL, VT, Expand);
      setOperationAction(ISD::ROTR, VT, Expand);

      for (MVT InnerVT : MVT::vector_valuetypes()) {
        setTruncStoreAction(VT, InnerVT, Expand);
        setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
        setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
        setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
      }
    }
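    // With the blanket defaults set, the code below selectively re-enables
    // what the subtarget can actually do; any vector operation left as
    // Expand is scalarized or unrolled by the legalizer instead of reaching
    // instruction selection.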
    if (!Subtarget.hasP8Vector()) {
      setOperationAction(ISD::SMAX, MVT::v2i64, Expand);
      setOperationAction(ISD::SMIN, MVT::v2i64, Expand);
      setOperationAction(ISD::UMAX, MVT::v2i64, Expand);
      setOperationAction(ISD::UMIN, MVT::v2i64, Expand);
    }

    for (auto VT : {MVT::v2i64, MVT::v4i32, MVT::v8i16, MVT::v16i8})
      setOperationAction(ISD::ABS, VT, Custom);

    // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
    // with merges, splats, etc.
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);

    // Vector truncates to sub-word integer that fit in an Altivec/VSX register
    // are cheap, so handle them before they get expanded to scalar.
    setOperationAction(ISD::TRUNCATE, MVT::v8i8, Custom);
    setOperationAction(ISD::TRUNCATE, MVT::v4i8, Custom);
    setOperationAction(ISD::TRUNCATE, MVT::v2i8, Custom);
    setOperationAction(ISD::TRUNCATE, MVT::v4i16, Custom);
    setOperationAction(ISD::TRUNCATE, MVT::v2i16, Custom);

    setOperationAction(ISD::AND   , MVT::v4i32, Legal);
    setOperationAction(ISD::OR    , MVT::v4i32, Legal);
    setOperationAction(ISD::XOR   , MVT::v4i32, Legal);
    setOperationAction(ISD::LOAD  , MVT::v4i32, Legal);
    setOperationAction(ISD::SELECT, MVT::v4i32,
                       Subtarget.useCRBits() ? Legal : Expand);
    setOperationAction(ISD::STORE , MVT::v4i32, Legal);
    setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
    setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
    setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
    setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
    setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
    setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
    setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
    setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);

    // Without hasP8Altivec set, v2i64 SMAX isn't available.
    // But ABS custom lowering requires SMAX support.
    if (!Subtarget.hasP8Altivec())
      setOperationAction(ISD::ABS, MVT::v2i64, Expand);

    addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
    addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
    addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass);
    addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass);

    setOperationAction(ISD::MUL, MVT::v4f32, Legal);
    setOperationAction(ISD::FMA, MVT::v4f32, Legal);

    if (TM.Options.UnsafeFPMath || Subtarget.hasVSX()) {
      setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
      setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
    }

    if (Subtarget.hasP8Altivec())
      setOperationAction(ISD::MUL, MVT::v4i32, Legal);
    else
      setOperationAction(ISD::MUL, MVT::v4i32, Custom);

    setOperationAction(ISD::MUL, MVT::v8i16, Custom);
    setOperationAction(ISD::MUL, MVT::v16i8, Custom);

    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom);

    setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);

    // Altivec does not contain unordered floating-point compare instructions.
    setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand);
    setCondCodeAction(ISD::SETUEQ, MVT::v4f32, Expand);
    setCondCodeAction(ISD::SETO,   MVT::v4f32, Expand);
    setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand);

    if (Subtarget.hasVSX()) {
      setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);
      if (Subtarget.hasP8Vector()) {
        setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);
        setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Legal);
      }
      if (Subtarget.hasDirectMove() && isPPC64) {
        setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Legal);
        setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Legal);
        setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Legal);
        setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2i64, Legal);
        setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i8, Legal);
        setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i16, Legal);
        setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Legal);
        setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Legal);
      }
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);

      setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
      setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
      setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
      setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
      setOperationAction(ISD::FROUND, MVT::v2f64, Legal);

      setOperationAction(ISD::FROUND, MVT::v4f32, Legal);

      setOperationAction(ISD::MUL, MVT::v2f64, Legal);
      setOperationAction(ISD::FMA, MVT::v2f64, Legal);

      setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
      setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);

      // Share the Altivec comparison restrictions.
      setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand);
      setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand);
      setCondCodeAction(ISD::SETO,   MVT::v2f64, Expand);
      setCondCodeAction(ISD::SETONE, MVT::v2f64, Expand);

      setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
      setOperationAction(ISD::STORE, MVT::v2f64, Legal);

      setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Legal);

      if (Subtarget.hasP8Vector())
        addRegisterClass(MVT::f32, &PPC::VSSRCRegClass);

      addRegisterClass(MVT::f64, &PPC::VSFRCRegClass);

      addRegisterClass(MVT::v4i32, &PPC::VSRCRegClass);
      addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);
      addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);

      if (Subtarget.hasP8Altivec()) {
        setOperationAction(ISD::SHL, MVT::v2i64, Legal);
        setOperationAction(ISD::SRA, MVT::v2i64, Legal);
        setOperationAction(ISD::SRL, MVT::v2i64, Legal);

        // 128 bit shifts can be accomplished via 3 instructions for SHL and
        // SRL, but not for SRA because of the instructions available:
        // VS{RL} and VS{RL}O. However due to direct move costs, it's not worth
        // doing.
        setOperationAction(ISD::SHL, MVT::v1i128, Expand);
        setOperationAction(ISD::SRL, MVT::v1i128, Expand);
        setOperationAction(ISD::SRA, MVT::v1i128, Expand);

        setOperationAction(ISD::SETCC, MVT::v2i64, Legal);
      }
      else {
        setOperationAction(ISD::SHL, MVT::v2i64, Expand);
        setOperationAction(ISD::SRA, MVT::v2i64, Expand);
        setOperationAction(ISD::SRL, MVT::v2i64, Expand);

        setOperationAction(ISD::SETCC, MVT::v2i64, Custom);

        // VSX v2i64 only supports non-arithmetic operations.
        setOperationAction(ISD::ADD, MVT::v2i64, Expand);
        setOperationAction(ISD::SUB, MVT::v2i64, Expand);
      }

      setOperationAction(ISD::LOAD, MVT::v2i64, Promote);
      AddPromotedToType (ISD::LOAD, MVT::v2i64, MVT::v2f64);
      setOperationAction(ISD::STORE, MVT::v2i64, Promote);
      AddPromotedToType (ISD::STORE, MVT::v2i64, MVT::v2f64);

      setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Legal);

      setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal);
      setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal);
      setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal);
      setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal);

      // Custom handling for partial vectors of integers converted to
      // floating point. We already have optimal handling for v2i32 through
      // the DAG combine, so those aren't necessary.
      setOperationAction(ISD::UINT_TO_FP, MVT::v2i8, Custom);
      setOperationAction(ISD::UINT_TO_FP, MVT::v4i8, Custom);
      setOperationAction(ISD::UINT_TO_FP, MVT::v2i16, Custom);
      setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
      setOperationAction(ISD::SINT_TO_FP, MVT::v2i8, Custom);
      setOperationAction(ISD::SINT_TO_FP, MVT::v4i8, Custom);
      setOperationAction(ISD::SINT_TO_FP, MVT::v2i16, Custom);
      setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);

      setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
      setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
      setOperationAction(ISD::FABS, MVT::v4f32, Legal);
      setOperationAction(ISD::FABS, MVT::v2f64, Legal);
      setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Legal);
      setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Legal);

      if (Subtarget.hasDirectMove())
        setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
      setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom);

      addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass);
    }

    if (Subtarget.hasP8Altivec()) {
      addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass);
      addRegisterClass(MVT::v1i128, &PPC::VRRCRegClass);
    }

    if (Subtarget.hasP9Vector()) {
      setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
      setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);

      // 128 bit shifts can be accomplished via 3 instructions for SHL and
      // SRL, but not for SRA because of the instructions available:
      // VS{RL} and VS{RL}O.
      setOperationAction(ISD::SHL, MVT::v1i128, Legal);
      setOperationAction(ISD::SRL, MVT::v1i128, Legal);
      setOperationAction(ISD::SRA, MVT::v1i128, Expand);

      if (EnableQuadPrecision) {
        addRegisterClass(MVT::f128, &PPC::VRRCRegClass);
        setOperationAction(ISD::FADD, MVT::f128, Legal);
        setOperationAction(ISD::FSUB, MVT::f128, Legal);
        setOperationAction(ISD::FDIV, MVT::f128, Legal);
        setOperationAction(ISD::FMUL, MVT::f128, Legal);
        setOperationAction(ISD::FP_EXTEND, MVT::f128, Legal);
        // No extending loads to f128 on PPC.
        for (MVT FPT : MVT::fp_valuetypes())
          setLoadExtAction(ISD::EXTLOAD, MVT::f128, FPT, Expand);
        setOperationAction(ISD::FMA, MVT::f128, Legal);
        setCondCodeAction(ISD::SETULT, MVT::f128, Expand);
        setCondCodeAction(ISD::SETUGT, MVT::f128, Expand);
        setCondCodeAction(ISD::SETUEQ, MVT::f128, Expand);
        setCondCodeAction(ISD::SETOGE, MVT::f128, Expand);
        setCondCodeAction(ISD::SETOLE, MVT::f128, Expand);
        setCondCodeAction(ISD::SETONE, MVT::f128, Expand);

        setOperationAction(ISD::FTRUNC, MVT::f128, Legal);
        setOperationAction(ISD::FRINT, MVT::f128, Legal);
        setOperationAction(ISD::FFLOOR, MVT::f128, Legal);
        setOperationAction(ISD::FCEIL, MVT::f128, Legal);
        setOperationAction(ISD::FNEARBYINT, MVT::f128, Legal);
        setOperationAction(ISD::FROUND, MVT::f128, Legal);

        setOperationAction(ISD::SELECT, MVT::f128, Expand);
        setOperationAction(ISD::FP_ROUND, MVT::f64, Legal);
        setOperationAction(ISD::FP_ROUND, MVT::f32, Legal);
        setTruncStoreAction(MVT::f128, MVT::f64, Expand);
        setTruncStoreAction(MVT::f128, MVT::f32, Expand);
        setOperationAction(ISD::BITCAST, MVT::i128, Custom);
        // No implementation for these ops for PowerPC.
        setOperationAction(ISD::FSIN , MVT::f128, Expand);
        setOperationAction(ISD::FCOS , MVT::f128, Expand);
        setOperationAction(ISD::FPOW, MVT::f128, Expand);
        setOperationAction(ISD::FPOWI, MVT::f128, Expand);
        setOperationAction(ISD::FREM, MVT::f128, Expand);
      }
      setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom);

    }

    if (Subtarget.hasP9Altivec()) {
      setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
      setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom);
    }
  }

  if (Subtarget.hasQPX()) {
    setOperationAction(ISD::FADD, MVT::v4f64, Legal);
    setOperationAction(ISD::FSUB, MVT::v4f64, Legal);
    setOperationAction(ISD::FMUL, MVT::v4f64, Legal);
    setOperationAction(ISD::FREM, MVT::v4f64, Expand);

    setOperationAction(ISD::FCOPYSIGN, MVT::v4f64, Legal);
    setOperationAction(ISD::FGETSIGN, MVT::v4f64, Expand);

    setOperationAction(ISD::LOAD  , MVT::v4f64, Custom);
    setOperationAction(ISD::STORE , MVT::v4f64, Custom);

    setTruncStoreAction(MVT::v4f64, MVT::v4f32, Custom);
    setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f32, Custom);

    if (!Subtarget.useCRBits())
      setOperationAction(ISD::SELECT, MVT::v4f64, Expand);
    setOperationAction(ISD::VSELECT, MVT::v4f64, Legal);

    setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4f64, Legal);
    setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4f64, Expand);
    setOperationAction(ISD::CONCAT_VECTORS , MVT::v4f64, Expand);
    setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4f64, Expand);
    setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4f64, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f64, Legal);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4f64, Custom);

    setOperationAction(ISD::FP_TO_SINT , MVT::v4f64, Legal);
    setOperationAction(ISD::FP_TO_UINT , MVT::v4f64, Expand);

    setOperationAction(ISD::FP_ROUND , MVT::v4f32, Legal);
    setOperationAction(ISD::FP_ROUND_INREG , MVT::v4f32, Expand);
    setOperationAction(ISD::FP_EXTEND, MVT::v4f64, Legal);

    setOperationAction(ISD::FNEG , MVT::v4f64, Legal);
    setOperationAction(ISD::FABS , MVT::v4f64, Legal);
    setOperationAction(ISD::FSIN , MVT::v4f64, Expand);
    setOperationAction(ISD::FCOS , MVT::v4f64, Expand);
    setOperationAction(ISD::FPOW , MVT::v4f64, Expand);
    setOperationAction(ISD::FLOG , MVT::v4f64, Expand);
    setOperationAction(ISD::FLOG2 , MVT::v4f64, Expand);
    setOperationAction(ISD::FLOG10 , MVT::v4f64, Expand);
    setOperationAction(ISD::FEXP , MVT::v4f64, Expand);
    setOperationAction(ISD::FEXP2 , MVT::v4f64, Expand);

    setOperationAction(ISD::FMINNUM, MVT::v4f64, Legal);
    setOperationAction(ISD::FMAXNUM, MVT::v4f64, Legal);

    setIndexedLoadAction(ISD::PRE_INC, MVT::v4f64, Legal);
    setIndexedStoreAction(ISD::PRE_INC, MVT::v4f64, Legal);

    addRegisterClass(MVT::v4f64, &PPC::QFRCRegClass);

    setOperationAction(ISD::FADD, MVT::v4f32, Legal);
    setOperationAction(ISD::FSUB, MVT::v4f32, Legal);
    setOperationAction(ISD::FMUL, MVT::v4f32, Legal);
    setOperationAction(ISD::FREM, MVT::v4f32, Expand);

    setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Legal);
    setOperationAction(ISD::FGETSIGN, MVT::v4f32, Expand);

    setOperationAction(ISD::LOAD  , MVT::v4f32, Custom);
    setOperationAction(ISD::STORE , MVT::v4f32, Custom);

    if (!Subtarget.useCRBits())
      setOperationAction(ISD::SELECT, MVT::v4f32, Expand);
    setOperationAction(ISD::VSELECT, MVT::v4f32, Legal);

    setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4f32, Legal);
    setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4f32, Expand);
    setOperationAction(ISD::CONCAT_VECTORS , MVT::v4f32, Expand);
    setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4f32, Expand);
    setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4f32, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);

    setOperationAction(ISD::FP_TO_SINT , MVT::v4f32, Legal);
    setOperationAction(ISD::FP_TO_UINT , MVT::v4f32, Expand);

    setOperationAction(ISD::FNEG , MVT::v4f32, Legal);
    setOperationAction(ISD::FABS , MVT::v4f32, Legal);
    setOperationAction(ISD::FSIN , MVT::v4f32, Expand);
    setOperationAction(ISD::FCOS , MVT::v4f32, Expand);
    setOperationAction(ISD::FPOW , MVT::v4f32, Expand);
    setOperationAction(ISD::FLOG , MVT::v4f32, Expand);
    setOperationAction(ISD::FLOG2 , MVT::v4f32, Expand);
    setOperationAction(ISD::FLOG10 , MVT::v4f32, Expand);
    setOperationAction(ISD::FEXP , MVT::v4f32, Expand);
    setOperationAction(ISD::FEXP2 , MVT::v4f32, Expand);

    setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal);
    setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal);

    setIndexedLoadAction(ISD::PRE_INC, MVT::v4f32, Legal);
    setIndexedStoreAction(ISD::PRE_INC, MVT::v4f32, Legal);

    addRegisterClass(MVT::v4f32, &PPC::QSRCRegClass);

    setOperationAction(ISD::AND , MVT::v4i1, Legal);
    setOperationAction(ISD::OR , MVT::v4i1, Legal);
    setOperationAction(ISD::XOR , MVT::v4i1, Legal);

    if (!Subtarget.useCRBits())
      setOperationAction(ISD::SELECT, MVT::v4i1, Expand);
    setOperationAction(ISD::VSELECT, MVT::v4i1, Legal);

    setOperationAction(ISD::LOAD  , MVT::v4i1, Custom);
    setOperationAction(ISD::STORE , MVT::v4i1, Custom);

    setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4i1, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4i1, Expand);
    setOperationAction(ISD::CONCAT_VECTORS , MVT::v4i1, Expand);
    setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4i1, Expand);
    setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4i1, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i1, Expand);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i1, Custom);

    setOperationAction(ISD::SINT_TO_FP, MVT::v4i1, Custom);
    setOperationAction(ISD::UINT_TO_FP, MVT::v4i1, Custom);

    addRegisterClass(MVT::v4i1, &PPC::QBRCRegClass);

    setOperationAction(ISD::FFLOOR, MVT::v4f64, Legal);
    setOperationAction(ISD::FCEIL,  MVT::v4f64, Legal);
    setOperationAction(ISD::FTRUNC, MVT::v4f64, Legal);
    setOperationAction(ISD::FROUND, MVT::v4f64, Legal);

    setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
    setOperationAction(ISD::FCEIL,  MVT::v4f32, Legal);
    setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
    setOperationAction(ISD::FROUND, MVT::v4f32, Legal);

    setOperationAction(ISD::FNEARBYINT, MVT::v4f64, Expand);
    setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Expand);

    // These need to set FE_INEXACT, and so cannot be vectorized here.
    setOperationAction(ISD::FRINT, MVT::v4f64, Expand);
    setOperationAction(ISD::FRINT, MVT::v4f32, Expand);

    if (TM.Options.UnsafeFPMath) {
      setOperationAction(ISD::FDIV, MVT::v4f64, Legal);
      setOperationAction(ISD::FSQRT, MVT::v4f64, Legal);

      setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
      setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
    } else {
      setOperationAction(ISD::FDIV, MVT::v4f64, Expand);
      setOperationAction(ISD::FSQRT, MVT::v4f64, Expand);

      setOperationAction(ISD::FDIV, MVT::v4f32, Expand);
      setOperationAction(ISD::FSQRT, MVT::v4f32, Expand);
    }
  }

  if (Subtarget.has64BitSupport())
    setOperationAction(ISD::PREFETCH, MVT::Other, Legal);

  setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, isPPC64 ? Legal : Custom);
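  // On 64-bit subtargets READCYCLECOUNTER is a single time-base read; on
  // 32-bit subtargets the two 32-bit halves must be read separately with a
  // retry loop in case the high half ticks over in between, hence Custom.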

  if (!isPPC64) {
    setOperationAction(ISD::ATOMIC_LOAD,  MVT::i64, Expand);
    setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand);
  }

  setBooleanContents(ZeroOrOneBooleanContent);
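  // Scalar boolean results (e.g. from SETCC) are 0 or 1. The vector
  // convention is set separately below, since Altivec compares fill each
  // lane with all-zeros or all-ones.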

  if (Subtarget.hasAltivec()) {
    // Altivec instructions set fields to all zeros or all ones.
    setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
  }

  if (!isPPC64) {
    // These libcalls are not available in 32-bit.
    setLibcallName(RTLIB::SHL_I128, nullptr);
    setLibcallName(RTLIB::SRL_I128, nullptr);
    setLibcallName(RTLIB::SRA_I128, nullptr);
  }
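  // Setting a libcall name to nullptr marks the routine as unavailable, so
  // the legalizer must lower i128 shifts by other means rather than emit a
  // call.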

  setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1);

  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::ADD);
  setTargetDAGCombine(ISD::SHL);
  setTargetDAGCombine(ISD::SRA);
  setTargetDAGCombine(ISD::SRL);
  setTargetDAGCombine(ISD::MUL);
  setTargetDAGCombine(ISD::SINT_TO_FP);
  setTargetDAGCombine(ISD::BUILD_VECTOR);
  if (Subtarget.hasFPCVT())
    setTargetDAGCombine(ISD::UINT_TO_FP);
  setTargetDAGCombine(ISD::LOAD);
  setTargetDAGCombine(ISD::STORE);
  setTargetDAGCombine(ISD::BR_CC);
  if (Subtarget.useCRBits())
    setTargetDAGCombine(ISD::BRCOND);
  setTargetDAGCombine(ISD::BSWAP);
  setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
  setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
  setTargetDAGCombine(ISD::INTRINSIC_VOID);

  setTargetDAGCombine(ISD::SIGN_EXTEND);
  setTargetDAGCombine(ISD::ZERO_EXTEND);
  setTargetDAGCombine(ISD::ANY_EXTEND);

  setTargetDAGCombine(ISD::TRUNCATE);

  if (Subtarget.useCRBits()) {
    setTargetDAGCombine(ISD::TRUNCATE);
    setTargetDAGCombine(ISD::SETCC);
    setTargetDAGCombine(ISD::SELECT_CC);
  }
1127
1.85k
1128
1.85k
  // Use reciprocal estimates.
1129
1.85k
  if (TM.Options.UnsafeFPMath) {
1130
17
    setTargetDAGCombine(ISD::FDIV);
1131
17
    setTargetDAGCombine(ISD::FSQRT);
1132
17
  }
1133
1.85k
1134
1.85k
  if (Subtarget.hasP9Altivec()) {
1135
213
    setTargetDAGCombine(ISD::ABS);
1136
213
    setTargetDAGCombine(ISD::VSELECT);
1137
213
  }
1138
1.85k
1139
1.85k
  // Darwin long double math library functions have $LDBL128 appended.
1140
1.85k
  if (Subtarget.isDarwin()) {
1141
0
    setLibcallName(RTLIB::COS_PPCF128, "cosl$LDBL128");
1142
0
    setLibcallName(RTLIB::POW_PPCF128, "powl$LDBL128");
1143
0
    setLibcallName(RTLIB::REM_PPCF128, "fmodl$LDBL128");
1144
0
    setLibcallName(RTLIB::SIN_PPCF128, "sinl$LDBL128");
1145
0
    setLibcallName(RTLIB::SQRT_PPCF128, "sqrtl$LDBL128");
1146
0
    setLibcallName(RTLIB::LOG_PPCF128, "logl$LDBL128");
1147
0
    setLibcallName(RTLIB::LOG2_PPCF128, "log2l$LDBL128");
1148
0
    setLibcallName(RTLIB::LOG10_PPCF128, "log10l$LDBL128");
1149
0
    setLibcallName(RTLIB::EXP_PPCF128, "expl$LDBL128");
1150
0
    setLibcallName(RTLIB::EXP2_PPCF128, "exp2l$LDBL128");
1151
0
  }
1152
1.85k
1153
1.85k
  if (EnableQuadPrecision) {
1154
15
    setLibcallName(RTLIB::LOG_F128, "logf128");
1155
15
    setLibcallName(RTLIB::LOG2_F128, "log2f128");
1156
15
    setLibcallName(RTLIB::LOG10_F128, "log10f128");
1157
15
    setLibcallName(RTLIB::EXP_F128, "expf128");
1158
15
    setLibcallName(RTLIB::EXP2_F128, "exp2f128");
1159
15
    setLibcallName(RTLIB::SIN_F128, "sinf128");
1160
15
    setLibcallName(RTLIB::COS_F128, "cosf128");
1161
15
    setLibcallName(RTLIB::POW_F128, "powf128");
1162
15
    setLibcallName(RTLIB::FMIN_F128, "fminf128");
1163
15
    setLibcallName(RTLIB::FMAX_F128, "fmaxf128");
1164
15
    setLibcallName(RTLIB::POWI_F128, "__powikf2");
1165
15
    setLibcallName(RTLIB::REM_F128, "fmodf128");
1166
15
  }
1167
1.85k
1168
1.85k
  // With 32 condition bits, we don't need to sink (and duplicate) compares
1169
1.85k
  // aggressively in CodeGenPrep.
1170
1.85k
  if (Subtarget.useCRBits()) {
1171
1.62k
    setHasMultipleConditionRegisters();
1172
1.62k
    setJumpIsExpensive();
1173
1.62k
  }
1174
1.85k
1175
1.85k
  setMinFunctionAlignment(2);
1176
1.85k
  if (Subtarget.isDarwin())
1177
0
    setPrefFunctionAlignment(4);
1178
1.85k
1179
1.85k
  switch (Subtarget.getDarwinDirective()) {
1180
1.85k
  
default: break595
;
1181
1.85k
  case PPC::DIR_970:
1182
1.25k
  case PPC::DIR_A2:
1183
1.25k
  case PPC::DIR_E500:
1184
1.25k
  case PPC::DIR_E500mc:
1185
1.25k
  case PPC::DIR_E5500:
1186
1.25k
  case PPC::DIR_PWR4:
1187
1.25k
  case PPC::DIR_PWR5:
1188
1.25k
  case PPC::DIR_PWR5X:
1189
1.25k
  case PPC::DIR_PWR6:
1190
1.25k
  case PPC::DIR_PWR6X:
1191
1.25k
  case PPC::DIR_PWR7:
1192
1.25k
  case PPC::DIR_PWR8:
1193
1.25k
  case PPC::DIR_PWR9:
1194
1.25k
    setPrefFunctionAlignment(4);
1195
1.25k
    setPrefLoopAlignment(4);
1196
1.25k
    break;
1197
1.85k
  }
1198
1.85k
1199
1.85k
  if (Subtarget.enableMachineScheduler())
1200
1.85k
    setSchedulingPreference(Sched::Source);
1201
0
  else
1202
0
    setSchedulingPreference(Sched::Hybrid);
1203
1.85k
1204
1.85k
  computeRegisterProperties(STI.getRegisterInfo());
1205
1.85k
1206
1.85k
  // The Freescale cores do better with aggressive inlining of memcpy and
  // friends. GCC uses the same threshold of 128 bytes (= 32 word stores).
  if (Subtarget.getDarwinDirective() == PPC::DIR_E500mc ||
      Subtarget.getDarwinDirective() == PPC::DIR_E5500) {
    MaxStoresPerMemset = 32;
    MaxStoresPerMemsetOptSize = 16;
    MaxStoresPerMemcpy = 32;
    MaxStoresPerMemcpyOptSize = 8;
    MaxStoresPerMemmove = 32;
    MaxStoresPerMemmoveOptSize = 8;
  } else if (Subtarget.getDarwinDirective() == PPC::DIR_A2) {
    // The A2 also benefits from (very) aggressive inlining of memcpy and
    // friends. The overhead of a function call, even when warm, can be
    // over one hundred cycles.
    MaxStoresPerMemset = 128;
    MaxStoresPerMemcpy = 128;
    MaxStoresPerMemmove = 128;
    MaxLoadsPerMemcmp = 128;
  } else {
    MaxLoadsPerMemcmp = 8;
    MaxLoadsPerMemcmpOptSize = 4;
  }
}
/// getMaxByValAlign - Helper for getByValTypeAlignment to determine
/// the desired ByVal argument alignment.
static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign,
                             unsigned MaxMaxAlign) {
  if (MaxAlign == MaxMaxAlign)
    return;
  if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
    if (MaxMaxAlign >= 32 && VTy->getBitWidth() >= 256)
      MaxAlign = 32;
    else if (VTy->getBitWidth() >= 128 && MaxAlign < 16)
      MaxAlign = 16;
  } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
    unsigned EltAlign = 0;
    getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign);
    if (EltAlign > MaxAlign)
      MaxAlign = EltAlign;
  } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
    for (auto *EltTy : STy->elements()) {
      unsigned EltAlign = 0;
      getMaxByValAlign(EltTy, EltAlign, MaxMaxAlign);
      if (EltAlign > MaxAlign)
        MaxAlign = EltAlign;
      if (MaxAlign == MaxMaxAlign)
        break;
    }
  }
}
/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
/// function arguments in the caller parameter area.
unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty,
                                                  const DataLayout &DL) const {
  // Darwin passes everything on 4 byte boundary.
  if (Subtarget.isDarwin())
    return 4;

  // 16byte and wider vectors are passed on 16byte boundary.
  // The rest is 8 on PPC64 and 4 on PPC32 boundary.
  unsigned Align = Subtarget.isPPC64() ? 8 : 4;
  if (Subtarget.hasAltivec() || Subtarget.hasQPX())
    getMaxByValAlign(Ty, Align, Subtarget.hasQPX() ? 32 : 16);
  return Align;
}
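
// [Editorial sketch, not part of the original source.] A simplified,
// self-contained model of the recursion above, using a hypothetical toy
// type description in place of llvm::Type: the ByVal alignment of an
// aggregate is the largest vector alignment found anywhere inside it,
// capped at MaxMaxAlign (16 with Altivec, 32 with QPX).
#include <algorithm>
#include <vector>

struct ToyType {                    // hypothetical stand-in for llvm::Type
  unsigned VectorBits = 0;          // nonzero only for vector types
  std::vector<ToyType> Elements;    // members of a struct or array
};

static unsigned toyMaxByValAlign(const ToyType &Ty, unsigned MaxMaxAlign) {
  unsigned Align = 0;
  if (MaxMaxAlign >= 32 && Ty.VectorBits >= 256)
    Align = 32;
  else if (Ty.VectorBits >= 128)
    Align = 16;
  for (const ToyType &Elt : Ty.Elements) {
    Align = std::max(Align, toyMaxByValAlign(Elt, MaxMaxAlign));
    if (Align == MaxMaxAlign)
      break;                        // alignment cannot grow any further
  }
  return std::min(Align, MaxMaxAlign);
}
// e.g. a struct containing a 128-bit vector yields 16 with Altivec enabled.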

bool PPCTargetLowering::useSoftFloat() const {
  return Subtarget.useSoftFloat();
}

bool PPCTargetLowering::hasSPE() const {
  return Subtarget.hasSPE();
}

bool PPCTargetLowering::preferIncOfAddToSubOfNot(EVT VT) const {
  return VT.isScalarInteger();
}
const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch ((PPCISD::NodeType)Opcode) {
  case PPCISD::FIRST_NUMBER:    break;
  case PPCISD::FSEL:            return "PPCISD::FSEL";
  case PPCISD::FCFID:           return "PPCISD::FCFID";
  case PPCISD::FCFIDU:          return "PPCISD::FCFIDU";
  case PPCISD::FCFIDS:          return "PPCISD::FCFIDS";
  case PPCISD::FCFIDUS:         return "PPCISD::FCFIDUS";
  case PPCISD::FCTIDZ:          return "PPCISD::FCTIDZ";
  case PPCISD::FCTIWZ:          return "PPCISD::FCTIWZ";
  case PPCISD::FCTIDUZ:         return "PPCISD::FCTIDUZ";
  case PPCISD::FCTIWUZ:         return "PPCISD::FCTIWUZ";
  case PPCISD::FP_TO_UINT_IN_VSR:
                                return "PPCISD::FP_TO_UINT_IN_VSR";
  case PPCISD::FP_TO_SINT_IN_VSR:
                                return "PPCISD::FP_TO_SINT_IN_VSR";
  case PPCISD::FRE:             return "PPCISD::FRE";
  case PPCISD::FRSQRTE:         return "PPCISD::FRSQRTE";
  case PPCISD::STFIWX:          return "PPCISD::STFIWX";
  case PPCISD::VMADDFP:         return "PPCISD::VMADDFP";
  case PPCISD::VNMSUBFP:        return "PPCISD::VNMSUBFP";
  case PPCISD::VPERM:           return "PPCISD::VPERM";
  case PPCISD::XXSPLT:          return "PPCISD::XXSPLT";
  case PPCISD::VECINSERT:       return "PPCISD::VECINSERT";
  case PPCISD::XXREVERSE:       return "PPCISD::XXREVERSE";
  case PPCISD::XXPERMDI:        return "PPCISD::XXPERMDI";
  case PPCISD::VECSHL:          return "PPCISD::VECSHL";
  case PPCISD::CMPB:            return "PPCISD::CMPB";
  case PPCISD::Hi:              return "PPCISD::Hi";
  case PPCISD::Lo:              return "PPCISD::Lo";
  case PPCISD::TOC_ENTRY:       return "PPCISD::TOC_ENTRY";
  case PPCISD::ATOMIC_CMP_SWAP_8: return "PPCISD::ATOMIC_CMP_SWAP_8";
  case PPCISD::ATOMIC_CMP_SWAP_16: return "PPCISD::ATOMIC_CMP_SWAP_16";
  case PPCISD::DYNALLOC:        return "PPCISD::DYNALLOC";
  case PPCISD::DYNAREAOFFSET:   return "PPCISD::DYNAREAOFFSET";
  case PPCISD::GlobalBaseReg:   return "PPCISD::GlobalBaseReg";
  case PPCISD::SRL:             return "PPCISD::SRL";
  case PPCISD::SRA:             return "PPCISD::SRA";
  case PPCISD::SHL:             return "PPCISD::SHL";
  case PPCISD::SRA_ADDZE:       return "PPCISD::SRA_ADDZE";
  case PPCISD::CALL:            return "PPCISD::CALL";
  case PPCISD::CALL_NOP:        return "PPCISD::CALL_NOP";
  case PPCISD::MTCTR:           return "PPCISD::MTCTR";
  case PPCISD::BCTRL:           return "PPCISD::BCTRL";
  case PPCISD::BCTRL_LOAD_TOC:  return "PPCISD::BCTRL_LOAD_TOC";
  case PPCISD::RET_FLAG:        return "PPCISD::RET_FLAG";
  case PPCISD::READ_TIME_BASE:  return "PPCISD::READ_TIME_BASE";
  case PPCISD::EH_SJLJ_SETJMP:  return "PPCISD::EH_SJLJ_SETJMP";
  case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";
  case PPCISD::MFOCRF:          return "PPCISD::MFOCRF";
  case PPCISD::MFVSR:           return "PPCISD::MFVSR";
  case PPCISD::MTVSRA:          return "PPCISD::MTVSRA";
  case PPCISD::MTVSRZ:          return "PPCISD::MTVSRZ";
  case PPCISD::SINT_VEC_TO_FP:  return "PPCISD::SINT_VEC_TO_FP";
  case PPCISD::UINT_VEC_TO_FP:  return "PPCISD::UINT_VEC_TO_FP";
  case PPCISD::ANDIo_1_EQ_BIT:  return "PPCISD::ANDIo_1_EQ_BIT";
  case PPCISD::ANDIo_1_GT_BIT:  return "PPCISD::ANDIo_1_GT_BIT";
  case PPCISD::VCMP:            return "PPCISD::VCMP";
  case PPCISD::VCMPo:           return "PPCISD::VCMPo";
  case PPCISD::LBRX:            return "PPCISD::LBRX";
  case PPCISD::STBRX:           return "PPCISD::STBRX";
  case PPCISD::LFIWAX:          return "PPCISD::LFIWAX";
  case PPCISD::LFIWZX:          return "PPCISD::LFIWZX";
  case PPCISD::LXSIZX:          return "PPCISD::LXSIZX";
  case PPCISD::STXSIX:          return "PPCISD::STXSIX";
  case PPCISD::VEXTS:           return "PPCISD::VEXTS";
  case PPCISD::SExtVElems:      return "PPCISD::SExtVElems";
  case PPCISD::LXVD2X:          return "PPCISD::LXVD2X";
  case PPCISD::STXVD2X:         return "PPCISD::STXVD2X";
  case PPCISD::ST_VSR_SCAL_INT:
                                return "PPCISD::ST_VSR_SCAL_INT";
  case PPCISD::COND_BRANCH:     return "PPCISD::COND_BRANCH";
  case PPCISD::BDNZ:            return "PPCISD::BDNZ";
  case PPCISD::BDZ:             return "PPCISD::BDZ";
  case PPCISD::MFFS:            return "PPCISD::MFFS";
  case PPCISD::FADDRTZ:         return "PPCISD::FADDRTZ";
  case PPCISD::TC_RETURN:       return "PPCISD::TC_RETURN";
  case PPCISD::CR6SET:          return "PPCISD::CR6SET";
  case PPCISD::CR6UNSET:        return "PPCISD::CR6UNSET";
  case PPCISD::PPC32_GOT:       return "PPCISD::PPC32_GOT";
  case PPCISD::PPC32_PICGOT:    return "PPCISD::PPC32_PICGOT";
  case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA";
  case PPCISD::LD_GOT_TPREL_L:  return "PPCISD::LD_GOT_TPREL_L";
  case PPCISD::ADD_TLS:         return "PPCISD::ADD_TLS";
  case PPCISD::ADDIS_TLSGD_HA:  return "PPCISD::ADDIS_TLSGD_HA";
  case PPCISD::ADDI_TLSGD_L:    return "PPCISD::ADDI_TLSGD_L";
  case PPCISD::GET_TLS_ADDR:    return "PPCISD::GET_TLS_ADDR";
  case PPCISD::ADDI_TLSGD_L_ADDR: return "PPCISD::ADDI_TLSGD_L_ADDR";
  case PPCISD::ADDIS_TLSLD_HA:  return "PPCISD::ADDIS_TLSLD_HA";
  case PPCISD::ADDI_TLSLD_L:    return "PPCISD::ADDI_TLSLD_L";
  case PPCISD::GET_TLSLD_ADDR:  return "PPCISD::GET_TLSLD_ADDR";
  case PPCISD::ADDI_TLSLD_L_ADDR: return "PPCISD::ADDI_TLSLD_L_ADDR";
  case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
  case PPCISD::ADDI_DTPREL_L:   return "PPCISD::ADDI_DTPREL_L";
  case PPCISD::VADD_SPLAT:      return "PPCISD::VADD_SPLAT";
  case PPCISD::SC:              return "PPCISD::SC";
  case PPCISD::CLRBHRB:         return "PPCISD::CLRBHRB";
  case PPCISD::MFBHRBE:         return "PPCISD::MFBHRBE";
  case PPCISD::RFEBB:           return "PPCISD::RFEBB";
  case PPCISD::XXSWAPD:         return "PPCISD::XXSWAPD";
  case PPCISD::SWAP_NO_CHAIN:   return "PPCISD::SWAP_NO_CHAIN";
  case PPCISD::VABSD:           return "PPCISD::VABSD";
  case PPCISD::QVFPERM:         return "PPCISD::QVFPERM";
  case PPCISD::QVGPCI:          return "PPCISD::QVGPCI";
  case PPCISD::QVALIGNI:        return "PPCISD::QVALIGNI";
  case PPCISD::QVESPLATI:       return "PPCISD::QVESPLATI";
  case PPCISD::QBFLT:           return "PPCISD::QBFLT";
  case PPCISD::QVLFSb:          return "PPCISD::QVLFSb";
  case PPCISD::BUILD_FP128:     return "PPCISD::BUILD_FP128";
  case PPCISD::BUILD_SPE64:     return "PPCISD::BUILD_SPE64";
  case PPCISD::EXTRACT_SPE:     return "PPCISD::EXTRACT_SPE";
  case PPCISD::EXTSWSLI:        return "PPCISD::EXTSWSLI";
  case PPCISD::LD_VSX_LH:       return "PPCISD::LD_VSX_LH";
  case PPCISD::FP_EXTEND_LH:    return "PPCISD::FP_EXTEND_LH";
  }
  return nullptr;
}
EVT PPCTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &C,
                                          EVT VT) const {
  if (!VT.isVector())
    return Subtarget.useCRBits() ? MVT::i1 : MVT::i32;

  if (Subtarget.hasQPX())
    return EVT::getVectorVT(C, MVT::i1, VT.getVectorNumElements());

  return VT.changeVectorElementTypeToInteger();
}
bool PPCTargetLowering::enableAggressiveFMAFusion(EVT VT) const {
  assert(VT.isFloatingPoint() && "Non-floating-point FMA?");
  return true;
}
//===----------------------------------------------------------------------===//
// Node matching predicates, for use by the tblgen matching code.
//===----------------------------------------------------------------------===//

/// isFloatingPointZero - Return true if this is 0.0 or -0.0.
static bool isFloatingPointZero(SDValue Op) {
  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
    return CFP->getValueAPF().isZero();
  else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
    // Maybe this has already been legalized into the constant pool?
    if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
      if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
        return CFP->getValueAPF().isZero();
  }
  return false;
}
/// isConstantOrUndef - Op is either an undef node or a ConstantSDNode.  Return
/// true if Op is undef or if it matches the specified value.
static bool isConstantOrUndef(int Op, int Val) {
  return Op < 0 || Op == Val;
}
/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUHUM instruction.
/// The ShuffleKind distinguishes between big-endian operations with
/// two different inputs (0), either-endian operations with two identical
/// inputs (1), and little-endian operations with two different inputs (2).
/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
                               SelectionDAG &DAG) {
  bool IsLE = DAG.getDataLayout().isLittleEndian();
  if (ShuffleKind == 0) {
    if (IsLE)
      return false;
    for (unsigned i = 0; i != 16; ++i)
      if (!isConstantOrUndef(N->getMaskElt(i), i*2+1))
        return false;
  } else if (ShuffleKind == 2) {
    if (!IsLE)
      return false;
    for (unsigned i = 0; i != 16; ++i)
      if (!isConstantOrUndef(N->getMaskElt(i), i*2))
        return false;
  } else if (ShuffleKind == 1) {
    unsigned j = IsLE ? 0 : 1;
    for (unsigned i = 0; i != 8; ++i)
      if (!isConstantOrUndef(N->getMaskElt(i),    i*2+j) ||
          !isConstantOrUndef(N->getMaskElt(i+8), i*2+j))
        return false;
  }
  return true;
}
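
// [Editorial sketch, not part of the original source.] The big-endian
// ShuffleKind-0 case above accepts exactly the mask that keeps the odd
// bytes <1,3,5,...,31> of the two concatenated v16i8 inputs, with negative
// entries standing for undef lanes. A standalone check of that pattern:
static bool sketchIsBEVPKUHUMMask(const int (&Mask)[16]) {
  for (unsigned i = 0; i != 16; ++i)
    if (Mask[i] >= 0 && Mask[i] != int(i * 2 + 1)) // undef always matches
      return false;
  return true;
}
// e.g. the mask <1,3,5,7,9,11,13,15,17,19,21,23,25,27,29,31> is accepted.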

/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUWUM instruction.
/// The ShuffleKind distinguishes between big-endian operations with
/// two different inputs (0), either-endian operations with two identical
/// inputs (1), and little-endian operations with two different inputs (2).
/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
                               SelectionDAG &DAG) {
  bool IsLE = DAG.getDataLayout().isLittleEndian();
  if (ShuffleKind == 0) {
    if (IsLE)
      return false;
    for (unsigned i = 0; i != 16; i += 2)
      if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+2) ||
          !isConstantOrUndef(N->getMaskElt(i+1), i*2+3))
        return false;
  } else if (ShuffleKind == 2) {
    if (!IsLE)
      return false;
    for (unsigned i = 0; i != 16; i += 2)
      if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2) ||
          !isConstantOrUndef(N->getMaskElt(i+1), i*2+1))
        return false;
  } else if (ShuffleKind == 1) {
    unsigned j = IsLE ? 0 : 2;
    for (unsigned i = 0; i != 8; i += 2)
      if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+j)   ||
          !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||
          !isConstantOrUndef(N->getMaskElt(i+8), i*2+j)   ||
          !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1))
        return false;
  }
  return true;
}
/// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUDUM instruction, AND the VPKUDUM instruction exists for the
/// current subtarget.
///
/// The ShuffleKind distinguishes between big-endian operations with
/// two different inputs (0), either-endian operations with two identical
/// inputs (1), and little-endian operations with two different inputs (2).
/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
                               SelectionDAG &DAG) {
  const PPCSubtarget& Subtarget =
    static_cast<const PPCSubtarget&>(DAG.getSubtarget());
  if (!Subtarget.hasP8Vector())
    return false;

  bool IsLE = DAG.getDataLayout().isLittleEndian();
  if (ShuffleKind == 0) {
    if (IsLE)
      return false;
    for (unsigned i = 0; i != 16; i += 4)
      if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+4) ||
          !isConstantOrUndef(N->getMaskElt(i+1), i*2+5) ||
          !isConstantOrUndef(N->getMaskElt(i+2), i*2+6) ||
          !isConstantOrUndef(N->getMaskElt(i+3), i*2+7))
        return false;
  } else if (ShuffleKind == 2) {
    if (!IsLE)
      return false;
    for (unsigned i = 0; i != 16; i += 4)
      if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2) ||
          !isConstantOrUndef(N->getMaskElt(i+1), i*2+1) ||
          !isConstantOrUndef(N->getMaskElt(i+2), i*2+2) ||
          !isConstantOrUndef(N->getMaskElt(i+3), i*2+3))
        return false;
  } else if (ShuffleKind == 1) {
    unsigned j = IsLE ? 0 : 4;
    for (unsigned i = 0; i != 8; i += 4)
      if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+j)   ||
          !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||
          !isConstantOrUndef(N->getMaskElt(i+2), i*2+j+2) ||
          !isConstantOrUndef(N->getMaskElt(i+3), i*2+j+3) ||
          !isConstantOrUndef(N->getMaskElt(i+8), i*2+j)   ||
          !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1) ||
          !isConstantOrUndef(N->getMaskElt(i+10), i*2+j+2) ||
          !isConstantOrUndef(N->getMaskElt(i+11), i*2+j+3))
        return false;
  }
  return true;
}
/// isVMerge - Common function, used to match vmrg* shuffles.
///
static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
                     unsigned LHSStart, unsigned RHSStart) {
  if (N->getValueType(0) != MVT::v16i8)
    return false;
  assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
         "Unsupported merge size!");

  for (unsigned i = 0; i != 8/UnitSize; ++i)     // Step over units
    for (unsigned j = 0; j != UnitSize; ++j) {   // Step over bytes within unit
      if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j),
                             LHSStart+j+i*UnitSize) ||
          !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j),
                             RHSStart+j+i*UnitSize))
        return false;
    }
  return true;
}
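
// [Editorial sketch, not part of the original source.] A generator for the
// reference mask the loop above checks against. For UnitSize == 1,
// LHSStart == 8, RHSStart == 24 (big-endian vmrglb with two inputs) it
// produces the byte interleaving <8,24,9,25,...,15,31>.
#include <array>
static std::array<int, 16> sketchVMergeMask(unsigned UnitSize,
                                            unsigned LHSStart,
                                            unsigned RHSStart) {
  std::array<int, 16> Mask{};
  for (unsigned i = 0; i != 8 / UnitSize; ++i)   // step over units
    for (unsigned j = 0; j != UnitSize; ++j) {   // bytes within a unit
      Mask[i * UnitSize * 2 + j] = LHSStart + i * UnitSize + j;
      Mask[i * UnitSize * 2 + UnitSize + j] = RHSStart + i * UnitSize + j;
    }
  return Mask;
}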

/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
/// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes).
/// The ShuffleKind distinguishes between big-endian merges with two
/// different inputs (0), either-endian merges with two identical inputs (1),
/// and little-endian merges with two different inputs (2).  For the latter,
/// the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
                             unsigned ShuffleKind, SelectionDAG &DAG) {
  if (DAG.getDataLayout().isLittleEndian()) {
    if (ShuffleKind == 1) // unary
      return isVMerge(N, UnitSize, 0, 0);
    else if (ShuffleKind == 2) // swapped
      return isVMerge(N, UnitSize, 0, 16);
    else
      return false;
  } else {
    if (ShuffleKind == 1) // unary
      return isVMerge(N, UnitSize, 8, 8);
    else if (ShuffleKind == 0) // normal
      return isVMerge(N, UnitSize, 8, 24);
    else
      return false;
  }
}
/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
/// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes).
/// The ShuffleKind distinguishes between big-endian merges with two
/// different inputs (0), either-endian merges with two identical inputs (1),
/// and little-endian merges with two different inputs (2).  For the latter,
/// the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
                             unsigned ShuffleKind, SelectionDAG &DAG) {
  if (DAG.getDataLayout().isLittleEndian()) {
    if (ShuffleKind == 1) // unary
      return isVMerge(N, UnitSize, 8, 8);
    else if (ShuffleKind == 2) // swapped
      return isVMerge(N, UnitSize, 8, 24);
    else
      return false;
  } else {
    if (ShuffleKind == 1) // unary
      return isVMerge(N, UnitSize, 0, 0);
    else if (ShuffleKind == 0) // normal
      return isVMerge(N, UnitSize, 0, 16);
    else
      return false;
  }
}
/**
 * Common function used to match vmrgew and vmrgow shuffles
 *
 * The indexOffset determines whether to look for even or odd words in
 * the shuffle mask. This is based on the endianness of the target
 * machine.
 *   - Little Endian:
 *     - Use offset of 0 to check for odd elements
 *     - Use offset of 4 to check for even elements
 *   - Big Endian:
 *     - Use offset of 0 to check for even elements
 *     - Use offset of 4 to check for odd elements
 * A detailed description of the vector element ordering for little endian and
 * big endian can be found at
 * http://www.ibm.com/developerworks/library/l-ibm-xl-c-cpp-compiler/index.html
 * Targeting your applications - what little endian and big endian IBM XL C/C++
 * compiler differences mean to you
 *
 * The mask to the shuffle vector instruction specifies the indices of the
 * elements from the two input vectors to place in the result. The elements are
 * numbered in array-access order, starting with the first vector. These vectors
 * are always of type v16i8, thus each vector will contain 16 elements, each
 * 8 bits in size. More info on the shuffle vector can be found in the
 * http://llvm.org/docs/LangRef.html#shufflevector-instruction
 * Language Reference.
 *
 * The RHSStartValue indicates whether the same input vectors are used (unary)
 * or two different input vectors are used, based on the following:
 *   - If the instruction uses the same vector for both inputs, the range of the
 *     indices will be 0 to 15. In this case, the RHSStart value passed should
 *     be 0.
 *   - If the instruction has two different vectors then the range of the
 *     indices will be 0 to 31. In this case, the RHSStart value passed should
 *     be 16 (indices 0-15 specify elements in the first vector while indices 16
 *     to 31 specify elements in the second vector).
 *
 * \param[in] N The shuffle vector SD Node to analyze
 * \param[in] IndexOffset Specifies whether to look for even or odd elements
 * \param[in] RHSStartValue Specifies the starting index for the righthand input
 * vector to the shuffle_vector instruction
 * \return true iff this shuffle vector represents an even or odd word merge
 */
static bool isVMerge(ShuffleVectorSDNode *N, unsigned IndexOffset,
                     unsigned RHSStartValue) {
  if (N->getValueType(0) != MVT::v16i8)
    return false;

  for (unsigned i = 0; i < 2; ++i)
    for (unsigned j = 0; j < 4; ++j)
      if (!isConstantOrUndef(N->getMaskElt(i*4+j),
                             i*RHSStartValue+j+IndexOffset) ||
          !isConstantOrUndef(N->getMaskElt(i*4+j+8),
                             i*RHSStartValue+j+IndexOffset+8))
        return false;
  return true;
}
/**
 * Determine if the specified shuffle mask is suitable for the vmrgew or
 * vmrgow instructions.
 *
 * \param[in] N The shuffle vector SD Node to analyze
 * \param[in] CheckEven Check for an even merge (true) or an odd merge (false)
 * \param[in] ShuffleKind Identify the type of merge:
 *   - 0 = big-endian merge with two different inputs;
 *   - 1 = either-endian merge with two identical inputs;
 *   - 2 = little-endian merge with two different inputs (inputs are swapped for
 *     little-endian merges).
 * \param[in] DAG The current SelectionDAG
 * \return true iff this shuffle mask is suitable for vmrgew/vmrgow
 */
bool PPC::isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven,
                              unsigned ShuffleKind, SelectionDAG &DAG) {
  if (DAG.getDataLayout().isLittleEndian()) {
    unsigned indexOffset = CheckEven ? 4 : 0;
    if (ShuffleKind == 1) // Unary
      return isVMerge(N, indexOffset, 0);
    else if (ShuffleKind == 2) // swapped
      return isVMerge(N, indexOffset, 16);
    else
      return false;
  }
  else {
    unsigned indexOffset = CheckEven ? 0 : 4;
    if (ShuffleKind == 1) // Unary
      return isVMerge(N, indexOffset, 0);
    else if (ShuffleKind == 0) // Normal
      return isVMerge(N, indexOffset, 16);
    else
      return false;
  }
  return false;
}
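
// [Editorial sketch, not part of the original source.] A generator for the
// word-merge masks accepted above, mirroring the checks in isVMerge. With
// IndexOffset == 4 and RHSStartValue == 16 (little-endian even merge with
// two inputs) it yields <4,5,6,7,20,21,22,23,12,13,14,15,28,29,30,31>;
// IndexOffset == 0 selects the odd words instead.
#include <array>
static std::array<int, 16> sketchVMRGEOMask(unsigned IndexOffset,
                                            unsigned RHSStartValue) {
  std::array<int, 16> Mask{};
  for (unsigned i = 0; i != 2; ++i)
    for (unsigned j = 0; j != 4; ++j) {
      Mask[i * 4 + j]     = i * RHSStartValue + j + IndexOffset;
      Mask[i * 4 + j + 8] = i * RHSStartValue + j + IndexOffset + 8;
    }
  return Mask;
}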

/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
/// amount, otherwise return -1.
/// The ShuffleKind distinguishes between big-endian operations with two
/// different inputs (0), either-endian operations with two identical inputs
/// (1), and little-endian operations with two different inputs (2).  For the
/// latter, the input operands are swapped (see PPCInstrAltivec.td).
int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
                             SelectionDAG &DAG) {
  if (N->getValueType(0) != MVT::v16i8)
    return -1;

  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);

  // Find the first non-undef value in the shuffle mask.
  unsigned i;
  for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i)
    /*search*/;

  if (i == 16) return -1;  // all undef.

  // Otherwise, check to see if the rest of the elements are consecutively
  // numbered from this value.
  unsigned ShiftAmt = SVOp->getMaskElt(i);
  if (ShiftAmt < i) return -1;

  ShiftAmt -= i;
  bool isLE = DAG.getDataLayout().isLittleEndian();

  if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) {
    // Check the rest of the elements to see if they are consecutive.
    for (++i; i != 16; ++i)
      if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
        return -1;
  } else if (ShuffleKind == 1) {
    // Check the rest of the elements to see if they are consecutive.
    for (++i; i != 16; ++i)
      if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
        return -1;
  } else
    return -1;

  if (isLE)
    ShiftAmt = 16 - ShiftAmt;

  return ShiftAmt;
}
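
// [Editorial sketch, not part of the original source.] The big-endian
// two-input case above reduces to finding a mask of the shape
// <ShiftAmt, ShiftAmt+1, ..., ShiftAmt+15> over the concatenated inputs.
// A standalone version of that check (undef lanes encoded as -1):
static int sketchVSLDOIShift(const int (&Mask)[16]) {
  unsigned i = 0;
  while (i != 16 && Mask[i] < 0)
    ++i;                               // skip leading undefs
  if (i == 16 || Mask[i] < int(i))
    return -1;
  int ShiftAmt = Mask[i] - int(i);
  for (; i != 16; ++i)
    if (Mask[i] >= 0 && Mask[i] != ShiftAmt + int(i))
      return -1;
  return ShiftAmt;                     // byte shift for the vsldoi
}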

/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a splat of a single element that is suitable for input to
/// VSPLTB/VSPLTH/VSPLTW.
bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
  assert(N->getValueType(0) == MVT::v16i8 &&
         (EltSize == 1 || EltSize == 2 || EltSize == 4));

  // The consecutive indices need to specify an element, not part of two
  // different elements.  So abandon ship early if this isn't the case.
  if (N->getMaskElt(0) % EltSize != 0)
    return false;

  // This is a splat operation if each element of the permute is the same, and
  // if the value doesn't reference the second vector.
  unsigned ElementBase = N->getMaskElt(0);

  // FIXME: Handle UNDEF elements too!
  if (ElementBase >= 16)
    return false;

  // Check that the indices are consecutive, in the case of a multi-byte element
  // splatted with a v16i8 mask.
  for (unsigned i = 1; i != EltSize; ++i)
    if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase))
      return false;

  for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
    if (N->getMaskElt(i) < 0) continue;
    for (unsigned j = 0; j != EltSize; ++j)
      if (N->getMaskElt(i+j) != N->getMaskElt(j))
        return false;
  }
  return true;
}
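
// [Editorial sketch, not part of the original source.] A generator for the
// byte-level splat masks the routine above accepts: every EltSize-byte
// group repeats the bytes of the splatted element. For EltSize == 4 and
// element 2 it yields <8,9,10,11,8,9,10,11,8,9,10,11,8,9,10,11>.
#include <array>
static std::array<int, 16> sketchSplatMask(unsigned EltSize, unsigned Elt) {
  std::array<int, 16> Mask{};
  for (unsigned i = 0; i != 16; ++i)
    Mask[i] = Elt * EltSize + (i % EltSize);  // repeat one element's bytes
  return Mask;
}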

/// Check that the mask is shuffling N byte elements. Within each N byte
/// element of the mask, the indices could be either in increasing or
/// decreasing order as long as they are consecutive.
/// \param[in] N the shuffle vector SD Node to analyze
/// \param[in] Width the element width in bytes, could be 2/4/8/16 (HalfWord/
/// Word/DoubleWord/QuadWord).
/// \param[in] StepLen the delta indices number among the N byte element, if
/// the mask is in increasing/decreasing order then it is 1/-1.
/// \return true iff the mask is shuffling N byte elements.
static bool isNByteElemShuffleMask(ShuffleVectorSDNode *N, unsigned Width,
                                   int StepLen) {
  assert((Width == 2 || Width == 4 || Width == 8 || Width == 16) &&
         "Unexpected element width.");
  assert((StepLen == 1 || StepLen == -1) && "Unexpected step length.");

  unsigned NumOfElem = 16 / Width;
  unsigned MaskVal[16]; //  Width is never greater than 16
  for (unsigned i = 0; i < NumOfElem; ++i) {
    MaskVal[0] = N->getMaskElt(i * Width);
    if ((StepLen == 1) && (MaskVal[0] % Width)) {
      return false;
    } else if ((StepLen == -1) && ((MaskVal[0] + 1) % Width)) {
      return false;
    }

    for (unsigned int j = 1; j < Width; ++j) {
      MaskVal[j] = N->getMaskElt(i * Width + j);
      if (MaskVal[j] != MaskVal[j-1] + StepLen) {
        return false;
      }
    }
  }

  return true;
}
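
// [Editorial sketch, not part of the original source.] A standalone version
// of the check above (undef lanes are not modeled here). It accepts, e.g.,
// <3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12> for Width == 4, StepLen == -1.
static bool sketchIsNByteMask(const int (&Mask)[16], unsigned Width,
                              int StepLen) {
  for (unsigned i = 0; i != 16 / Width; ++i) {
    int First = Mask[i * Width];
    if (StepLen == 1 && First % int(Width) != 0)
      return false;                 // increasing group must start an element
    if (StepLen == -1 && (First + 1) % int(Width) != 0)
      return false;                 // decreasing group must end an element
    for (unsigned j = 1; j != Width; ++j)
      if (Mask[i * Width + j] != Mask[i * Width + j - 1] + StepLen)
        return false;
  }
  return true;
}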

bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
                          unsigned &InsertAtByte, bool &Swap, bool IsLE) {
  if (!isNByteElemShuffleMask(N, 4, 1))
    return false;

  // Now we look at mask elements 0,4,8,12
  unsigned M0 = N->getMaskElt(0) / 4;
  unsigned M1 = N->getMaskElt(4) / 4;
  unsigned M2 = N->getMaskElt(8) / 4;
  unsigned M3 = N->getMaskElt(12) / 4;
  unsigned LittleEndianShifts[] = { 2, 1, 0, 3 };
  unsigned BigEndianShifts[] = { 3, 0, 1, 2 };

  // Below, let H and L be arbitrary elements of the shuffle mask
  // where H is in the range [4,7] and L is in the range [0,3].
  // H, 1, 2, 3 or L, 5, 6, 7
  if ((M0 > 3 && M1 == 1 && M2 == 2 && M3 == 3) ||
      (M0 < 4 && M1 == 5 && M2 == 6 && M3 == 7)) {
    ShiftElts = IsLE ? LittleEndianShifts[M0 & 0x3] : BigEndianShifts[M0 & 0x3];
    InsertAtByte = IsLE ? 12 : 0;
    Swap = M0 < 4;
    return true;
  }
  // 0, H, 2, 3 or 4, L, 6, 7
  if ((M1 > 3 && M0 == 0 && M2 == 2 && M3 == 3) ||
      (M1 < 4 && M0 == 4 && M2 == 6 && M3 == 7)) {
    ShiftElts = IsLE ? LittleEndianShifts[M1 & 0x3] : BigEndianShifts[M1 & 0x3];
    InsertAtByte = IsLE ? 8 : 4;
    Swap = M1 < 4;
    return true;
  }
  // 0, 1, H, 3 or 4, 5, L, 7
  if ((M2 > 3 && M0 == 0 && M1 == 1 && M3 == 3) ||
      (M2 < 4 && M0 == 4 && M1 == 5 && M3 == 7)) {
    ShiftElts = IsLE ? LittleEndianShifts[M2 & 0x3] : BigEndianShifts[M2 & 0x3];
    InsertAtByte = IsLE ? 4 : 8;
    Swap = M2 < 4;
    return true;
  }
  // 0, 1, 2, H or 4, 5, 6, L
  if ((M3 > 3 && M0 == 0 && M1 == 1 && M2 == 2) ||
      (M3 < 4 && M0 == 4 && M1 == 5 && M2 == 6)) {
    ShiftElts = IsLE ? LittleEndianShifts[M3 & 0x3] : BigEndianShifts[M3 & 0x3];
    InsertAtByte = IsLE ? 0 : 12;
    Swap = M3 < 4;
    return true;
  }

  // If both vector operands for the shuffle are the same vector, the mask will
  // contain only elements from the first one and the second one will be undef.
  if (N->getOperand(1).isUndef()) {
    ShiftElts = 0;
    Swap = true;
    unsigned XXINSERTWSrcElem = IsLE ? 2 : 1;
    if (M0 == XXINSERTWSrcElem && M1 == 1 && M2 == 2 && M3 == 3) {
      InsertAtByte = IsLE ? 12 : 0;
      return true;
    }
    if (M0 == 0 && M1 == XXINSERTWSrcElem && M2 == 2 && M3 == 3) {
      InsertAtByte = IsLE ? 8 : 4;
      return true;
    }
    if (M0 == 0 && M1 == 1 && M2 == XXINSERTWSrcElem && M3 == 3) {
      InsertAtByte = IsLE ? 4 : 8;
      return true;
    }
    if (M0 == 0 && M1 == 1 && M2 == 2 && M3 == XXINSERTWSrcElem) {
      InsertAtByte = IsLE ? 0 : 12;
      return true;
    }
  }

  return false;
}

bool PPC::isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
                               bool &Swap, bool IsLE) {
  assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
  // Ensure each byte index of the word is consecutive.
  if (!isNByteElemShuffleMask(N, 4, 1))
    return false;

  // Now we look at mask elements 0,4,8,12, which are the beginning of words.
  unsigned M0 = N->getMaskElt(0) / 4;
  unsigned M1 = N->getMaskElt(4) / 4;
  unsigned M2 = N->getMaskElt(8) / 4;
  unsigned M3 = N->getMaskElt(12) / 4;

  // If both vector operands for the shuffle are the same vector, the mask will
  // contain only elements from the first one and the second one will be undef.
  if (N->getOperand(1).isUndef()) {
    assert(M0 < 4 && "Indexing into an undef vector?");
    if (M1 != (M0 + 1) % 4 || M2 != (M1 + 1) % 4 || M3 != (M2 + 1) % 4)
      return false;

    ShiftElts = IsLE ? (4 - M0) % 4 : M0;
    Swap = false;
    return true;
  }

  // Ensure each word index of the ShuffleVector Mask is consecutive.
  if (M1 != (M0 + 1) % 8 || M2 != (M1 + 1) % 8 || M3 != (M2 + 1) % 8)
    return false;

  if (IsLE) {
    if (M0 == 0 || M0 == 7 || M0 == 6 || M0 == 5) {
      // Input vectors don't need to be swapped if the leading element
      // of the result is one of the 3 left elements of the second vector
      // (or if there is no shift to be done at all).
      Swap = false;
      ShiftElts = (8 - M0) % 8;
    } else if (M0 == 4 || M0 == 3 || M0 == 2 || M0 == 1) {
      // Input vectors need to be swapped if the leading element
      // of the result is one of the 3 left elements of the first vector
      // (or if we're shifting by 4 - thereby simply swapping the vectors).
      Swap = true;
      ShiftElts = (4 - M0) % 4;
    }

    return true;
  } else {                                          // BE
    if (M0 == 0 || M0 == 1 || M0 == 2 || M0 == 3) {
      // Input vectors don't need to be swapped if the leading element
      // of the result is one of the 4 elements of the first vector.
      Swap = false;
      ShiftElts = M0;
    } else if (M0 == 4 || M0 == 5 || M0 == 6 || M0 == 7) {
      // Input vectors need to be swapped if the leading element
      // of the result is one of the 4 elements of the right vector.
      Swap = true;
      ShiftElts = M0 - 4;
    }

    return true;
  }
}

bool static isXXBRShuffleMaskHelper(ShuffleVectorSDNode *N, int Width) {
  assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");

  if (!isNByteElemShuffleMask(N, Width, -1))
    return false;

  for (int i = 0; i < 16; i += Width)
    if (N->getMaskElt(i) != i + Width - 1)
      return false;

  return true;
}

bool PPC::isXXBRHShuffleMask(ShuffleVectorSDNode *N) {
  return isXXBRShuffleMaskHelper(N, 2);
}

bool PPC::isXXBRWShuffleMask(ShuffleVectorSDNode *N) {
  return isXXBRShuffleMaskHelper(N, 4);
}

bool PPC::isXXBRDShuffleMask(ShuffleVectorSDNode *N) {
  return isXXBRShuffleMaskHelper(N, 8);
}

bool PPC::isXXBRQShuffleMask(ShuffleVectorSDNode *N) {
  return isXXBRShuffleMaskHelper(N, 16);
}
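
// [Editorial sketch, not part of the original source.] Combining the two
// conditions above, an XXBR mask reverses the bytes within each Width-byte
// element. This generator produces the unique accepted mask; for Width == 4
// (XXBRW) it is <3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12>.
#include <array>
static std::array<int, 16> sketchXXBRMask(int Width) {
  std::array<int, 16> Mask{};
  for (int i = 0; i < 16; i += Width)        // one group per element
    for (int j = 0; j != Width; ++j)
      Mask[i + j] = i + Width - 1 - j;       // bytes reversed in the group
  return Mask;
}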

/// Can node \p N be lowered to an XXPERMDI instruction? If so, set \p Swap
/// if the inputs to the instruction should be swapped and set \p DM to the
/// value for the immediate.
/// Specifically, set \p Swap to true only if \p N can be lowered to XXPERMDI
/// AND element 0 of the result comes from the first input (LE) or second input
/// (BE). Set \p DM to the calculated result (0-3) only if \p N can be lowered.
/// \return true iff the given mask of shuffle node \p N is a XXPERMDI shuffle
/// mask.
bool PPC::isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &DM,
                               bool &Swap, bool IsLE) {
  assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");

  // Ensure each byte index of the double word is consecutive.
  if (!isNByteElemShuffleMask(N, 8, 1))
    return false;

  unsigned M0 = N->getMaskElt(0) / 8;
  unsigned M1 = N->getMaskElt(8) / 8;
  assert(((M0 | M1) < 4) && "A mask element out of bounds?");

  // If both vector operands for the shuffle are the same vector, the mask will
  // contain only elements from the first one and the second one will be undef.
  if (N->getOperand(1).isUndef()) {
    if ((M0 | M1) < 2) {
      DM = IsLE ? (((~M1) & 1) << 1) + ((~M0) & 1) : (M0 << 1) + (M1 & 1);
      Swap = false;
      return true;
    } else
      return false;
  }

  if (IsLE) {
    if (M0 > 1 && M1 < 2) {
      Swap = false;
    } else if (M0 < 2 && M1 > 1) {
      M0 = (M0 + 2) % 4;
      M1 = (M1 + 2) % 4;
      Swap = true;
    } else
      return false;

    // Note: if control flow comes here that means Swap is already set above
    DM = (((~M1) & 1) << 1) + ((~M0) & 1);
    return true;
  } else { // BE
    if (M0 < 2 && M1 > 1) {
      Swap = false;
    } else if (M0 > 1 && M1 < 2) {
      M0 = (M0 + 2) % 4;
      M1 = (M1 + 2) % 4;
      Swap = true;
    } else
      return false;

    // Note: if control flow comes here that means Swap is already set above
    DM = (M0 << 1) + (M1 & 1);
    return true;
  }
}
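
// [Editorial sketch, not part of the original source.] On big-endian
// targets (no swap needed) the immediate is just the two doubleword
// selectors packed together, as in the BE branch above:
static unsigned sketchXXPERMDIImmBE(unsigned M0, unsigned M1) {
  // M0 selects doubleword 0 of the result, M1 doubleword 1; each indexes
  // the four doublewords of the concatenated inputs (0-3).
  return (M0 << 1) + (M1 & 1);
}
// e.g. M0 = 1, M1 = 2 (doubleword 1 of the first input, doubleword 0 of
// the second) gives DM = 2.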

/// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
/// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize,
                                SelectionDAG &DAG) {
  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
  assert(isSplatShuffleMask(SVOp, EltSize));
  if (DAG.getDataLayout().isLittleEndian())
    return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize);
  else
    return SVOp->getMaskElt(0) / EltSize;
}

/// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
/// by using a vspltis[bhw] instruction of the specified element size, return
/// the constant being splatted.  The ByteSize field indicates the number of
/// bytes of each element [124] -> [bhw].
SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
  SDValue OpVal(nullptr, 0);

  // If ByteSize of the splat is bigger than the element size of the
  // build_vector, then we have a case where we are checking for a splat where
  // multiple elements of the buildvector are folded together into a single
  // logical element of the splat (e.g. "vspltish 1" to splat {0,1}*8).
  unsigned EltSize = 16/N->getNumOperands();
  if (EltSize < ByteSize) {
    unsigned Multiple = ByteSize/EltSize;   // Number of BV entries per spltval.
    SDValue UniquedVals[4];
    assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");

    // See if all of the elements in the buildvector agree across.
    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
      if (N->getOperand(i).isUndef()) continue;
      // If the element isn't a constant, bail fully out.
      if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();

      if (!UniquedVals[i&(Multiple-1)].getNode())
        UniquedVals[i&(Multiple-1)] = N->getOperand(i);
      else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
        return SDValue();  // no match.
    }

    // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
    // either constant or undef values that are identical for each chunk.  See
    // if these chunks can form into a larger vspltis*.

    // Check to see if all of the leading entries are either 0 or -1.  If
    // neither, then this won't fit into the immediate field.
    bool LeadingZero = true;
    bool LeadingOnes = true;
    for (unsigned i = 0; i != Multiple-1; ++i) {
      if (!UniquedVals[i].getNode()) continue;  // Must have been undefs.

      LeadingZero &= isNullConstant(UniquedVals[i]);
      LeadingOnes &= isAllOnesConstant(UniquedVals[i]);
    }
    // Finally, check the least significant entry.
    if (LeadingZero) {
      if (!UniquedVals[Multiple-1].getNode())
        return DAG.getTargetConstant(0, SDLoc(N), MVT::i32);  // 0,0,0,undef
      int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getZExtValue();
      if (Val < 16)                                   // 0,0,0,4 -> vspltisw(4)
        return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
    }
    if (LeadingOnes) {
      if (!UniquedVals[Multiple-1].getNode())
        return DAG.getTargetConstant(~0U, SDLoc(N), MVT::i32); // -1,-1,-1,undef
      int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();
      if (Val >= -16)                            // -1,-1,-1,-2 -> vspltisw(-2)
        return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
    }

    return SDValue();
  }

  // Check to see if this buildvec has a single non-undef value in its elements.
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    if (N->getOperand(i).isUndef()) continue;
    if (!OpVal.getNode())
      OpVal = N->getOperand(i);
    else if (OpVal != N->getOperand(i))
      return SDValue();
  }

  if (!OpVal.getNode()) return SDValue();  // All UNDEF: use implicit def.

  unsigned ValSizeInBytes = EltSize;
  uint64_t Value = 0;
  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
    Value = CN->getZExtValue();
  } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
    assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
    Value = FloatToBits(CN->getValueAPF().convertToFloat());
  }

  // If the splat value is larger than the element value, then we can never do
  // this splat.  The only case that we could fit the replicated bits into our
  // immediate field for would be zero, and we prefer to use vxor for it.
  if (ValSizeInBytes < ByteSize) return SDValue();

  // If the element value is larger than the splat value, check if it consists
  // of a repeated bit pattern of size ByteSize.
  if (!APInt(ValSizeInBytes * 8, Value).isSplat(ByteSize * 8))
    return SDValue();

  // Properly sign extend the value.
  int MaskVal = SignExtend32(Value, ByteSize * 8);

  // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
  if (MaskVal == 0) return SDValue();

  // Finally, if this value fits in a 5 bit sext field, return it
  if (SignExtend32<5>(MaskVal) == MaskVal)
    return DAG.getTargetConstant(MaskVal, SDLoc(N), MVT::i32);
  return SDValue();
}
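
// [Editorial sketch, not part of the original source.] The scalar
// acceptance test above boils down to: the splatted element, sign-extended
// from ByteSize*8 bits, must be a nonzero value fitting the 5-bit signed
// immediate of vspltis[bhw]:
#include <cstdint>
static bool sketchFitsVSPLTIS(uint64_t Value, unsigned ByteSize) {
  unsigned Bits = ByteSize * 8;                       // 8, 16 or 32
  int64_t Sext = int64_t(Value << (64 - Bits)) >> (64 - Bits);
  return Sext != 0 && Sext >= -16 && Sext <= 15;      // 5-bit signed field
}
// e.g. sketchFitsVSPLTIS(0xFFFF, 2) is true: 0xFFFF sign-extends to -1.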

/// isQVALIGNIShuffleMask - If this is a qvaligni shuffle mask, return the shift
/// amount, otherwise return -1.
int PPC::isQVALIGNIShuffleMask(SDNode *N) {
  EVT VT = N->getValueType(0);
  if (VT != MVT::v4f64 && VT != MVT::v4f32 && VT != MVT::v4i1)
    return -1;

  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);

  // Find the first non-undef value in the shuffle mask.
  unsigned i;
  for (i = 0; i != 4 && SVOp->getMaskElt(i) < 0; ++i)
    /*search*/;

  if (i == 4) return -1;  // all undef.

  // Otherwise, check to see if the rest of the elements are consecutively
  // numbered from this value.
  unsigned ShiftAmt = SVOp->getMaskElt(i);
  if (ShiftAmt < i) return -1;
  ShiftAmt -= i;

  // Check the rest of the elements to see if they are consecutive.
  for (++i; i != 4; ++i)
    if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
      return -1;

  return ShiftAmt;
}

//===----------------------------------------------------------------------===//
//  Addressing Mode Selection
//===----------------------------------------------------------------------===//

/// isIntS16Immediate - This method tests to see if the node is either a 32-bit
/// or 64-bit immediate, and if the value can be accurately represented as a
/// sign extension from a 16-bit value.  If so, this returns true and the
/// immediate.
bool llvm::isIntS16Immediate(SDNode *N, int16_t &Imm) {
  if (!isa<ConstantSDNode>(N))
    return false;

  Imm = (int16_t)cast<ConstantSDNode>(N)->getZExtValue();
  if (N->getValueType(0) == MVT::i32)
    return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue();
  else
    return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
}
bool llvm::isIntS16Immediate(SDValue Op, int16_t &Imm) {
  return isIntS16Immediate(Op.getNode(), Imm);
}
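
// [Editorial sketch, not part of the original source.] The test above is
// equivalent to asking whether the constant survives a round trip through
// a 16-bit truncation and sign extension:
#include <cstdint>
static bool sketchIsS16(int64_t V) {
  return int64_t(int16_t(V)) == V;
}
// e.g. sketchIsS16(-32768) is true; sketchIsS16(32768) is false.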

/// SelectAddressEVXRegReg - Given the specified address, check to see if it can
/// be represented as an indexed [r+r] operation.
bool PPCTargetLowering::SelectAddressEVXRegReg(SDValue N, SDValue &Base,
                                               SDValue &Index,
                                               SelectionDAG &DAG) const {
  for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end();
      UI != E; ++UI) {
    if (MemSDNode *Memop = dyn_cast<MemSDNode>(*UI)) {
      if (Memop->getMemoryVT() == MVT::f64) {
          Base = N.getOperand(0);
          Index = N.getOperand(1);
          return true;
      }
    }
  }
  return false;
}
2254
2255
/// SelectAddressRegReg - Given the specified addressed, check to see if it
2256
/// can be represented as an indexed [r+r] operation.  Returns false if it
2257
/// can be more efficiently represented as [r+imm]. If \p EncodingAlignment is
2258
/// non-zero and N can be represented by a base register plus a signed 16-bit
2259
/// displacement, make a more precise judgement by checking (displacement % \p
2260
/// EncodingAlignment).
2261
bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base,
2262
                                            SDValue &Index, SelectionDAG &DAG,
2263
18.9k
                                            unsigned EncodingAlignment) const {
2264
18.9k
  int16_t imm = 0;
2265
18.9k
  if (N.getOpcode() == ISD::ADD) {
2266
7.85k
    // Is there any SPE load/store (f64), which can't handle 16bit offset?
2267
7.85k
    // SPE load/store can only handle 8-bit offsets.
2268
7.85k
    if (hasSPE() && 
SelectAddressEVXRegReg(N, Base, Index, DAG)43
)
2269
6
        return true;
2270
7.84k
    if (isIntS16Immediate(N.getOperand(1), imm) &&
2271
7.84k
        
(6.96k
!EncodingAlignment6.96k
||
!(imm % EncodingAlignment)2.89k
))
2272
6.93k
      return false; // r+i
2273
911
    if (N.getOperand(1).getOpcode() == PPCISD::Lo)
2274
256
      return false;    // r+i
2275
655
2276
655
    Base = N.getOperand(0);
2277
655
    Index = N.getOperand(1);
2278
655
    return true;
2279
11.1k
  } else if (N.getOpcode() == ISD::OR) {
2280
923
    if (isIntS16Immediate(N.getOperand(1), imm) &&
2281
923
        (!EncodingAlignment || !(imm % EncodingAlignment)))
2282
903
      return false; // r+i; fold it if we can.
2283
20
2284
20
    // If this is an or of disjoint bitfields, we can codegen this as an add
2285
20
    // (for better address arithmetic) if the LHS and RHS of the OR are provably
2286
20
    // disjoint.
2287
20
    KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2288
20
2289
20
    if (LHSKnown.Zero.getBoolValue()) {
2290
20
      KnownBits RHSKnown = DAG.computeKnownBits(N.getOperand(1));
2291
20
      // If all of the bits are known zero on the LHS or RHS, the add won't
2292
20
      // carry.
2293
20
      if (~(LHSKnown.Zero | RHSKnown.Zero) == 0) {
2294
20
        Base = N.getOperand(0);
2295
20
        Index = N.getOperand(1);
2296
20
        return true;
2297
20
      }
2298
10.2k
    }
2299
20
  }
2300
10.2k
2301
10.2k
  return false;
2302
10.2k
}
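// Illustrative sketch (exposition only): the ISD::OR case rests on the
// identity that when no bit position can carry, a | b == a + b.  For example,
// with a 16-byte-aligned base P (low four bits known zero) and offset 8:
//
//   P | 8 == P + 8
//
// so the OR can safely be selected as the indexed [r+r] form of an add.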
2303
2304
// If we happen to be doing an i64 load or store into a stack slot that has
2305
// less than a 4-byte alignment, then the frame-index elimination may need to
2306
// use an indexed load or store instruction (because the offset may not be a
2307
// multiple of 4). The extra register needed to hold the offset comes from the
2308
// register scavenger, and it is possible that the scavenger will need to use
2309
// an emergency spill slot. As a result, we need to make sure that a spill slot
2310
// is allocated when doing an i64 load/store into a less-than-4-byte-aligned
2311
// stack slot.
2312
3.48k
static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {
2313
3.48k
  // FIXME: This does not handle the LWA case.
2314
3.48k
  if (VT != MVT::i64)
2315
1.39k
    return;
2316
2.09k
2317
2.09k
  // NOTE: We'll exclude negative FIs here, which come from argument
2318
2.09k
  // lowering, because there are no known test cases triggering this problem
2319
2.09k
  // using packed structures (or similar). We can remove this exclusion if
2320
2.09k
  // we find such a test case. The reason why this is so test-case driven is
2321
2.09k
  // because this entire 'fixup' is only to prevent crashes (from the
2322
2.09k
  // register scavenger) on not-really-valid inputs. For example, if we have:
2323
2.09k
  //   %a = alloca i1
2324
2.09k
  //   %b = bitcast i1* %a to i64*
2325
2.09k
  //   store i64 v, i64* b
2326
2.09k
  // then the store should really be marked as 'align 1', but is not. If it
2327
2.09k
  // were marked as 'align 1' then the indexed form would have been
2328
2.09k
  // instruction-selected initially, and the problem this 'fixup' is preventing
2329
2.09k
  // won't happen regardless.
2330
2.09k
  if (FrameIdx < 0)
2331
586
    return;
2332
1.50k
2333
1.50k
  MachineFunction &MF = DAG.getMachineFunction();
2334
1.50k
  MachineFrameInfo &MFI = MF.getFrameInfo();
2335
1.50k
2336
1.50k
  unsigned Align = MFI.getObjectAlignment(FrameIdx);
2337
1.50k
  if (Align >= 4)
2338
1.44k
    return;
2339
56
2340
56
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2341
56
  FuncInfo->setHasNonRISpills();
2342
56
}
2343
2344
/// Returns true if the address N can be represented by a base register plus
2345
/// a signed 16-bit displacement [r+imm], and if it is not better
2346
/// represented as reg+reg.  If \p EncodingAlignment is non-zero, only accept
2347
/// displacements that are multiples of that value.
2348
bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
2349
                                            SDValue &Base,
2350
                                            SelectionDAG &DAG,
2351
11.1k
                                            unsigned EncodingAlignment) const {
2352
11.1k
  // FIXME dl should come from parent load or store, not from address
2353
11.1k
  SDLoc dl(N);
2354
11.1k
  // If this can be more profitably realized as r+r, fail.
2355
11.1k
  if (SelectAddressRegReg(N, Disp, Base, DAG, EncodingAlignment))
2356
232
    return false;
2357
10.8k
2358
10.8k
  if (N.getOpcode() == ISD::ADD) {
2359
4.49k
    int16_t imm = 0;
2360
4.49k
    if (isIntS16Immediate(N.getOperand(1), imm) &&
2361
4.49k
        (!EncodingAlignment || (imm % EncodingAlignment) == 0)) {
2362
4.32k
      Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
2363
4.32k
      if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
2364
317
        Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2365
317
        fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2366
4.00k
      } else {
2367
4.00k
        Base = N.getOperand(0);
2368
4.00k
      }
2369
4.32k
      return true; // [r+i]
2370
4.32k
    } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
2371
176
      // Match LOAD (ADD (X, Lo(G))).
2372
176
      assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()
2373
176
             && "Cannot handle constant offsets yet!");
2374
176
      Disp = N.getOperand(1).getOperand(0);  // The global address.
2375
176
      assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
2376
176
             Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
2377
176
             Disp.getOpcode() == ISD::TargetConstantPool ||
2378
176
             Disp.getOpcode() == ISD::TargetJumpTable);
2379
176
      Base = N.getOperand(0);
2380
176
      return true;  // [&g+r]
2381
176
    }
2382
6.37k
  } else if (N.getOpcode() == ISD::OR) {
2383
896
    int16_t imm = 0;
2384
896
    if (isIntS16Immediate(N.getOperand(1), imm) &&
2385
896
        (!EncodingAlignment || (imm % EncodingAlignment) == 0)) {
2386
896
      // If this is an or of disjoint bitfields, we can codegen this as an add
2387
896
      // (for better address arithmetic) if the LHS and RHS of the OR are
2388
896
      // provably disjoint.
2389
896
      KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2390
896
2391
896
      if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
2392
896
        // If all of the bits are known zero on the LHS or RHS, the add won't
2393
896
        // carry.
2394
896
        if (FrameIndexSDNode *FI =
2395
893
              dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
2396
893
          Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2397
893
          fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2398
893
        } else {
2399
3
          Base = N.getOperand(0);
2400
3
        }
2401
896
        Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
2402
896
        return true;
2403
896
      }
2404
5.48k
    }
2405
5.48k
  } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
2406
127
    // Loading from a constant address.
2407
127
2408
127
    // If this address fits entirely in a 16-bit sext immediate field, codegen
2409
127
    // this as "d, 0"
2410
127
    int16_t Imm;
2411
127
    if (isIntS16Immediate(CN, Imm) &&
2412
127
        (!EncodingAlignment || (Imm % EncodingAlignment) == 0)) {
2413
117
      Disp = DAG.getTargetConstant(Imm, dl, CN->getValueType(0));
2414
117
      Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2415
117
                             CN->getValueType(0));
2416
117
      return true;
2417
117
    }
2418
10
2419
10
    // Handle 32-bit sext immediates with LIS + addr mode.
2420
10
    if ((CN->getValueType(0) == MVT::i32 ||
2421
10
         (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) &&
2422
10
        (!EncodingAlignment || (CN->getZExtValue() % EncodingAlignment) == 0)) {
2423
7
      int Addr = (int)CN->getZExtValue();
2424
7
2425
7
      // Otherwise, break this down into an LIS + disp.
2426
7
      Disp = DAG.getTargetConstant((short)Addr, dl, MVT::i32);
2427
7
2428
7
      Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, dl,
2429
7
                                   MVT::i32);
2430
7
      unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
2431
7
      Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0);
2432
7
      return true;
2433
7
    }
2434
5.35k
  }
2435
5.35k
2436
5.35k
  Disp = DAG.getTargetConstant(0, dl, getPointerTy(DAG.getDataLayout()));
2437
5.35k
  if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
2438
2.27k
    Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2439
2.27k
    fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2440
2.27k
  } else
2441
3.08k
    Base = N;
2442
5.35k
  return true;      // [r+0]
2443
5.35k
}
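// Illustrative sketch (exposition only): a worked example of the LIS + disp
// split above, assuming Addr = 0x12348000.
//
//   (short)Addr                       = 0x8000 = -32768           // Disp
//   (Addr - (signed short)Addr) >> 16 = 0x12350000 >> 16 = 0x1235 // Base
//
// LIS materializes 0x1235 << 16 = 0x12350000, and applying the -32768
// displacement at the memory access yields 0x12348000 again.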
2444
2445
/// SelectAddressRegRegOnly - Given the specified address, force it to be
2446
/// represented as an indexed [r+r] operation.
2447
bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
2448
                                                SDValue &Index,
2449
5.77k
                                                SelectionDAG &DAG) const {
2450
5.77k
  // Check to see if we can easily represent this as an [r+r] address.  This
2451
5.77k
  // will fail if it thinks that the address is more profitably represented as
2452
5.77k
  // reg+imm, e.g. where imm = 0.
2453
5.77k
  if (SelectAddressRegReg(N, Base, Index, DAG))
2454
124
    return true;
2455
5.64k
2456
5.64k
  // If the address is the result of an add, we will utilize the fact that the
2457
5.64k
  // address calculation includes an implicit add.  However, we can reduce
2458
5.64k
  // register pressure if we do not materialize a constant just for use as the
2459
5.64k
  // index register.  We only get rid of the add if it is not an add of a
2460
5.64k
  // value and a 16-bit signed constant and both have a single use.
2461
5.64k
  int16_t imm = 0;
2462
5.64k
  if (N.getOpcode() == ISD::ADD &&
2463
5.64k
      (!isIntS16Immediate(N.getOperand(1), imm) ||
2464
1.28k
       !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
2465
1.15k
    Base = N.getOperand(0);
2466
1.15k
    Index = N.getOperand(1);
2467
1.15k
    return true;
2468
1.15k
  }
2469
4.49k
2470
4.49k
  // Otherwise, do it the hard way, using R0 as the base register.
2471
4.49k
  Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2472
4.49k
                         N.getValueType());
2473
4.49k
  Index = N;
2474
4.49k
  return true;
2475
4.49k
}
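// Illustrative note (exposition only): the "hard way" above leans on the
// PowerPC convention that r0 in the RA slot of an indexed access reads as the
// literal value 0 rather than the register's contents, e.g. roughly:
//
//   lwzx r4, 0, r3    ; r4 = *(0 + r3), the 0 coming from the r0 slot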
2476
2477
/// Returns true if we should use a direct load into vector instruction
2478
/// (such as lxsd or lfd), instead of a load into gpr + direct move sequence.
2479
518
static bool usePartialVectorLoads(SDNode *N, const PPCSubtarget& ST) {
2480
518
2481
518
  // If there are any other uses other than scalar to vector, then we should
2482
518
  // keep it as a scalar load -> direct move pattern to prevent multiple
2483
518
  // loads.
2484
518
  LoadSDNode *LD = dyn_cast<LoadSDNode>(N);
2485
518
  if (!LD)
2486
0
    return false;
2487
518
2488
518
  EVT MemVT = LD->getMemoryVT();
2489
518
  if (!MemVT.isSimple())
2490
0
    return false;
2491
518
  switch(MemVT.getSimpleVT().SimpleTy) {
2492
518
  case MVT::i64:
2493
100
    break;
2494
518
  case MVT::i32:
2495
195
    if (!ST.hasP8Vector())
2496
163
      return false;
2497
32
    break;
2498
122
  case MVT::i16:
2499
122
  case MVT::i8:
2500
122
    if (!ST.hasP9Vector())
2501
118
      return false;
2502
4
    break;
2503
101
  default:
2504
101
    return false;
2505
136
  }
2506
136
2507
136
  SDValue LoadedVal(N, 0);
2508
136
  if (!LoadedVal.hasOneUse())
2509
9
    return false;
2510
127
2511
127
  for (SDNode::use_iterator UI = LD->use_begin(), UE = LD->use_end();
2512
211
       UI != UE; ++UI)
2513
203
    if (UI.getUse().get().getResNo() == 0 &&
2514
203
        UI->getOpcode() != ISD::SCALAR_TO_VECTOR)
2515
119
      return false;
2516
127
2517
127
  return true;
2518
127
}
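// Illustrative sketch (exposition only): schematically, the DAG shape this
// predicate looks for is
//
//   t1: i64,ch = load<...>            ; the scalar memory access
//   t2: v2i64 = scalar_to_vector t1   ; its only value use
//
// in which case a direct load-to-vector (e.g. lxsd) beats a GPR load followed
// by a direct move.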
2519
2520
/// getPreIndexedAddressParts - Returns true by value, and the base pointer,
2521
/// offset pointer, and addressing mode by reference, if the node's address
2522
/// can be legally represented as a pre-indexed load / store address.
2523
bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
2524
                                                  SDValue &Offset,
2525
                                                  ISD::MemIndexedMode &AM,
2526
929
                                                  SelectionDAG &DAG) const {
2527
929
  if (DisablePPCPreinc) return false;
2528
929
2529
929
  bool isLoad = true;
2530
929
  SDValue Ptr;
2531
929
  EVT VT;
2532
929
  unsigned Alignment;
2533
929
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
2534
518
    Ptr = LD->getBasePtr();
2535
518
    VT = LD->getMemoryVT();
2536
518
    Alignment = LD->getAlignment();
2537
518
  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
2538
411
    Ptr = ST->getBasePtr();
2539
411
    VT  = ST->getMemoryVT();
2540
411
    Alignment = ST->getAlignment();
2541
411
    isLoad = false;
2542
411
  } else
2543
0
    return false;
2544
929
2545
929
  // Do not generate pre-inc forms for specific loads that feed scalar_to_vector
2546
929
  // instructions because we can fold these into a more efficient instruction
2547
929
  // instead (such as LXSD).
2548
929
  if (isLoad && usePartialVectorLoads(N, Subtarget)) {
2549
8
    return false;
2550
8
  }
2551
921
2552
921
  // PowerPC doesn't have preinc load/store instructions for vectors (except
2553
921
  // for QPX, which does have preinc r+r forms).
2554
921
  if (VT.isVector()) {
2555
1
    if (!Subtarget.hasQPX() || (VT != MVT::v4f64 && VT != MVT::v4f32)) {
2556
0
      return false;
2557
1
    } else if (SelectAddressRegRegOnly(Ptr, Offset, Base, DAG)) {
2558
1
      AM = ISD::PRE_INC;
2559
1
      return true;
2560
1
    }
2561
920
  }
2562
920
2563
920
  if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) {
2564
77
    // Common code will reject creating a pre-inc form if the base pointer
2565
77
    // is a frame index, or if N is a store and the base pointer is either
2566
77
    // the same as or a predecessor of the value being stored.  Check for
2567
77
    // those situations here, and try with swapped Base/Offset instead.
2568
77
    bool Swap = false;
2569
77
2570
77
    if (isa<FrameIndexSDNode>(Base) || isa<RegisterSDNode>(Base))
2571
0
      Swap = true;
2572
77
    else if (!isLoad) {
2573
17
      SDValue Val = cast<StoreSDNode>(N)->getValue();
2574
17
      if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode()))
2575
8
        Swap = true;
2576
17
    }
2577
77
2578
77
    if (Swap)
2579
8
      std::swap(Base, Offset);
2580
77
2581
77
    AM = ISD::PRE_INC;
2582
77
    return true;
2583
77
  }
2584
843
2585
843
  // LDU/STU can only handle immediates that are a multiple of 4.
2586
843
  if (VT != MVT::i64) {
2587
646
    if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, 0))
2588
0
      return false;
2589
197
  } else {
2590
197
    // LDU/STU need an address with at least 4-byte alignment.
2591
197
    if (Alignment < 4)
2592
4
      return false;
2593
193
2594
193
    if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, 4))
2595
0
      return false;
2596
839
  }
2597
839
2598
839
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
2599
447
    // PPC64 doesn't have lwau, but it does have lwaux.  Reject preinc load of
2600
447
    // sext i32 to i64 when addr mode is r+i.
2601
447
    if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&
2602
447
        LD->getExtensionType() == ISD::SEXTLOAD &&
2603
447
        isa<ConstantSDNode>(Offset))
2604
0
      return false;
2605
839
  }
2606
839
2607
839
  AM = ISD::PRE_INC;
2608
839
  return true;
2609
839
}
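// Illustrative sketch (exposition only): a pre-indexed (update-form) access
// folds the pointer bump into the memory operation, e.g.
//
//   lwzu r4, 8(r3)    ; r4 = *(r3 + 8), and r3 is updated to r3 + 8
//
// which is what the ISD::PRE_INC mode chosen above ultimately selects to.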
2610
2611
//===----------------------------------------------------------------------===//
2612
//  LowerOperation implementation
2613
//===----------------------------------------------------------------------===//
2614
2615
/// Set the HiOpFlags and LoOpFlags to the target MO flags for a label
2616
/// reference, using a PICBase and non-lazy-ptr markers where required.
2617
static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget,
2618
                               unsigned &HiOpFlags, unsigned &LoOpFlags,
2619
334
                               const GlobalValue *GV = nullptr) {
2620
334
  HiOpFlags = PPCII::MO_HA;
2621
334
  LoOpFlags = PPCII::MO_LO;
2622
334
2623
334
  // Don't use the pic base if not in PIC relocation model.
2624
334
  if (IsPIC) {
2625
29
    HiOpFlags |= PPCII::MO_PIC_FLAG;
2626
29
    LoOpFlags |= PPCII::MO_PIC_FLAG;
2627
29
  }
2628
334
2629
334
  // If this is a reference to a global value that requires a non-lazy-ptr, make
2630
334
  // sure that instruction lowering adds it.
2631
334
  if (GV && Subtarget.hasLazyResolverStub(GV)) {
2632
0
    HiOpFlags |= PPCII::MO_NLP_FLAG;
2633
0
    LoOpFlags |= PPCII::MO_NLP_FLAG;
2634
0
2635
0
    if (GV->hasHiddenVisibility()) {
2636
0
      HiOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG;
2637
0
      LoOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG;
2638
0
    }
2639
0
  }
2640
334
}
2641
2642
static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC,
2643
305
                             SelectionDAG &DAG) {
2644
305
  SDLoc DL(HiPart);
2645
305
  EVT PtrVT = HiPart.getValueType();
2646
305
  SDValue Zero = DAG.getConstant(0, DL, PtrVT);
2647
305
2648
305
  SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero);
2649
305
  SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero);
2650
305
2651
305
  // With PIC, the first instruction is actually "GR+hi(&G)".
2652
305
  if (isPIC)
2653
0
    Hi = DAG.getNode(ISD::ADD, DL, PtrVT,
2654
0
                     DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT), Hi);
2655
305
2656
305
  // Generate non-pic code that has direct accesses to the constant pool.
2657
305
  // The address of the global is just (hi(&g)+lo(&g)).
2658
305
  return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo);
2659
305
}
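// Illustrative sketch (exposition only): a worked example of the hi/lo split,
// assuming &g = 0x10008000.  Because the low half is negative as an s16, the
// "high-adjusted" half compensates by one:
//
//   g@l  = 0x8000 = -32768
//   g@ha = (0x10008000 + 0x8000) >> 16 = 0x1001
//
//   addis r3, 0, g@ha   ; r3 = 0x10010000
//   addi  r3, r3, g@l   ; r3 = 0x10010000 - 0x8000 = 0x10008000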
2660
2661
5.47k
static void setUsesTOCBasePtr(MachineFunction &MF) {
2662
5.47k
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2663
5.47k
  FuncInfo->setUsesTOCBasePtr();
2664
5.47k
}
2665
2666
5.46k
static void setUsesTOCBasePtr(SelectionDAG &DAG) {
2667
5.46k
  setUsesTOCBasePtr(DAG.getMachineFunction());
2668
5.46k
}
2669
2670
static SDValue getTOCEntry(SelectionDAG &DAG, const SDLoc &dl, bool Is64Bit,
2671
3.54k
                           SDValue GA) {
2672
3.54k
  EVT VT = Is64Bit ? MVT::i64 : MVT::i32;
2673
3.54k
  SDValue Reg = Is64Bit ? DAG.getRegister(PPC::X2, VT) :
2674
3.54k
                DAG.getNode(PPCISD::GlobalBaseReg, dl, VT);
2675
3.54k
2676
3.54k
  SDValue Ops[] = { GA, Reg };
2677
3.54k
  return DAG.getMemIntrinsicNode(
2678
3.54k
      PPCISD::TOC_ENTRY, dl, DAG.getVTList(VT, MVT::Other), Ops, VT,
2679
3.54k
      MachinePointerInfo::getGOT(DAG.getMachineFunction()), 0,
2680
3.54k
      MachineMemOperand::MOLoad);
2681
3.54k
}
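// Illustrative sketch (exposition only): on 64-bit ELF, the PPCISD::TOC_ENTRY
// node built above typically becomes a TOC-relative load off r2 (X2), e.g. in
// the small code model roughly:
//
//   ld r3, g@toc(r2)    ; fetch the address of g from the TOC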
2682
2683
SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
2684
1.79k
                                             SelectionDAG &DAG) const {
2685
1.79k
  EVT PtrVT = Op.getValueType();
2686
1.79k
  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
2687
1.79k
  const Constant *C = CP->getConstVal();
2688
1.79k
2689
1.79k
  // 64-bit SVR4 ABI code is always position-independent.
2690
1.79k
  // The actual address of the GlobalValue is stored in the TOC.
2691
1.79k
  if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
2692
1.69k
    setUsesTOCBasePtr(DAG);
2693
1.69k
    SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0);
2694
1.69k
    return getTOCEntry(DAG, SDLoc(CP), true, GA);
2695
1.69k
  }
2696
96
2697
96
  unsigned MOHiFlag, MOLoFlag;
2698
96
  bool IsPIC = isPositionIndependent();
2699
96
  getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
2700
96
2701
96
  if (IsPIC && Subtarget.isSVR4ABI()) {
2702
11
    SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(),
2703
11
                                           PPCII::MO_PIC_FLAG);
2704
11
    return getTOCEntry(DAG, SDLoc(CP), false, GA);
2705
11
  }
2706
85
2707
85
  SDValue CPIHi =
2708
85
    DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOHiFlag);
2709
85
  SDValue CPILo =
2710
85
    DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOLoFlag);
2711
85
  return LowerLabelRef(CPIHi, CPILo, IsPIC, DAG);
2712
85
}
2713
2714
// For 64-bit PowerPC, prefer the more compact relative encodings.
2715
// This trades 32 bits per jump table entry for one or two instructions
2716
// on the jump site.
2717
19
unsigned PPCTargetLowering::getJumpTableEncoding() const {
2718
19
  if (isJumpTableRelative())
2719
16
    return MachineJumpTableInfo::EK_LabelDifference32;
2720
3
2721
3
  return TargetLowering::getJumpTableEncoding();
2722
3
}
2723
2724
32
bool PPCTargetLowering::isJumpTableRelative() const {
2725
32
  if (Subtarget.isPPC64())
2726
26
    return true;
2727
6
  return TargetLowering::isJumpTableRelative();
2728
6
}
2729
2730
SDValue PPCTargetLowering::getPICJumpTableRelocBase(SDValue Table,
2731
10
                                                    SelectionDAG &DAG) const {
2732
10
  if (!Subtarget.isPPC64())
2733
0
    return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
2734
10
2735
10
  switch (getTargetMachine().getCodeModel()) {
2736
10
  case CodeModel::Small:
2737
6
  case CodeModel::Medium:
2738
6
    return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
2739
6
  default:
2740
4
    return DAG.getNode(PPCISD::GlobalBaseReg, SDLoc(),
2741
4
                       getPointerTy(DAG.getDataLayout()));
2742
10
  }
2743
10
}
2744
2745
const MCExpr *
2746
PPCTargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
2747
                                                unsigned JTI,
2748
56
                                                MCContext &Ctx) const {
2749
56
  if (!Subtarget.isPPC64())
2750
0
    return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
2751
56
2752
56
  switch (getTargetMachine().getCodeModel()) {
2753
56
  case CodeModel::Small:
2754
38
  case CodeModel::Medium:
2755
38
    return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
2756
38
  default:
2757
18
    return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
2758
56
  }
2759
56
}
2760
2761
13
SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
2762
13
  EVT PtrVT = Op.getValueType();
2763
13
  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
2764
13
2765
13
  // 64-bit SVR4 ABI code is always position-independent.
2766
13
  // The actual address of the GlobalValue is stored in the TOC.
2767
13
  if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
2768
10
    setUsesTOCBasePtr(DAG);
2769
10
    SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
2770
10
    return getTOCEntry(DAG, SDLoc(JT), true, GA);
2771
10
  }
2772
3
2773
3
  unsigned MOHiFlag, MOLoFlag;
2774
3
  bool IsPIC = isPositionIndependent();
2775
3
  getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
2776
3
2777
3
  if (IsPIC && Subtarget.isSVR4ABI()) {
2778
0
    SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
2779
0
                                        PPCII::MO_PIC_FLAG);
2780
0
    return getTOCEntry(DAG, SDLoc(GA), false, GA);
2781
0
  }
2782
3
2783
3
  SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag);
2784
3
  SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag);
2785
3
  return LowerLabelRef(JTIHi, JTILo, IsPIC, DAG);
2786
3
}
2787
2788
SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
2789
14
                                             SelectionDAG &DAG) const {
2790
14
  EVT PtrVT = Op.getValueType();
2791
14
  BlockAddressSDNode *BASDN = cast<BlockAddressSDNode>(Op);
2792
14
  const BlockAddress *BA = BASDN->getBlockAddress();
2793
14
2794
14
  // 64-bit SVR4 ABI code is always position-independent.
2795
14
  // The actual BlockAddress is stored in the TOC.
2796
14
  if (Subtarget.isSVR4ABI() &&
2797
14
      (Subtarget.isPPC64() || isPositionIndependent())) {
2798
12
    if (Subtarget.isPPC64())
2799
9
      setUsesTOCBasePtr(DAG);
2800
12
    SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset());
2801
12
    return getTOCEntry(DAG, SDLoc(BASDN), Subtarget.isPPC64(), GA);
2802
12
  }
2803
2
2804
2
  unsigned MOHiFlag, MOLoFlag;
2805
2
  bool IsPIC = isPositionIndependent();
2806
2
  getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
2807
2
  SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag);
2808
2
  SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag);
2809
2
  return LowerLabelRef(TgtBAHi, TgtBALo, IsPIC, DAG);
2810
2
}
2811
2812
SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
2813
63
                                              SelectionDAG &DAG) const {
2814
63
  // FIXME: TLS addresses currently use medium model code sequences,
2815
63
  // which is the most useful form.  Eventually support for small and
2816
63
  // large models could be added if users need it, at the cost of
2817
63
  // additional complexity.
2818
63
  GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
2819
63
  if (DAG.getTarget().useEmulatedTLS())
2820
6
    return LowerToTLSEmulatedModel(GA, DAG);
2821
57
2822
57
  SDLoc dl(GA);
2823
57
  const GlobalValue *GV = GA->getGlobal();
2824
57
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
2825
57
  bool is64bit = Subtarget.isPPC64();
2826
57
  const Module *M = DAG.getMachineFunction().getFunction().getParent();
2827
57
  PICLevel::Level picLevel = M->getPICLevel();
2828
57
2829
57
  const TargetMachine &TM = getTargetMachine();
2830
57
  TLSModel::Model Model = TM.getTLSModel(GV);
2831
57
2832
57
  if (Model == TLSModel::LocalExec) {
2833
10
    SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
2834
10
                                               PPCII::MO_TPREL_HA);
2835
10
    SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
2836
10
                                               PPCII::MO_TPREL_LO);
2837
10
    SDValue TLSReg = is64bit ? DAG.getRegister(PPC::X13, MVT::i64)
2838
10
                             : DAG.getRegister(PPC::R2, MVT::i32);
2839
10
2840
10
    SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
2841
10
    return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
2842
10
  }
2843
47
2844
47
  if (Model == TLSModel::InitialExec) {
2845
17
    SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
2846
17
    SDValue TGATLS = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
2847
17
                                                PPCII::MO_TLS);
2848
17
    SDValue GOTPtr;
2849
17
    if (is64bit) {
2850
15
      setUsesTOCBasePtr(DAG);
2851
15
      SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
2852
15
      GOTPtr = DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl,
2853
15
                           PtrVT, GOTReg, TGA);
2854
15
    } else {
2855
2
      if (!TM.isPositionIndependent())
2856
1
        GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT);
2857
1
      else if (picLevel == PICLevel::SmallPIC)
2858
0
        GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
2859
1
      else
2860
1
        GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
2861
2
    }
2862
17
    SDValue TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl,
2863
17
                                   PtrVT, TGA, GOTPtr);
2864
17
    return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS);
2865
17
  }
2866
30
2867
30
  if (Model == TLSModel::GeneralDynamic) {
2868
20
    SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
2869
20
    SDValue GOTPtr;
2870
20
    if (is64bit) {
2871
13
      setUsesTOCBasePtr(DAG);
2872
13
      SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
2873
13
      GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT,
2874
13
                                   GOTReg, TGA);
2875
13
    } else {
2876
7
      if (picLevel == PICLevel::SmallPIC)
2877
1
        GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
2878
6
      else
2879
6
        GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
2880
7
    }
2881
20
    return DAG.getNode(PPCISD::ADDI_TLSGD_L_ADDR, dl, PtrVT,
2882
20
                       GOTPtr, TGA, TGA);
2883
20
  }
2884
10
2885
10
  if (Model == TLSModel::LocalDynamic) {
2886
10
    SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
2887
10
    SDValue GOTPtr;
2888
10
    if (is64bit) {
2889
7
      setUsesTOCBasePtr(DAG);
2890
7
      SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
2891
7
      GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT,
2892
7
                           GOTReg, TGA);
2893
7
    } else {
2894
3
      if (picLevel == PICLevel::SmallPIC)
2895
0
        GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
2896
3
      else
2897
3
        GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
2898
3
    }
2899
10
    SDValue TLSAddr = DAG.getNode(PPCISD::ADDI_TLSLD_L_ADDR, dl,
2900
10
                                  PtrVT, GOTPtr, TGA, TGA);
2901
10
    SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl,
2902
10
                                      PtrVT, TLSAddr, TGA);
2903
10
    return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA);
2904
10
  }
2905
0
2906
0
  llvm_unreachable("Unknown TLS model!");
2907
0
}
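// Illustrative sketch (exposition only): the node sequences built above
// correspond roughly to the standard 64-bit ELF TLS forms:
//
//   local-exec:    addis r4, r13, v@tprel@ha
//                  addi  r4, r4,  v@tprel@l
//   initial-exec:  addis r4, r2,  v@got@tprel@ha
//                  ld    r4, v@got@tprel@l(r4)
//                  add   r4, r4, v@tls      ; @tls names the thread pointer
//   general-/local-dynamic: @got@tlsgd / @got@tlsld materialization followed
//                  by a __tls_get_addr call inserted later in lowering.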
2908
2909
SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
2910
2.03k
                                              SelectionDAG &DAG) const {
2911
2.03k
  EVT PtrVT = Op.getValueType();
2912
2.03k
  GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
2913
2.03k
  SDLoc DL(GSDN);
2914
2.03k
  const GlobalValue *GV = GSDN->getGlobal();
2915
2.03k
2916
2.03k
  // 64-bit SVR4 ABI code is always position-independent.
2917
2.03k
  // The actual address of the GlobalValue is stored in the TOC.
2918
2.03k
  if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
2919
1.79k
    setUsesTOCBasePtr(DAG);
2920
1.79k
    SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset());
2921
1.79k
    return getTOCEntry(DAG, DL, true, GA);
2922
1.79k
  }
2923
233
2924
233
  unsigned MOHiFlag, MOLoFlag;
2925
233
  bool IsPIC = isPositionIndependent();
2926
233
  getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag, GV);
2927
233
2928
233
  if (IsPIC && Subtarget.isSVR4ABI()) {
2929
18
    SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT,
2930
18
                                            GSDN->getOffset(),
2931
18
                                            PPCII::MO_PIC_FLAG);
2932
18
    return getTOCEntry(DAG, DL, false, GA);
2933
18
  }
2934
215
2935
215
  SDValue GAHi =
2936
215
    DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag);
2937
215
  SDValue GALo =
2938
215
    DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag);
2939
215
2940
215
  SDValue Ptr = LowerLabelRef(GAHi, GALo, IsPIC, DAG);
2941
215
2942
215
  // If the global reference is actually to a non-lazy-pointer, we have to do an
2943
215
  // extra load to get the address of the global.
2944
215
  if (MOHiFlag & PPCII::MO_NLP_FLAG)
2945
0
    Ptr = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
2946
215
  return Ptr;
2947
215
}
2948
2949
38
SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
2950
38
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
2951
38
  SDLoc dl(Op);
2952
38
2953
38
  if (Op.getValueType() == MVT::v2i64) {
2954
15
    // When the operands themselves are v2i64 values, we need to do something
2955
15
    // special because VSX has no underlying comparison operations for these.
2956
15
    if (Op.getOperand(0).getValueType() == MVT::v2i64) {
2957
12
      // Equality can be handled by casting to the legal type for Altivec
2958
12
      // comparisons, everything else needs to be expanded.
2959
12
      if (CC == ISD::SETEQ || CC == ISD::SETNE) {
2960
6
        return DAG.getNode(ISD::BITCAST, dl, MVT::v2i64,
2961
6
                 DAG.getSetCC(dl, MVT::v4i32,
2962
6
                   DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0)),
2963
6
                   DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(1)),
2964
6
                   CC));
2965
6
      }
2966
6
2967
6
      return SDValue();
2968
6
    }
2969
3
2970
3
    // We handle most of these in the usual way.
2971
3
    return Op;
2972
3
  }
2973
23
2974
23
  // If we're comparing for equality to zero, expose the fact that this is
2975
23
  // implemented as a ctlz/srl pair on ppc, so that the dag combiner can
2976
23
  // fold the new nodes.
2977
23
  if (SDValue V = lowerCmpEqZeroToCtlzSrl(Op, DAG))
2978
6
    return V;
2979
17
2980
17
  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
2981
15
    // Leave comparisons against 0 and -1 alone for now, since they're usually
2982
15
    // optimized.  FIXME: revisit this when we can custom lower all setcc
2983
15
    // optimizations.
2984
15
    if (C->isAllOnesValue() || C->isNullValue())
2985
10
      return SDValue();
2986
7
  }
2987
7
2988
7
  // If we have an integer seteq/setne, turn it into a compare against zero
2989
7
  // by xor'ing the rhs with the lhs, which is faster than setting a
2990
7
  // condition register, reading it back out, and masking the correct bit.  The
2991
7
  // normal approach here uses sub to do this instead of xor.  Using xor exposes
2992
7
  // the result to other bit-twiddling opportunities.
2993
7
  EVT LHSVT = Op.getOperand(0).getValueType();
2994
7
  if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
2995
2
    EVT VT = Op.getValueType();
2996
2
    SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, Op.getOperand(0),
2997
2
                                Op.getOperand(1));
2998
2
    return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, dl, LHSVT), CC);
2999
2
  }
3000
5
  return SDValue();
3001
5
}
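// Illustrative sketch (exposition only): the final transform uses the identity
//
//   (a == b)  <=>  ((a ^ b) == 0)
//
// i.e. seteq(a, b) becomes seteq(xor(a, b), 0).  XOR is chosen over SUB
// because the zero/non-zero result composes with other bit-twiddling combines,
// such as the ctlz/srl lowering of (x == 0) attempted just above.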
3002
3003
2
SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
3004
2
  SDNode *Node = Op.getNode();
3005
2
  EVT VT = Node->getValueType(0);
3006
2
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3007
2
  SDValue InChain = Node->getOperand(0);
3008
2
  SDValue VAListPtr = Node->getOperand(1);
3009
2
  const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
3010
2
  SDLoc dl(Node);
3011
2
3012
2
  assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only");
3013
2
3014
2
  // gpr_index
3015
2
  SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
3016
2
                                    VAListPtr, MachinePointerInfo(SV), MVT::i8);
3017
2
  InChain = GprIndex.getValue(1);
3018
2
3019
2
  if (VT == MVT::i64) {
3020
0
    // Check if GprIndex is even
3021
0
    SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex,
3022
0
                                 DAG.getConstant(1, dl, MVT::i32));
3023
0
    SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd,
3024
0
                                DAG.getConstant(0, dl, MVT::i32), ISD::SETNE);
3025
0
    SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex,
3026
0
                                          DAG.getConstant(1, dl, MVT::i32));
3027
0
    // Align GprIndex to be even if it isn't
3028
0
    GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne,
3029
0
                           GprIndex);
3030
0
  }
3031
2
3032
2
  // fpr index is 1 byte after gpr
3033
2
  SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3034
2
                               DAG.getConstant(1, dl, MVT::i32));
3035
2
3036
2
  // fpr
3037
2
  SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
3038
2
                                    FprPtr, MachinePointerInfo(SV), MVT::i8);
3039
2
  InChain = FprIndex.getValue(1);
3040
2
3041
2
  SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3042
2
                                       DAG.getConstant(8, dl, MVT::i32));
3043
2
3044
2
  SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3045
2
                                        DAG.getConstant(4, dl, MVT::i32));
3046
2
3047
2
  // areas
3048
2
  SDValue OverflowArea =
3049
2
      DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr, MachinePointerInfo());
3050
2
  InChain = OverflowArea.getValue(1);
3051
2
3052
2
  SDValue RegSaveArea =
3053
2
      DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr, MachinePointerInfo());
3054
2
  InChain = RegSaveArea.getValue(1);
3055
2
3056
2
  // select overflow_area if index > 8
3057
2
  SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex,
3058
2
                            DAG.getConstant(8, dl, MVT::i32), ISD::SETLT);
3059
2
3060
2
  // adjustment constant gpr_index * 4/8
3061
2
  SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32,
3062
2
                                    VT.isInteger() ? GprIndex : FprIndex,
3063
2
                                    DAG.getConstant(VT.isInteger() ? 4 : 8, dl,
3064
2
                                                    MVT::i32));
3065
2
3066
2
  // OurReg = RegSaveArea + RegConstant
3067
2
  SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea,
3068
2
                               RegConstant);
3069
2
3070
2
  // Floating types are 32 bytes into RegSaveArea
3071
2
  if (VT.isFloatingPoint())
3072
0
    OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg,
3073
0
                         DAG.getConstant(32, dl, MVT::i32));
3074
2
3075
2
  // increase {f,g}pr_index by 1 (or 2 if VT is i64)
3076
2
  SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32,
3077
2
                                   VT.isInteger() ? GprIndex : FprIndex,
3078
2
                                   DAG.getConstant(VT == MVT::i64 ? 2 : 1, dl,
3079
2
                                                   MVT::i32));
3080
2
3081
2
  InChain = DAG.getTruncStore(InChain, dl, IndexPlus1,
3082
2
                              VT.isInteger() ? VAListPtr : FprPtr,
3083
2
                              MachinePointerInfo(SV), MVT::i8);
3084
2
3085
2
  // determine if we should load from reg_save_area or overflow_area
3086
2
  SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea);
3087
2
3088
2
  // increase overflow_area by 4/8 if gpr/fpr > 8
3089
2
  SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea,
3090
2
                                          DAG.getConstant(VT.isInteger() ? 4 : 8,
3091
2
                                          dl, MVT::i32));
3092
2
3093
2
  OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea,
3094
2
                             OverflowAreaPlusN);
3095
2
3096
2
  InChain = DAG.getTruncStore(InChain, dl, OverflowArea, OverflowAreaPtr,
3097
2
                              MachinePointerInfo(), MVT::i32);
3098
2
3099
2
  return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo());
3100
2
}
3101
3102
1
SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
3103
1
  assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only");
3104
1
3105
1
  // We have to copy the entire va_list struct:
3106
1
  // 2*sizeof(char) + 2 bytes of padding + 2*sizeof(char*) = 12 bytes
3107
1
  return DAG.getMemcpy(Op.getOperand(0), Op,
3108
1
                       Op.getOperand(1), Op.getOperand(2),
3109
1
                       DAG.getConstant(12, SDLoc(Op), MVT::i32), 8, false, true,
3110
1
                       false, MachinePointerInfo(), MachinePointerInfo());
3111
1
}
3112
3113
SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
3114
1
                                                  SelectionDAG &DAG) const {
3115
1
  return Op.getOperand(0);
3116
1
}
3117
3118
SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
3119
1
                                                SelectionDAG &DAG) const {
3120
1
  SDValue Chain = Op.getOperand(0);
3121
1
  SDValue Trmp = Op.getOperand(1); // trampoline
3122
1
  SDValue FPtr = Op.getOperand(2); // nested function
3123
1
  SDValue Nest = Op.getOperand(3); // 'nest' parameter value
3124
1
  SDLoc dl(Op);
3125
1
3126
1
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3127
1
  bool isPPC64 = (PtrVT == MVT::i64);
3128
1
  Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
3129
1
3130
1
  TargetLowering::ArgListTy Args;
3131
1
  TargetLowering::ArgListEntry Entry;
3132
1
3133
1
  Entry.Ty = IntPtrTy;
3134
1
  Entry.Node = Trmp; Args.push_back(Entry);
3135
1
3136
1
  // TrampSize == (isPPC64 ? 48 : 40);
3137
1
  Entry.Node = DAG.getConstant(isPPC64 ? 48 : 40, dl,
3138
1
                               isPPC64 ? MVT::i64 : MVT::i32);
3139
1
  Args.push_back(Entry);
3140
1
3141
1
  Entry.Node = FPtr; Args.push_back(Entry);
3142
1
  Entry.Node = Nest; Args.push_back(Entry);
3143
1
3144
1
  // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
3145
1
  TargetLowering::CallLoweringInfo CLI(DAG);
3146
1
  CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
3147
1
      CallingConv::C, Type::getVoidTy(*DAG.getContext()),
3148
1
      DAG.getExternalSymbol("__trampoline_setup", PtrVT), std::move(Args));
3149
1
3150
1
  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
3151
1
  return CallResult.second;
3152
1
}
3153
3154
8
SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
3155
8
  MachineFunction &MF = DAG.getMachineFunction();
3156
8
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3157
8
  EVT PtrVT = getPointerTy(MF.getDataLayout());
3158
8
3159
8
  SDLoc dl(Op);
3160
8
3161
8
  if (Subtarget.isDarwinABI() || Subtarget.isPPC64()) {
3162
6
    // vastart just stores the address of the VarArgsFrameIndex slot into the
3163
6
    // memory location argument.
3164
6
    SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3165
6
    const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3166
6
    return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
3167
6
                        MachinePointerInfo(SV));
3168
6
  }
3169
2
3170
2
  // For the 32-bit SVR4 ABI we follow the layout of the va_list struct.
3171
2
  // We assume the given va_list has already been allocated.
3172
2
  //
3173
2
  // typedef struct {
3174
2
  //  char gpr;     /* index into the array of 8 GPRs
3175
2
  //                 * stored in the register save area
3176
2
  //                 * gpr=0 corresponds to r3,
3177
2
  //                 * gpr=1 to r4, etc.
3178
2
  //                 */
3179
2
  //  char fpr;     /* index into the array of 8 FPRs
3180
2
  //                 * stored in the register save area
3181
2
  //                 * fpr=0 corresponds to f1,
3182
2
  //                 * fpr=1 to f2, etc.
3183
2
  //                 */
3184
2
  //  char *overflow_arg_area;
3185
2
  //                /* location on stack that holds
3186
2
  //                 * the next overflow argument
3187
2
  //                 */
3188
2
  //  char *reg_save_area;
3189
2
  //               /* where r3:r10 and f1:f8 (if saved)
3190
2
  //                * are stored
3191
2
  //                */
3192
2
  // } va_list[1];
3193
2
3194
2
  SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), dl, MVT::i32);
3195
2
  SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), dl, MVT::i32);
3196
2
  SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(),
3197
2
                                            PtrVT);
3198
2
  SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
3199
2
                                 PtrVT);
3200
2
3201
2
  uint64_t FrameOffset = PtrVT.getSizeInBits()/8;
3202
2
  SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, dl, PtrVT);
3203
2
3204
2
  uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1;
3205
2
  SDValue ConstStackOffset = DAG.getConstant(StackOffset, dl, PtrVT);
3206
2
3207
2
  uint64_t FPROffset = 1;
3208
2
  SDValue ConstFPROffset = DAG.getConstant(FPROffset, dl, PtrVT);
3209
2
3210
2
  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3211
2
3212
2
  // Store first byte : number of int regs
3213
2
  SDValue firstStore =
3214
2
      DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR, Op.getOperand(1),
3215
2
                        MachinePointerInfo(SV), MVT::i8);
3216
2
  uint64_t nextOffset = FPROffset;
3217
2
  SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1),
3218
2
                                  ConstFPROffset);
3219
2
3220
2
  // Store second byte : number of float regs
3221
2
  SDValue secondStore =
3222
2
      DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr,
3223
2
                        MachinePointerInfo(SV, nextOffset), MVT::i8);
3224
2
  nextOffset += StackOffset;
3225
2
  nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset);
3226
2
3227
2
  // Store second word : arguments given on stack
3228
2
  SDValue thirdStore = DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr,
3229
2
                                    MachinePointerInfo(SV, nextOffset));
3230
2
  nextOffset += FrameOffset;
3231
2
  nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset);
3232
2
3233
2
  // Store third word : arguments given in registers
3234
2
  return DAG.getStore(thirdStore, dl, FR, nextPtr,
3235
2
                      MachinePointerInfo(SV, nextOffset));
3236
2
}
3237
3238
/// FPR - The set of FP registers that should be allocated for arguments,
3239
/// on Darwin.
3240
static const MCPhysReg FPR[] = {PPC::F1,  PPC::F2,  PPC::F3, PPC::F4, PPC::F5,
3241
                                PPC::F6,  PPC::F7,  PPC::F8, PPC::F9, PPC::F10,
3242
                                PPC::F11, PPC::F12, PPC::F13};
3243
3244
/// QFPR - The set of QPX registers that should be allocated for arguments.
3245
static const MCPhysReg QFPR[] = {
3246
    PPC::QF1, PPC::QF2, PPC::QF3,  PPC::QF4,  PPC::QF5,  PPC::QF6, PPC::QF7,
3247
    PPC::QF8, PPC::QF9, PPC::QF10, PPC::QF11, PPC::QF12, PPC::QF13};
3248
3249
/// CalculateStackSlotSize - Calculates the size reserved for this argument on
3250
/// the stack.
3251
static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
3252
26.1k
                                       unsigned PtrByteSize) {
3253
26.1k
  unsigned ArgSize = ArgVT.getStoreSize();
3254
26.1k
  if (Flags.isByVal())
3255
144
    ArgSize = Flags.getByValSize();
3256
26.1k
3257
26.1k
  // Round up to multiples of the pointer size, except for array members,
3258
26.1k
  // which are always packed.
3259
26.1k
  if (!Flags.isInConsecutiveRegs())
3260
23.5k
    ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
3261
26.1k
3262
26.1k
  return ArgSize;
3263
26.1k
}
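// Illustrative sketch (exposition only): the round-up above is the usual
// ceiling-to-a-multiple pattern; with PtrByteSize = 8:
//
//   ArgSize = 12  ->  ((12 + 7) / 8) * 8 = 16
//   ArgSize = 16  ->  ((16 + 7) / 8) * 8 = 16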
3264
3265
/// CalculateStackSlotAlignment - Calculates the alignment of this argument
3266
/// on the stack.
3267
static unsigned CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT,
3268
                                            ISD::ArgFlagsTy Flags,
3269
48.6k
                                            unsigned PtrByteSize) {
3270
48.6k
  unsigned Align = PtrByteSize;
3271
48.6k
3272
48.6k
  // Altivec parameters are padded to a 16 byte boundary.
3273
48.6k
  if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
3274
48.6k
      ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
3275
48.6k
      ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
3276
48.6k
      ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
3277
9.96k
    Align = 16;
3278
38.6k
  // QPX vector types stored in double-precision are padded to a 32 byte
3279
38.6k
  // boundary.
3280
38.6k
  else if (ArgVT == MVT::v4f64 || ArgVT == MVT::v4i1)
3281
266
    Align = 32;
3282
48.6k
3283
48.6k
  // ByVal parameters are aligned as requested.
3284
48.6k
  if (Flags.isByVal()) {
3285
282
    unsigned BVAlign = Flags.getByValAlign();
3286
282
    if (BVAlign > PtrByteSize) {
3287
48
      if (BVAlign % PtrByteSize != 0)
3288
48
          
llvm_unreachable0
(
3289
48
            "ByVal alignment is not a multiple of the pointer size");
3290
48
3291
48
      Align = BVAlign;
3292
48
    }
3293
282
  }
3294
48.6k
3295
48.6k
  // Array members are always packed to their original alignment.
3296
48.6k
  if (Flags.isInConsecutiveRegs()) {
3297
4.39k
    // If the array member was split into multiple registers, the first
3298
4.39k
    // needs to be aligned to the size of the full type.  (Except for
3299
4.39k
    // ppcf128, which is only aligned as its f64 components.)
3300
4.39k
    if (Flags.isSplit() && OrigVT != MVT::ppcf128)
3301
152
      Align = OrigVT.getStoreSize();
3302
4.24k
    else
3303
4.24k
      Align = ArgVT.getStoreSize();
3304
4.39k
  }
3305
48.6k
3306
48.6k
  return Align;
3307
48.6k
}
3308
3309
/// CalculateStackSlotUsed - Return whether this argument will use its
3310
/// stack slot (instead of being passed in registers).  ArgOffset,
3311
/// AvailableFPRs, and AvailableVRs must hold the current argument
3312
/// position, and will be updated to account for this argument.
3313
static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT,
3314
                                   ISD::ArgFlagsTy Flags,
3315
                                   unsigned PtrByteSize,
3316
                                   unsigned LinkageSize,
3317
                                   unsigned ParamAreaSize,
3318
                                   unsigned &ArgOffset,
3319
                                   unsigned &AvailableFPRs,
3320
22.7k
                                   unsigned &AvailableVRs, bool HasQPX) {
3321
22.7k
  bool UseMemory = false;
3322
22.7k
3323
22.7k
  // Respect alignment of argument on the stack.
3324
22.7k
  unsigned Align =
3325
22.7k
    CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
3326
22.7k
  ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
3327
22.7k
  // If there's no space left in the argument save area, we must
3328
22.7k
  // use memory (this check also catches zero-sized arguments).
3329
22.7k
  if (ArgOffset >= LinkageSize + ParamAreaSize)
3330
2.74k
    UseMemory = true;
3331
22.7k
3332
22.7k
  // Allocate argument on the stack.
3333
22.7k
  ArgOffset += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
3334
22.7k
  if (Flags.isInConsecutiveRegsLast())
3335
271
    ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
3336
22.7k
  // If we overran the argument save area, we must use memory
3337
22.7k
  // (this check catches arguments passed partially in memory)
3338
22.7k
  if (ArgOffset > LinkageSize + ParamAreaSize)
3339
2.79k
    UseMemory = true;
3340
22.7k
3341
22.7k
  // However, if the argument is actually passed in an FPR or a VR,
3342
22.7k
  // we don't use memory after all.
3343
22.7k
  if (!Flags.isByVal()) {
3344
22.6k
    if (ArgVT == MVT::f32 || ArgVT == MVT::f64 ||
3345
22.6k
        // QPX registers overlap with the scalar FP registers.
3346
22.6k
        (HasQPX && (ArgVT == MVT::v4f32 ||
3347
302
                    ArgVT == MVT::v4f64 ||
3348
302
                    ArgVT == MVT::v4i1)))
3349
4.71k
      if (AvailableFPRs > 0) {
3350
4.53k
        --AvailableFPRs;
3351
4.53k
        return false;
3352
4.53k
      }
3353
18.0k
    if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
3354
18.0k
        ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
3355
18.0k
        ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
3356
18.0k
        ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
3357
4.98k
      if (AvailableVRs > 0) {
3358
4.66k
        --AvailableVRs;
3359
4.66k
        return false;
3360
4.66k
      }
3361
13.5k
  }
3362
13.5k
3363
13.5k
  return UseMemory;
3364
13.5k
}
3365
3366
/// EnsureStackAlignment - Round stack frame size up from NumBytes to
3367
/// ensure minimum alignment required for target.
3368
static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering,
3369
11.1k
                                     unsigned NumBytes) {
3370
11.1k
  unsigned TargetAlign = Lowering->getStackAlignment();
3371
11.1k
  unsigned AlignMask = TargetAlign - 1;
3372
11.1k
  NumBytes = (NumBytes + AlignMask) & ~AlignMask;
3373
11.1k
  return NumBytes;
3374
11.1k
}
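// Illustrative sketch (exposition only): the same rounding expressed with a
// mask; with TargetAlign = 16 (AlignMask = 15):
//
//   NumBytes = 40  ->  (40 + 15) & ~15 = 48
//   NumBytes = 48  ->  (48 + 15) & ~15 = 48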
3375
3376
SDValue PPCTargetLowering::LowerFormalArguments(
3377
    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3378
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3379
11.1k
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3380
11.1k
  if (Subtarget.isSVR4ABI()) {
3381
11.1k
    if (Subtarget.isPPC64())
3382
10.1k
      return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins,
3383
10.1k
                                         dl, DAG, InVals);
3384
1.04k
    else
3385
1.04k
      return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins,
3386
1.04k
                                         dl, DAG, InVals);
3387
24
  } else {
3388
24
    return LowerFormalArguments_Darwin(Chain, CallConv, isVarArg, Ins,
3389
24
                                       dl, DAG, InVals);
3390
24
  }
3391
11.1k
}
3392
3393
SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
3394
    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3395
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3396
1.04k
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3397
1.04k
3398
1.04k
  // 32-bit SVR4 ABI Stack Frame Layout:
3399
1.04k
  //              +-----------------------------------+
3400
1.04k
  //        +-->  |            Back chain             |
3401
1.04k
  //        |     +-----------------------------------+
3402
1.04k
  //        |     | Floating-point register save area |
3403
1.04k
  //        |     +-----------------------------------+
3404
1.04k
  //        |     |    General register save area     |
3405
1.04k
  //        |     +-----------------------------------+
3406
1.04k
  //        |     |          CR save word             |
3407
1.04k
  //        |     +-----------------------------------+
3408
1.04k
  //        |     |         VRSAVE save word          |
3409
1.04k
  //        |     +-----------------------------------+
3410
1.04k
  //        |     |         Alignment padding         |
3411
1.04k
  //        |     +-----------------------------------+
3412
1.04k
  //        |     |     Vector register save area     |
3413
1.04k
  //        |     +-----------------------------------+
3414
1.04k
  //        |     |       Local variable space        |
3415
1.04k
  //        |     +-----------------------------------+
3416
1.04k
  //        |     |        Parameter list area        |
3417
1.04k
  //        |     +-----------------------------------+
3418
1.04k
  //        |     |           LR save word            |
3419
1.04k
  //        |     +-----------------------------------+
3420
1.04k
  // SP-->  +---  |            Back chain             |
3421
1.04k
  //              +-----------------------------------+
3422
1.04k
  //
3423
1.04k
  // Specifications:
3424
1.04k
  //   System V Application Binary Interface PowerPC Processor Supplement
3425
1.04k
  //   AltiVec Technology Programming Interface Manual
3426
1.04k
3427
1.04k
  MachineFunction &MF = DAG.getMachineFunction();
3428
1.04k
  MachineFrameInfo &MFI = MF.getFrameInfo();
3429
1.04k
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3430
1.04k
3431
1.04k
  EVT PtrVT = getPointerTy(MF.getDataLayout());
3432
1.04k
  // Potential tail calls could cause overwriting of argument stack slots.
3433
1.04k
  bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
3434
1.04k
                       (CallConv == CallingConv::Fast));
3435
1.04k
  unsigned PtrByteSize = 4;
3436
1.04k
3437
1.04k
  // Assign locations to all of the incoming arguments.
3438
1.04k
  SmallVector<CCValAssign, 16> ArgLocs;
3439
1.04k
  PPCCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
3440
1.04k
                 *DAG.getContext());
3441
1.04k
3442
1.04k
  // Reserve space for the linkage area on the stack.
3443
1.04k
  unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
3444
1.04k
  CCInfo.AllocateStack(LinkageSize, PtrByteSize);
3445
1.04k
  if (useSoftFloat())
3446
24
    CCInfo.PreAnalyzeFormalArguments(Ins);
3447
1.04k
3448
1.04k
  CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);
3449
1.04k
  CCInfo.clearWasPPCF128();
3450
1.04k
3451
2.90k
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3452
1.85k
    CCValAssign &VA = ArgLocs[i];
3453
1.85k
3454
1.85k
    // Arguments stored in registers.
3455
1.85k
    if (VA.isRegLoc()) {
3456
1.76k
      const TargetRegisterClass *RC;
3457
1.76k
      EVT ValVT = VA.getValVT();
3458
1.76k
3459
1.76k
      switch (ValVT.getSimpleVT().SimpleTy) {
3460
1.76k
        default:
3461
0
          llvm_unreachable("ValVT not supported by formal arguments Lowering");
3462
1.76k
        case MVT::i1:
3463
1.23k
        case MVT::i32:
3464
1.23k
          RC = &PPC::GPRCRegClass;
3465
1.23k
          break;
3466
1.23k
        case MVT::f32:
3467
226
          if (Subtarget.hasP8Vector())
3468
0
            RC = &PPC::VSSRCRegClass;
3469
226
          else if (Subtarget.hasSPE())
3470
57
            RC = &PPC::SPE4RCRegClass;
3471
169
          else
3472
169
            RC = &PPC::F4RCRegClass;
3473
226
          break;
3474
1.23k
        case MVT::f64:
3475
281
          if (Subtarget.hasVSX())
3476
0
            RC = &PPC::VSFRCRegClass;
3477
281
          else if (Subtarget.hasSPE())
3478
60
            // SPE passes doubles in GPR pairs.
3479
60
            RC = &PPC::GPRCRegClass;
3480
221
          else
3481
221
            RC = &PPC::F8RCRegClass;
3482
281
          break;
3483
1.23k
        case MVT::v16i8:
3484
13
        case MVT::v8i16:
3485
13
        case MVT::v4i32:
3486
13
          RC = &PPC::VRRCRegClass;
3487
13
          break;
3488
13
        case MVT::v4f32:
3489
9
          RC = Subtarget.hasQPX() ? 
&PPC::QSRCRegClass0
: &PPC::VRRCRegClass;
3490
9
          break;
3491
13
        case MVT::v2f64:
3492
0
        case MVT::v2i64:
3493
0
          RC = &PPC::VRRCRegClass;
3494
0
          break;
3495
0
        case MVT::v4f64:
3496
0
          RC = &PPC::QFRCRegClass;
3497
0
          break;
3498
0
        case MVT::v4i1:
3499
0
          RC = &PPC::QBRCRegClass;
3500
0
          break;
3501
1.76k
      }
3502
1.76k
3503
1.76k
      SDValue ArgValue;
3504
1.76k
      // Transform the arguments stored in physical registers into
3505
1.76k
      // virtual ones.
3506
1.76k
      if (VA.getLocVT() == MVT::f64 && 
Subtarget.hasSPE()281
) {
3507
60
        assert(i + 1 < e && "No second half of double precision argument");
3508
60
        unsigned RegLo = MF.addLiveIn(VA.getLocReg(), RC);
3509
60
        unsigned RegHi = MF.addLiveIn(ArgLocs[++i].getLocReg(), RC);
3510
60
        SDValue ArgValueLo = DAG.getCopyFromReg(Chain, dl, RegLo, MVT::i32);
3511
60
        SDValue ArgValueHi = DAG.getCopyFromReg(Chain, dl, RegHi, MVT::i32);
3512
60
        if (!Subtarget.isLittleEndian())
3513
60
          std::swap (ArgValueLo, ArgValueHi);
3514
60
        ArgValue = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, ArgValueLo,
3515
60
                               ArgValueHi);
3516
1.70k
      } else {
3517
1.70k
        unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
3518
1.70k
        ArgValue = DAG.getCopyFromReg(Chain, dl, Reg,
3519
1.70k
                                      ValVT == MVT::i1 ? 
MVT::i323
:
ValVT1.69k
);
3520
1.70k
        if (ValVT == MVT::i1)
3521
3
          ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue);
3522
1.70k
      }
3523
1.76k
3524
1.76k
      InVals.push_back(ArgValue);
3525
1.76k
    } else {
3526
95
      // Argument stored in memory.
3527
95
      assert(VA.isMemLoc());
3528
95
3529
95
      // Get the extended size of the argument type in stack
3530
95
      unsigned ArgSize = VA.getLocVT().getStoreSize();
3531
95
      // Get the actual size of the argument type
3532
95
      unsigned ObjSize = VA.getValVT().getStoreSize();
3533
95
      unsigned ArgOffset = VA.getLocMemOffset();
3534
95
      // Stack objects in PPC32 are right justified.
3535
95
      ArgOffset += ArgSize - ObjSize;
3536
95
      int FI = MFI.CreateFixedObject(ArgSize, ArgOffset, isImmutable);
3537
95
3538
95
      // Create load nodes to retrieve arguments from the stack.
3539
95
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3540
95
      InVals.push_back(
3541
95
          DAG.getLoad(VA.getValVT(), dl, Chain, FIN, MachinePointerInfo()));
3542
95
    }
3543
1.85k
  }
3544
1.04k
3545
1.04k
  // Assign locations to all of the incoming aggregate by value arguments.
3546
1.04k
  // Aggregates passed by value are stored in the local variable space of the
3547
1.04k
  // caller's stack frame, right above the parameter list area.
3548
1.04k
  SmallVector<CCValAssign, 16> ByValArgLocs;
3549
1.04k
  CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
3550
1.04k
                      ByValArgLocs, *DAG.getContext());
3551
1.04k
3552
1.04k
  // Reserve stack space for the allocations in CCInfo.
3553
1.04k
  CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);
3554
1.04k
3555
1.04k
  CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal);
3556
1.04k
3557
1.04k
  // Area that is at least reserved in the caller of this function.
3558
1.04k
  unsigned MinReservedArea = CCByValInfo.getNextStackOffset();
3559
1.04k
  MinReservedArea = std::max(MinReservedArea, LinkageSize);
3560
1.04k
3561
1.04k
  // Set the size that is at least reserved in caller of this function.  Tail
3562
1.04k
  // call optimized function's reserved stack space needs to be aligned so that
3563
1.04k
  // taking the difference between two stack areas will result in an aligned
3564
1.04k
  // stack.
3565
1.04k
  MinReservedArea =
3566
1.04k
      EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
3567
1.04k
  FuncInfo->setMinReservedArea(MinReservedArea);
3568
1.04k
3569
1.04k
  SmallVector<SDValue, 8> MemOps;
3570
1.04k
3571
1.04k
  // If the function takes variable number of arguments, make a frame index for
3572
1.04k
  // the start of the first vararg value... for expansion of llvm.va_start.
3573
1.04k
  if (isVarArg) {
3574
3
    static const MCPhysReg GPArgRegs[] = {
3575
3
      PPC::R3, PPC::R4, PPC::R5, PPC::R6,
3576
3
      PPC::R7, PPC::R8, PPC::R9, PPC::R10,
3577
3
    };
3578
3
    const unsigned NumGPArgRegs = array_lengthof(GPArgRegs);
3579
3
3580
3
    static const MCPhysReg FPArgRegs[] = {
3581
3
      PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
3582
3
      PPC::F8
3583
3
    };
3584
3
    unsigned NumFPArgRegs = array_lengthof(FPArgRegs);
3585
3
3586
3
    if (useSoftFloat() || hasSPE())
3587
0
       NumFPArgRegs = 0;
3588
3
3589
3
    FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs));
3590
3
    FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs));
3591
3
3592
3
    // Make room for NumGPArgRegs and NumFPArgRegs.
3593
3
    int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 +
3594
3
                NumFPArgRegs * MVT(MVT::f64).getSizeInBits()/8;
3595
3
3596
3
    FuncInfo->setVarArgsStackOffset(
3597
3
      MFI.CreateFixedObject(PtrVT.getSizeInBits()/8,
3598
3
                            CCInfo.getNextStackOffset(), true));
3599
3
3600
3
    FuncInfo->setVarArgsFrameIndex(MFI.CreateStackObject(Depth, 8, false));
3601
3
    SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3602
3
3603
3
    // The fixed integer arguments of a variadic function are stored to the
3604
3
    // VarArgsFrameIndex on the stack so that they may be loaded by
3605
3
    // dereferencing the result of va_next.
3606
27
    for (unsigned GPRIndex = 0; GPRIndex != NumGPArgRegs; 
++GPRIndex24
) {
3607
24
      // Get an existing live-in vreg, or add a new one.
3608
24
      unsigned VReg = MF.getRegInfo().getLiveInVirtReg(GPArgRegs[GPRIndex]);
3609
24
      if (!VReg)
3610
14
        VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass);
3611
24
3612
24
      SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
3613
24
      SDValue Store =
3614
24
          DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
3615
24
      MemOps.push_back(Store);
3616
24
      // Increment the address by four for the next argument to store
3617
24
      SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
3618
24
      FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
3619
24
    }
3620
3
3621
3
    // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6
3622
3
    // is set.
3623
3
    // The double arguments are stored to the VarArgsFrameIndex
3624
3
    // on the stack.
3625
27
    for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; 
++FPRIndex24
) {
3626
24
      // Get an existing live-in vreg, or add a new one.
3627
24
      unsigned VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]);
3628
24
      if (!VReg)
3629
23
        VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass);
3630
24
3631
24
      SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64);
3632
24
      SDValue Store =
3633
24
          DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
3634
24
      MemOps.push_back(Store);
3635
24
      // Increment the address by eight for the next argument to store
3636
24
      SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8, dl,
3637
24
                                         PtrVT);
3638
24
      FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
3639
24
    }
3640
3
  }
3641
1.04k
3642
1.04k
  if (!MemOps.empty())
3643
3
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
3644
1.04k
3645
1.04k
  return Chain;
3646
1.04k
}
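
// Illustrative sketch (not from PPCISelLowering.cpp): how the SPE path above
// reassembles an f64 from the two i32 halves that arrive in a GPR pair.
// Plain host C++ stands in for the PPCISD::BUILD_SPE64 node, and the memcpy
// assumes the host uses the same IEEE-754 double layout as the target.
//
//   #include <cstdint>
//   #include <cstring>
//   #include <utility>
//
//   double buildSPE64(uint32_t FirstReg, uint32_t SecondReg,
//                     bool IsLittleEndian) {
//     uint32_t Lo = FirstReg, Hi = SecondReg;
//     // On big-endian targets the first register of the pair holds the high
//     // word, mirroring the std::swap(ArgValueLo, ArgValueHi) above.
//     if (!IsLittleEndian)
//       std::swap(Lo, Hi);
//     uint64_t Bits = (uint64_t(Hi) << 32) | Lo;
//     double D;
//     std::memcpy(&D, &Bits, sizeof(D));
//     return D;
//   }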

// PPC64 passes i8, i16, and i32 values in i64 registers. Promote
// value to MVT::i64 and then truncate to the correct register size.
SDValue PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags,
                                             EVT ObjectVT, SelectionDAG &DAG,
                                             SDValue ArgVal,
                                             const SDLoc &dl) const {
  if (Flags.isSExt())
    ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
                         DAG.getValueType(ObjectVT));
  else if (Flags.isZExt())
    ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
                         DAG.getValueType(ObjectVT));

  return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal);
}
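
// Illustrative sketch (not from the file): what the AssertSext/AssertZext +
// TRUNCATE pair built above encodes. The caller has already extended the
// value to 64 bits per the ABI, so the truncate is a free subregister read.
//
//   #include <cstdint>
//
//   int32_t readSExtI32Arg(int64_t RegValue) {
//     // AssertSext i32 records that RegValue == int64_t(int32_t(RegValue))
//     // already holds, so ISD::TRUNCATE needs no extra instruction.
//     return static_cast<int32_t>(RegValue);
//   }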

SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  // TODO: add description of PPC stack frame format, or at least some docs.
  //
  bool isELFv2ABI = Subtarget.isELFv2ABI();
  bool isLittleEndian = Subtarget.isLittleEndian();
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();

  assert(!(CallConv == CallingConv::Fast && isVarArg) &&
         "fastcc not supported on varargs functions");

  EVT PtrVT = getPointerTy(MF.getDataLayout());
  // Potential tail calls could cause overwriting of argument stack slots.
  bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
                       (CallConv == CallingConv::Fast));
  unsigned PtrByteSize = 8;
  unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();

  static const MCPhysReg GPR[] = {
    PPC::X3, PPC::X4, PPC::X5, PPC::X6,
    PPC::X7, PPC::X8, PPC::X9, PPC::X10,
  };
  static const MCPhysReg VR[] = {
    PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
    PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
  };

  const unsigned Num_GPR_Regs = array_lengthof(GPR);
  const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
  const unsigned Num_VR_Regs  = array_lengthof(VR);
  const unsigned Num_QFPR_Regs = Num_FPR_Regs;

  // Do a first pass over the arguments to determine whether the ABI
  // guarantees that our caller has allocated the parameter save area
  // on its stack frame.  In the ELFv1 ABI, this is always the case;
  // in the ELFv2 ABI, it is true if this is a vararg function or if
  // any parameter is located in a stack slot.

  bool HasParameterArea = !isELFv2ABI || isVarArg;
  unsigned ParamAreaSize = Num_GPR_Regs * PtrByteSize;
  unsigned NumBytes = LinkageSize;
  unsigned AvailableFPRs = Num_FPR_Regs;
  unsigned AvailableVRs = Num_VR_Regs;
  for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
    if (Ins[i].Flags.isNest())
      continue;

    if (CalculateStackSlotUsed(Ins[i].VT, Ins[i].ArgVT, Ins[i].Flags,
                               PtrByteSize, LinkageSize, ParamAreaSize,
                               NumBytes, AvailableFPRs, AvailableVRs,
                               Subtarget.hasQPX()))
      HasParameterArea = true;
  }

  // Add DAG nodes to load the arguments or copy them out of registers.  On
  // entry to a function on PPC, the arguments start after the linkage area,
  // although the first ones are often in registers.

  unsigned ArgOffset = LinkageSize;
  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
  unsigned &QFPR_idx = FPR_idx;
  SmallVector<SDValue, 8> MemOps;
  Function::const_arg_iterator FuncArg = MF.getFunction().arg_begin();
  unsigned CurArgIdx = 0;
  for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
    SDValue ArgVal;
    bool needsLoad = false;
    EVT ObjectVT = Ins[ArgNo].VT;
    EVT OrigVT = Ins[ArgNo].ArgVT;
    unsigned ObjSize = ObjectVT.getStoreSize();
    unsigned ArgSize = ObjSize;
    ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
    if (Ins[ArgNo].isOrigArg()) {
      std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
      CurArgIdx = Ins[ArgNo].getOrigArgIndex();
    }
    // We re-align the argument offset for each argument, except when using the
    // fast calling convention, when we need to make sure we do that only when
    // we'll actually use a stack slot.
    unsigned CurArgOffset, Align;
    auto ComputeArgOffset = [&]() {
      /* Respect alignment of argument on the stack.  */
      Align = CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize);
      ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
      CurArgOffset = ArgOffset;
    };

    if (CallConv != CallingConv::Fast) {
      ComputeArgOffset();

      /* Compute GPR index associated with argument offset.  */
      GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
      GPR_idx = std::min(GPR_idx, Num_GPR_Regs);
    }

    // FIXME the codegen can be much improved in some cases.
    // We do not have to keep everything in memory.
    if (Flags.isByVal()) {
      assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");

      if (CallConv == CallingConv::Fast)
        ComputeArgOffset();

      // ObjSize is the true size, ArgSize rounded up to multiple of registers.
      ObjSize = Flags.getByValSize();
      ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
      // Empty aggregate parameters do not take up registers.  Examples:
      //   struct { } a;
      //   union  { } b;
      //   int c[0];
      // etc.  However, we have to provide a place-holder in InVals, so
      // pretend we have an 8-byte item at the current address for that
      // purpose.
      if (!ObjSize) {
        int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true);
        SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
        InVals.push_back(FIN);
        continue;
      }

      // Create a stack object covering all stack doublewords occupied
      // by the argument.  If the argument is (fully or partially) on
      // the stack, or if the argument is fully in registers but the
      // caller has allocated the parameter save anyway, we can refer
      // directly to the caller's stack frame.  Otherwise, create a
      // local copy in our own frame.
      int FI;
      if (HasParameterArea ||
          ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize)
        FI = MFI.CreateFixedObject(ArgSize, ArgOffset, false, true);
      else
        FI = MFI.CreateStackObject(ArgSize, Align, false);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);

      // Handle aggregates smaller than 8 bytes.
      if (ObjSize < PtrByteSize) {
        // The value of the object is its address, which differs from the
        // address of the enclosing doubleword on big-endian systems.
        SDValue Arg = FIN;
        if (!isLittleEndian) {
          SDValue ArgOff = DAG.getConstant(PtrByteSize - ObjSize, dl, PtrVT);
          Arg = DAG.getNode(ISD::ADD, dl, ArgOff.getValueType(), Arg, ArgOff);
        }
        InVals.push_back(Arg);

        if (GPR_idx != Num_GPR_Regs) {
          unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
          FuncInfo->addLiveInAttr(VReg, Flags);
          SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
          SDValue Store;

          if (ObjSize==1 || ObjSize==2 || ObjSize==4) {
            EVT ObjType = (ObjSize == 1 ? MVT::i8 :
                           (ObjSize == 2 ? MVT::i16 : MVT::i32));
            Store = DAG.getTruncStore(Val.getValue(1), dl, Val, Arg,
                                      MachinePointerInfo(&*FuncArg), ObjType);
          } else {
            // For sizes that don't fit a truncating store (3, 5, 6, 7),
            // store the whole register as-is to the parameter save area
            // slot.
            Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
                                 MachinePointerInfo(&*FuncArg));
          }

          MemOps.push_back(Store);
        }
        // Whether we copied from a register or not, advance the offset
        // into the parameter save area by a full doubleword.
        ArgOffset += PtrByteSize;
        continue;
      }

      // The value of the object is its address, which is the address of
      // its first stack doubleword.
      InVals.push_back(FIN);

      // Store whatever pieces of the object are in registers to memory.
      for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
        if (GPR_idx == Num_GPR_Regs)
          break;

        unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
        FuncInfo->addLiveInAttr(VReg, Flags);
        SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
        SDValue Addr = FIN;
        if (j) {
          SDValue Off = DAG.getConstant(j, dl, PtrVT);
          Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off);
        }
        SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, Addr,
                                     MachinePointerInfo(&*FuncArg, j));
        MemOps.push_back(Store);
        ++GPR_idx;
      }
      ArgOffset += ArgSize;
      continue;
    }

    switch (ObjectVT.getSimpleVT().SimpleTy) {
    default: llvm_unreachable("Unhandled argument type!");
    case MVT::i1:
    case MVT::i32:
    case MVT::i64:
      if (Flags.isNest()) {
        // The 'nest' parameter, if any, is passed in R11.
        unsigned VReg = MF.addLiveIn(PPC::X11, &PPC::G8RCRegClass);
        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);

        if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
          ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);

        break;
      }

      // These can be scalar arguments or elements of an integer array type
      // passed directly.  Clang may use those instead of "byval" aggregate
      // types to avoid forcing arguments to memory unnecessarily.
      if (GPR_idx != Num_GPR_Regs) {
        unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
        FuncInfo->addLiveInAttr(VReg, Flags);
        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);

        if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
          // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
          // value to MVT::i64 and then truncate to the correct register size.
          ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
      } else {
        if (CallConv == CallingConv::Fast)
          ComputeArgOffset();

        needsLoad = true;
        ArgSize = PtrByteSize;
      }
      if (CallConv != CallingConv::Fast || needsLoad)
        ArgOffset += 8;
      break;

    case MVT::f32:
    case MVT::f64:
      // These can be scalar arguments or elements of a float array type
      // passed directly.  The latter are used to implement ELFv2 homogeneous
      // float aggregates.
      if (FPR_idx != Num_FPR_Regs) {
        unsigned VReg;

        if (ObjectVT == MVT::f32)
          VReg = MF.addLiveIn(FPR[FPR_idx],
                              Subtarget.hasP8Vector()
                                  ? &PPC::VSSRCRegClass
                                  : &PPC::F4RCRegClass);
        else
          VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX()
                                                ? &PPC::VSFRCRegClass
                                                : &PPC::F8RCRegClass);

        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
        ++FPR_idx;
      } else if (GPR_idx != Num_GPR_Regs && CallConv != CallingConv::Fast) {
        // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
        // once we support fp <-> gpr moves.

        // This can only ever happen in the presence of f32 array types,
        // since otherwise we never run out of FPRs before running out
        // of GPRs.
        unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
        FuncInfo->addLiveInAttr(VReg, Flags);
        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);

        if (ObjectVT == MVT::f32) {
          if ((ArgOffset % PtrByteSize) == (isLittleEndian ? 4 : 0))
            ArgVal = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgVal,
                                 DAG.getConstant(32, dl, MVT::i32));
          ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
        }

        ArgVal = DAG.getNode(ISD::BITCAST, dl, ObjectVT, ArgVal);
      } else {
        if (CallConv == CallingConv::Fast)
          ComputeArgOffset();

        needsLoad = true;
      }

      // When passing an array of floats, the array occupies consecutive
      // space in the argument area; only round up to the next doubleword
      // at the end of the array.  Otherwise, each float takes 8 bytes.
      if (CallConv != CallingConv::Fast || needsLoad) {
        ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize;
        ArgOffset += ArgSize;
        if (Flags.isInConsecutiveRegsLast())
          ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
      }
      break;
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
    case MVT::v2f64:
    case MVT::v2i64:
    case MVT::v1i128:
    case MVT::f128:
      if (!Subtarget.hasQPX()) {
        // These can be scalar arguments or elements of a vector array type
        // passed directly.  The latter are used to implement ELFv2 homogeneous
        // vector aggregates.
        if (VR_idx != Num_VR_Regs) {
          unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
          ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
          ++VR_idx;
        } else {
          if (CallConv == CallingConv::Fast)
            ComputeArgOffset();
          needsLoad = true;
        }
        if (CallConv != CallingConv::Fast || needsLoad)
          ArgOffset += 16;
        break;
      } // not QPX

      assert(ObjectVT.getSimpleVT().SimpleTy == MVT::v4f32 &&
             "Invalid QPX parameter type");
      LLVM_FALLTHROUGH;

    case MVT::v4f64:
    case MVT::v4i1:
      // QPX vectors are treated like their scalar floating-point subregisters
      // (except that they're larger).
      unsigned Sz = ObjectVT.getSimpleVT().SimpleTy == MVT::v4f32 ? 16 : 32;
      if (QFPR_idx != Num_QFPR_Regs) {
        const TargetRegisterClass *RC;
        switch (ObjectVT.getSimpleVT().SimpleTy) {
        case MVT::v4f64: RC = &PPC::QFRCRegClass; break;
        case MVT::v4f32: RC = &PPC::QSRCRegClass; break;
        default:         RC = &PPC::QBRCRegClass; break;
        }

        unsigned VReg = MF.addLiveIn(QFPR[QFPR_idx], RC);
        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
        ++QFPR_idx;
      } else {
        if (CallConv == CallingConv::Fast)
          ComputeArgOffset();
        needsLoad = true;
      }
      if (CallConv != CallingConv::Fast || needsLoad)
        ArgOffset += Sz;
      break;
    }

    // We need to load the argument to a virtual register if we determined
    // above that we ran out of physical registers of the appropriate type.
    if (needsLoad) {
      if (ObjSize < ArgSize && !isLittleEndian)
        CurArgOffset += ArgSize - ObjSize;
      int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, isImmutable);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());
    }

    InVals.push_back(ArgVal);
  }

  // Area that is at least reserved in the caller of this function.
  unsigned MinReservedArea;
  if (HasParameterArea)
    MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize);
  else
    MinReservedArea = LinkageSize;

  // Set the size that is at least reserved in caller of this function.  Tail
  // call optimized functions' reserved stack space needs to be aligned so that
  // taking the difference between two stack areas will result in an aligned
  // stack.
  MinReservedArea =
      EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
  FuncInfo->setMinReservedArea(MinReservedArea);

  // If the function takes variable number of arguments, make a frame index for
  // the start of the first vararg value... for expansion of llvm.va_start.
  if (isVarArg) {
    int Depth = ArgOffset;

    FuncInfo->setVarArgsFrameIndex(
      MFI.CreateFixedObject(PtrByteSize, Depth, true));
    SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);

    // If this function is vararg, store any remaining integer argument regs
    // to their spots on the stack so that they may be loaded by dereferencing
    // the result of va_next.
    for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
         GPR_idx < Num_GPR_Regs; ++GPR_idx) {
      unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
      SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
      SDValue Store =
          DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
      MemOps.push_back(Store);
      // Increment the address by eight for the next argument to store
      SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
      FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
    }
  }

  if (!MemOps.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);

  return Chain;
}
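
// Illustrative sketch (not from the file): the two offset computations that
// ComputeArgOffset and the non-fastcc path above perform. Pure arithmetic,
// so the example values can be checked by hand; the 48-byte linkage area in
// the example is the ELFv1 value and is an assumption of the example only.
//
//   unsigned alignArgOffset(unsigned ArgOffset, unsigned Align) {
//     // Round up to the slot alignment.
//     return ((ArgOffset + Align - 1) / Align) * Align;
//   }
//
//   unsigned gprIndexForOffset(unsigned ArgOffset, unsigned LinkageSize) {
//     // Each doubleword of the parameter save area corresponds to one GPR;
//     // callers clamp the result to Num_GPR_Regs.
//     return (ArgOffset - LinkageSize) / 8;
//   }
//
// e.g. alignArgOffset(56, 16) == 64, and with a 48-byte linkage area an
// argument at offset 64 maps to GPR index 2, i.e. X5.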

SDValue PPCTargetLowering::LowerFormalArguments_Darwin(
    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  // TODO: add description of PPC stack frame format, or at least some docs.
  //
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();

  EVT PtrVT = getPointerTy(MF.getDataLayout());
  bool isPPC64 = PtrVT == MVT::i64;
  // Potential tail calls could cause overwriting of argument stack slots.
  bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
                       (CallConv == CallingConv::Fast));
  unsigned PtrByteSize = isPPC64 ? 8 : 4;
  unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
  unsigned ArgOffset = LinkageSize;
  // Area that is at least reserved in caller of this function.
  unsigned MinReservedArea = ArgOffset;

  static const MCPhysReg GPR_32[] = {           // 32-bit registers.
    PPC::R3, PPC::R4, PPC::R5, PPC::R6,
    PPC::R7, PPC::R8, PPC::R9, PPC::R10,
  };
  static const MCPhysReg GPR_64[] = {           // 64-bit registers.
    PPC::X3, PPC::X4, PPC::X5, PPC::X6,
    PPC::X7, PPC::X8, PPC::X9, PPC::X10,
  };
  static const MCPhysReg VR[] = {
    PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
    PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
  };

  const unsigned Num_GPR_Regs = array_lengthof(GPR_32);
  const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
  const unsigned Num_VR_Regs  = array_lengthof(VR);

  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;

  const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32;

  // In 32-bit non-varargs functions, the stack space for vectors is after the
  // stack space for non-vectors.  We do not use this space unless we have
  // too many vectors to fit in registers, something that only occurs in
  // constructed examples:), but we have to walk the arglist to figure
  // that out...for the pathological case, compute VecArgOffset as the
  // start of the vector parameter area.  Computing VecArgOffset is the
  // entire point of the following loop.
  unsigned VecArgOffset = ArgOffset;
  if (!isVarArg && !isPPC64) {
    for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
      EVT ObjectVT = Ins[ArgNo].VT;
      ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;

      if (Flags.isByVal()) {
        // ObjSize is the true size, ArgSize rounded up to multiple of regs.
        unsigned ObjSize = Flags.getByValSize();
        unsigned ArgSize =
                ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
        VecArgOffset += ArgSize;
        continue;
      }

      switch (ObjectVT.getSimpleVT().SimpleTy) {
      default: llvm_unreachable("Unhandled argument type!");
      case MVT::i1:
      case MVT::i32:
      case MVT::f32:
        VecArgOffset += 4;
        break;
      case MVT::i64:  // PPC64
      case MVT::f64:
        // FIXME: We are guaranteed to be !isPPC64 at this point.
        // Does MVT::i64 apply?
        VecArgOffset += 8;
        break;
      case MVT::v4f32:
      case MVT::v4i32:
      case MVT::v8i16:
      case MVT::v16i8:
        // Nothing to do, we're only looking at Nonvector args here.
        break;
      }
    }
  }
  // We've found where the vector parameter area in memory is.  Skip the
  // first 12 parameters; these don't use that memory.
  VecArgOffset = ((VecArgOffset+15)/16)*16;
  VecArgOffset += 12*16;

  // Add DAG nodes to load the arguments or copy them out of registers.  On
  // entry to a function on PPC, the arguments start after the linkage area,
  // although the first ones are often in registers.

  SmallVector<SDValue, 8> MemOps;
  unsigned nAltivecParamsAtEnd = 0;
  Function::const_arg_iterator FuncArg = MF.getFunction().arg_begin();
  unsigned CurArgIdx = 0;
  for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
    SDValue ArgVal;
    bool needsLoad = false;
    EVT ObjectVT = Ins[ArgNo].VT;
    unsigned ObjSize = ObjectVT.getSizeInBits()/8;
    unsigned ArgSize = ObjSize;
    ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
    if (Ins[ArgNo].isOrigArg()) {
      std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
      CurArgIdx = Ins[ArgNo].getOrigArgIndex();
    }
    unsigned CurArgOffset = ArgOffset;

    // Varargs or 64 bit Altivec parameters are padded to a 16 byte boundary.
    if (ObjectVT==MVT::v4f32 || ObjectVT==MVT::v4i32 ||
        ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8) {
      if (isVarArg || isPPC64) {
        MinReservedArea = ((MinReservedArea+15)/16)*16;
        MinReservedArea += CalculateStackSlotSize(ObjectVT,
                                                  Flags,
                                                  PtrByteSize);
      } else
        nAltivecParamsAtEnd++;
    } else
      // Calculate min reserved area.
      MinReservedArea += CalculateStackSlotSize(Ins[ArgNo].VT,
                                                Flags,
                                                PtrByteSize);

    // FIXME the codegen can be much improved in some cases.
    // We do not have to keep everything in memory.
    if (Flags.isByVal()) {
      assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");

      // ObjSize is the true size, ArgSize rounded up to multiple of registers.
      ObjSize = Flags.getByValSize();
      ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
      // Objects of size 1 and 2 are right justified, everything else is
      // left justified.  This means the memory address is adjusted forwards.
      if (ObjSize==1 || ObjSize==2) {
        CurArgOffset = CurArgOffset + (4 - ObjSize);
      }
      // The value of the object is its address.
      int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, false, true);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      InVals.push_back(FIN);
      if (ObjSize==1 || ObjSize==2) {
        if (GPR_idx != Num_GPR_Regs) {
          unsigned VReg;
          if (isPPC64)
            VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
          else
            VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
          SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
          EVT ObjType = ObjSize == 1 ? MVT::i8 : MVT::i16;
          SDValue Store =
              DAG.getTruncStore(Val.getValue(1), dl, Val, FIN,
                                MachinePointerInfo(&*FuncArg), ObjType);
          MemOps.push_back(Store);
          ++GPR_idx;
        }

        ArgOffset += PtrByteSize;

        continue;
      }
      for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
        // Store whatever pieces of the object are in registers
        // to memory.  ArgOffset will be the address of the beginning
        // of the object.
        if (GPR_idx != Num_GPR_Regs) {
          unsigned VReg;
          if (isPPC64)
            VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
          else
            VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
          int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true);
          SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
          SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
          SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
                                       MachinePointerInfo(&*FuncArg, j));
          MemOps.push_back(Store);
          ++GPR_idx;
          ArgOffset += PtrByteSize;
        } else {
          ArgOffset += ArgSize - (ArgOffset-CurArgOffset);
          break;
        }
      }
      continue;
    }

    switch (ObjectVT.getSimpleVT().SimpleTy) {
    default: llvm_unreachable("Unhandled argument type!");
    case MVT::i1:
    case MVT::i32:
      if (!isPPC64) {
        if (GPR_idx != Num_GPR_Regs) {
          unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
          ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);

          if (ObjectVT == MVT::i1)
            ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgVal);

          ++GPR_idx;
        } else {
          needsLoad = true;
          ArgSize = PtrByteSize;
        }
        // All int arguments reserve stack space in the Darwin ABI.
        ArgOffset += PtrByteSize;
        break;
      }
      LLVM_FALLTHROUGH;
    case MVT::i64:  // PPC64
      if (GPR_idx != Num_GPR_Regs) {
        unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);

        if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
          // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
          // value to MVT::i64 and then truncate to the correct register size.
          ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);

        ++GPR_idx;
      } else {
        needsLoad = true;
        ArgSize = PtrByteSize;
      }
      // All int arguments reserve stack space in the Darwin ABI.
      ArgOffset += 8;
      break;

    case MVT::f32:
    case MVT::f64:
      // Every 4 bytes of argument space consumes one of the GPRs available for
      // argument passing.
      if (GPR_idx != Num_GPR_Regs) {
        ++GPR_idx;
        if (ObjSize == 8 && GPR_idx != Num_GPR_Regs && !isPPC64)
          ++GPR_idx;
      }
      if (FPR_idx != Num_FPR_Regs) {
        unsigned VReg;

        if (ObjectVT == MVT::f32)
          VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass);
        else
          VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass);

        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
        ++FPR_idx;
      } else {
        needsLoad = true;
      }

      // All FP arguments reserve stack space in the Darwin ABI.
      ArgOffset += isPPC64 ? 8 : ObjSize;
      break;
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
      // Note that vector arguments in registers don't reserve stack space,
      // except in varargs functions.
      if (VR_idx != Num_VR_Regs) {
        unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
        if (isVarArg) {
          while ((ArgOffset % 16) != 0) {
            ArgOffset += PtrByteSize;
            if (GPR_idx != Num_GPR_Regs)
              GPR_idx++;
          }
          ArgOffset += 16;
          GPR_idx = std::min(GPR_idx+4, Num_GPR_Regs); // FIXME correct for ppc64?
        }
        ++VR_idx;
      } else {
        if (!isVarArg && !isPPC64) {
          // Vectors go after all the nonvectors.
          CurArgOffset = VecArgOffset;
          VecArgOffset += 16;
        } else {
          // Vectors are aligned.
          ArgOffset = ((ArgOffset+15)/16)*16;
          CurArgOffset = ArgOffset;
          ArgOffset += 16;
        }
        needsLoad = true;
      }
      break;
    }

    // We need to load the argument to a virtual register if we determined above
    // that we ran out of physical registers of the appropriate type.
    if (needsLoad) {
      int FI = MFI.CreateFixedObject(ObjSize,
                                     CurArgOffset + (ArgSize - ObjSize),
                                     isImmutable);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());
    }

    InVals.push_back(ArgVal);
  }

  // Allow for Altivec parameters at the end, if needed.
  if (nAltivecParamsAtEnd) {
    MinReservedArea = ((MinReservedArea+15)/16)*16;
    MinReservedArea += 16*nAltivecParamsAtEnd;
  }

  // Area that is at least reserved in the caller of this function.
  MinReservedArea = std::max(MinReservedArea, LinkageSize + 8 * PtrByteSize);

  // Set the size that is at least reserved in caller of this function.  Tail
  // call optimized functions' reserved stack space needs to be aligned so that
  // taking the difference between two stack areas will result in an aligned
  // stack.
  MinReservedArea =
      EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
  FuncInfo->setMinReservedArea(MinReservedArea);

  // If the function takes variable number of arguments, make a frame index for
  // the start of the first vararg value... for expansion of llvm.va_start.
  if (isVarArg) {
    int Depth = ArgOffset;

    FuncInfo->setVarArgsFrameIndex(
      MFI.CreateFixedObject(PtrVT.getSizeInBits()/8,
                            Depth, true));
    SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);

    // If this function is vararg, store any remaining integer argument regs
    // to their spots on the stack so that they may be loaded by dereferencing
    // the result of va_next.
    for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) {
      unsigned VReg;

      if (isPPC64)
        VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
      else
        VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);

      SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
      SDValue Store =
          DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
      MemOps.push_back(Store);
      // Increment the address by the pointer size for the next argument
      SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
      FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
    }
  }

  if (!MemOps.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);

  return Chain;
}
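
// Illustrative sketch (not from the file): the Darwin right-justification
// rule for small byval objects seen above. A 1- or 2-byte aggregate sits at
// the end of its 4-byte slot, so the usable address is bumped forward.
//
//   unsigned darwinByValAddress(unsigned SlotOffset, unsigned ObjSize) {
//     return (ObjSize == 1 || ObjSize == 2) ? SlotOffset + (4 - ObjSize)
//                                           : SlotOffset;
//   }
//
// e.g. a 1-byte struct whose slot starts at offset 24 is read from offset 27.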

/// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
/// adjusted to accommodate the arguments for the tailcall.
static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
                                   unsigned ParamSize) {

  if (!isTailCall) return 0;

  PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
  unsigned CallerMinReservedArea = FI->getMinReservedArea();
  int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
  // Remember only if the new adjustment is bigger.
  if (SPDiff < FI->getTailCallSPDelta())
    FI->setTailCallSPDelta(SPDiff);

  return SPDiff;
}
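
// Illustrative sketch (not from the file): the SPDiff arithmetic above.
// A positive result means the caller reserved more argument space than the
// tail call needs; a negative one means the stack must grow.
//
//   int tailCallSPDiff(unsigned CallerMinReservedArea, unsigned ParamSize) {
//     return (int)CallerMinReservedArea - (int)ParamSize;
//   }
//
// e.g. tailCallSPDiff(112, 64) == 48, while tailCallSPDiff(64, 112) == -48.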

static bool isFunctionGlobalAddress(SDValue Callee);

static bool
callsShareTOCBase(const Function *Caller, SDValue Callee,
                  const TargetMachine &TM) {
  // Callee is either a GlobalAddress or an ExternalSymbol. ExternalSymbols
  // don't have enough information to determine if the caller and callee share
  // the same TOC base, so we have to pessimistically assume they don't for
  // correctness.
  GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
  if (!G)
    return false;

  const GlobalValue *GV = G->getGlobal();
  // The medium and large code models are expected to provide a TOC large
  // enough to satisfy all data addressing needs of a module with a single
  // TOC. Since each module will be addressed with a single TOC, we only
  // need to check that caller and callee don't cross dso boundaries.
  if (CodeModel::Medium == TM.getCodeModel() ||
      CodeModel::Large == TM.getCodeModel())
    return TM.shouldAssumeDSOLocal(*Caller->getParent(), GV);

  // Otherwise we need to ensure callee and caller are in the same section,
  // since the linker may allocate multiple TOCs, and we don't know which
  // sections will belong to the same TOC base.

  if (!GV->isStrongDefinitionForLinker())
    return false;

  // Any explicitly-specified sections and section prefixes must also match.
  // Also, if we're using -ffunction-sections, then each function is always in
  // a different section (the same is true for COMDAT functions).
  if (TM.getFunctionSections() || GV->hasComdat() || Caller->hasComdat() ||
      GV->getSection() != Caller->getSection())
    return false;
  if (const auto *F = dyn_cast<Function>(GV)) {
    if (F->getSectionPrefix() != Caller->getSectionPrefix())
      return false;
  }

  // If the callee might be interposed, then we can't assume the ultimate call
  // target will be in the same section. Even in cases where we can assume that
  // interposition won't happen, in any case where the linker might insert a
  // stub to allow for interposition, we must generate code as though
  // interposition might occur. To understand why this matters, consider a
  // situation where: a -> b -> c where the arrows indicate calls. b and c are
  // in the same section, but a is in a different module (i.e. has a different
  // TOC base pointer). If the linker allows for interposition between b and c,
  // then it will generate a stub for the call edge between b and c which will
  // save the TOC pointer into the designated stack slot allocated by b. If we
  // return true here, and therefore allow a tail call between b and c, that
  // stack slot won't exist and the b -> c stub will end up saving b's TOC base
  // pointer into the stack slot allocated by a (where the a -> b stub saved
  // a's TOC base pointer). If we're not considering a tail call, but rather,
  // whether a nop is needed after the call instruction in b, because the linker
  // will insert a stub, it might complain about a missing nop if we omit it
  // (although many don't complain in this case).
  if (!TM.shouldAssumeDSOLocal(*Caller->getParent(), GV))
    return false;

  return true;
}
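
// Illustrative sketch (not from the file): the shape of the decision above,
// with plain booleans standing in for the TargetMachine/GlobalValue queries;
// "SameSection" here lumps together the section, section-prefix, comdat, and
// -ffunction-sections conditions.
//
//   bool shareTOCBaseSketch(bool MediumOrLargeCM, bool DSOLocal,
//                           bool StrongDef, bool SameSection) {
//     if (MediumOrLargeCM)
//       return DSOLocal;                  // one module-wide TOC suffices
//     return StrongDef && SameSection && DSOLocal;
//   }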

static bool
needStackSlotPassParameters(const PPCSubtarget &Subtarget,
                            const SmallVectorImpl<ISD::OutputArg> &Outs) {
  assert(Subtarget.isSVR4ABI() && Subtarget.isPPC64());

  const unsigned PtrByteSize = 8;
  const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();

  static const MCPhysReg GPR[] = {
    PPC::X3, PPC::X4, PPC::X5, PPC::X6,
    PPC::X7, PPC::X8, PPC::X9, PPC::X10,
  };
  static const MCPhysReg VR[] = {
    PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
    PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
  };

  const unsigned NumGPRs = array_lengthof(GPR);
  const unsigned NumFPRs = 13;
  const unsigned NumVRs = array_lengthof(VR);
  const unsigned ParamAreaSize = NumGPRs * PtrByteSize;

  unsigned NumBytes = LinkageSize;
  unsigned AvailableFPRs = NumFPRs;
  unsigned AvailableVRs = NumVRs;

  for (const ISD::OutputArg& Param : Outs) {
    if (Param.Flags.isNest()) continue;

    if (CalculateStackSlotUsed(Param.VT, Param.ArgVT, Param.Flags,
                               PtrByteSize, LinkageSize, ParamAreaSize,
                               NumBytes, AvailableFPRs, AvailableVRs,
                               Subtarget.hasQPX()))
      return true;
  }
  return false;
}
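
// Illustrative sketch (not from the file): with only eight GPRs (X3-X10)
// available for integer arguments, a hypothetical callee such as this makes
// needStackSlotPassParameters() return true, because its ninth argument
// lands in the parameter save area:
//
//   long nine_args(long, long, long, long, long, long, long, long,
//                  long ninth); // 'ninth' needs a stack slot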

static bool
hasSameArgumentList(const Function *CallerFn, ImmutableCallSite CS) {
  if (CS.arg_size() != CallerFn->arg_size())
    return false;

  ImmutableCallSite::arg_iterator CalleeArgIter = CS.arg_begin();
  ImmutableCallSite::arg_iterator CalleeArgEnd = CS.arg_end();
  Function::const_arg_iterator CallerArgIter = CallerFn->arg_begin();

  for (; CalleeArgIter != CalleeArgEnd; ++CalleeArgIter, ++CallerArgIter) {
    const Value* CalleeArg = *CalleeArgIter;
    const Value* CallerArg = &(*CallerArgIter);
    if (CalleeArg == CallerArg)
      continue;

    // e.g. @caller([4 x i64] %a, [4 x i64] %b) {
    //        tail call @callee([4 x i64] undef, [4 x i64] %b)
    //      }
    // 1st argument of callee is undef and has the same type as caller.
    if (CalleeArg->getType() == CallerArg->getType() &&
        isa<UndefValue>(CalleeArg))
      continue;

    return false;
  }

  return true;
}

// Returns true if TCO is possible between the caller's and callee's
// calling conventions.
static bool
areCallingConvEligibleForTCO_64SVR4(CallingConv::ID CallerCC,
                                    CallingConv::ID CalleeCC) {
  // Tail calls are possible with fastcc and ccc.
  auto isTailCallableCC = [](CallingConv::ID CC) {
    return CC == CallingConv::C || CC == CallingConv::Fast;
  };
  if (!isTailCallableCC(CallerCC) || !isTailCallableCC(CalleeCC))
    return false;

  // We can safely tail call both fastcc and ccc callees from a c calling
  // convention caller. If the caller is fastcc, we may have less stack space
  // than a non-fastcc caller with the same signature so disable tail-calls in
  // that case.
  return CallerCC == CallingConv::C || CallerCC == CalleeCC;
}
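
// Illustrative sketch (not from the file): the resulting compatibility
// matrix, reproduced with a local enum instead of CallingConv::ID.
//
//   enum class CCKind { C, Fast, Other };
//
//   bool eligibleCCsSketch(CCKind CallerCC, CCKind CalleeCC) {
//     auto isTailCallable = [](CCKind CC) {
//       return CC == CCKind::C || CC == CCKind::Fast;
//     };
//     if (!isTailCallable(CallerCC) || !isTailCallable(CalleeCC))
//       return false;
//     // C callers may tail-call either CC; Fast callers only their own.
//     return CallerCC == CCKind::C || CallerCC == CalleeCC;
//   }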
4601
4602
bool
4603
PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
4604
                                    SDValue Callee,
4605
                                    CallingConv::ID CalleeCC,
4606
                                    ImmutableCallSite CS,
4607
                                    bool isVarArg,
4608
                                    const SmallVectorImpl<ISD::OutputArg> &Outs,
4609
                                    const SmallVectorImpl<ISD::InputArg> &Ins,
4610
209
                                    SelectionDAG& DAG) const {
4611
209
  bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;
4612
209
4613
209
  if (DisableSCO && 
!TailCallOpt0
)
return false0
;
4614
209
4615
209
  // Variadic argument functions are not supported.
4616
209
  if (isVarArg) 
return false7
;
4617
202
4618
202
  auto &Caller = DAG.getMachineFunction().getFunction();
4619
202
  // Check that the calling conventions are compatible for tco.
4620
202
  if (!areCallingConvEligibleForTCO_64SVR4(Caller.getCallingConv(), CalleeCC))
4621
0
    return false;
4622
202
4623
202
  // Caller contains any byval parameter is not supported.
4624
202
  if (any_of(Ins, [](const ISD::InputArg &IA) 
{ return IA.Flags.isByVal(); }74
))
4625
0
    return false;
4626
202
4627
202
  // Callee contains any byval parameter is not supported, too.
4628
202
  // Note: This is a quick work around, because in some cases, e.g.
4629
202
  // caller's stack size > callee's stack size, we are still able to apply
4630
202
  // sibling call optimization. For example, gcc is able to do SCO for caller1
4631
202
  // in the following example, but not for caller2.
4632
202
  //   struct test {
4633
202
  //     long int a;
4634
202
  //     char ary[56];
4635
202
  //   } gTest;
4636
202
  //   __attribute__((noinline)) int callee(struct test v, struct test *b) {
4637
202
  //     b->a = v.a;
4638
202
  //     return 0;
4639
202
  //   }
4640
202
  //   void caller1(struct test a, struct test c, struct test *b) {
4641
202
  //     callee(gTest, b); }
4642
202
  //   void caller2(struct test *b) { callee(gTest, b); }
4643
2.07k
  if (any_of(Outs, [](const ISD::OutputArg& OA) { return OA.Flags.isByVal(); }))
4644
6
    return false;
4645
196
4646
196
  // If the callee and caller use different calling conventions, we cannot pass
4647
196
  // parameters on the stack, since offsets for the parameter area may differ.
4648
196
  if (Caller.getCallingConv() != CalleeCC &&
4649
196
      needStackSlotPassParameters(Subtarget, Outs))
4650
26
    return false;
4651
170
4652
170
  // No TCO/SCO on indirect calls because the caller has to restore its TOC.
4653
170
  if (!isFunctionGlobalAddress(Callee) &&
4654
170
      !isa<ExternalSymbolSDNode>(Callee))
4655
13
    return false;
4656
157
4657
157
  // If the caller and callee potentially have different TOC bases then we
4658
157
  // cannot tail call since we need to restore the TOC pointer after the call.
4659
157
  // ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977
4660
157
  if (!callsShareTOCBase(&Caller, Callee, getTargetMachine()))
4661
90
    return false;
4662
67
4663
67
  // TCO allows altering the callee's ABI, so we don't have to check further.
4664
67
  if (CalleeCC == CallingConv::Fast && TailCallOpt)
4665
1
    return true;
4666
66
4667
66
  if (DisableSCO) return false;
4668
66
4669
66
  // If the callee uses the same argument list as the caller, we can apply SCO
4670
66
  // in this case. If not, we need to check whether the callee needs stack
4671
66
  // slots for passing arguments.
4672
66
  if (!hasSameArgumentList(&Caller, CS) &&
4673
66
      needStackSlotPassParameters(Subtarget, Outs)) {
4674
9
    return false;
4675
9
  }
4676
57
4677
57
  return true;
4678
57
}
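
For readers tracing the early-return structure above: the eligibility test is a chain of independent rejection gates evaluated cheapest-first, and the first firing gate wins. A hedged sketch of that shape; the gate list and captured flags are stand-ins, not the real helper functions.

#include <functional>
#include <vector>

// Each gate rejects the sibling-call optimization when its predicate fires;
// the first firing gate wins, mirroring the early returns above.
static bool eligibleForSCO(const std::vector<std::function<bool()>> &Rejects) {
  for (const auto &Gate : Rejects)
    if (Gate())
      return false;
  return true;
}

// Usage sketch with illustrative captured flags:
//   eligibleForSCO({[&] { return IsVarArg; },
//                   [&] { return HasByValArg; },
//                   [&] { return !SharesTOCBase; }});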
4679
4680
/// IsEligibleForTailCallOptimization - Check whether the call is eligible
4681
/// for tail call optimization. Targets which want to do tail call
4682
/// optimization should implement this function.
4683
bool
4684
PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
4685
                                                     CallingConv::ID CalleeCC,
4686
                                                     bool isVarArg,
4687
                                      const SmallVectorImpl<ISD::InputArg> &Ins,
4688
15
                                                     SelectionDAG& DAG) const {
4689
15
  if (!getTargetMachine().Options.GuaranteedTailCallOpt)
4690
13
    return false;
4691
2
4692
2
  // Variable argument functions are not supported.
4693
2
  if (isVarArg)
4694
0
    return false;
4695
2
4696
2
  MachineFunction &MF = DAG.getMachineFunction();
4697
2
  CallingConv::ID CallerCC = MF.getFunction().getCallingConv();
4698
2
  if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
4699
2
    // Functions containing by val parameters are not supported.
4700
4
    for (unsigned i = 0; i != Ins.size(); i++) {
4701
2
       ISD::ArgFlagsTy Flags = Ins[i].Flags;
4702
2
       if (Flags.isByVal()) return false;
4703
2
    }
4704
2
4705
2
    // Non-PIC/GOT tail calls are supported.
4706
2
    if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
4707
1
      return true;
4708
1
4709
1
    // At the moment we can only do local tail calls (in same module, hidden
4710
1
    // or protected) if we are generating PIC.
4711
1
    if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
4712
1
      return G->getGlobal()->hasHiddenVisibility()
4713
1
          || G->getGlobal()->hasProtectedVisibility();
4714
0
  }
4715
0
4716
0
  return false;
4717
0
}
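
Under PIC, the branch above only accepts callees that cannot be preempted at load time, i.e. hidden or protected globals. A minimal sketch of that visibility rule, with an illustrative enum rather than LLVM's GlobalValue API:

// Illustrative visibility enum, not llvm::GlobalValue::VisibilityTypes.
enum class Visibility { Default, Hidden, Protected };

// Hidden and protected symbols resolve within the defining module and cannot
// be interposed, so a PIC tail call to them is safe.
static bool picLocalTailCallOK(Visibility V) {
  return V == Visibility::Hidden || V == Visibility::Protected;
}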
4718
4719
/// isBLACompatibleAddress - Return the immediate to use if the specified
4720
/// 32-bit value is representable in the immediate field of a BxA instruction.
4721
410
static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) {
4722
410
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
4723
410
  if (!C) return nullptr;
4724
5
4725
5
  int Addr = C->getZExtValue();
4726
5
  if ((Addr & 3) != 0 ||  // Low 2 bits are implicitly zero.
4727
5
      SignExtend32<26>(Addr) != Addr)
4728
0
    return nullptr;  // Top 6 bits have to be sext of immediate.
4729
5
4730
5
  return DAG
4731
5
      .getConstant(
4732
5
          (int)C->getZExtValue() >> 2, SDLoc(Op),
4733
5
          DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()))
4734
5
      .getNode();
4735
5
}
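
The encoding constraint above can be checked in isolation: the BLA target is a word-aligned immediate that must fit in 26 signed bits, and the hardware stores it shifted right by 2. A standalone sketch under those assumptions, using a plain int32_t address instead of a ConstantSDNode:

#include <cstdint>
#include <optional>

// Sign-extend the low B bits of X; mirrors llvm::SignExtend32<B>.
template <unsigned B> static int32_t signExtend32(uint32_t X) {
  return static_cast<int32_t>(X << (32 - B)) >> (32 - B);
}

// Returns the encoded immediate if Addr fits BLA's LI field: the low 2 bits
// must be zero and the value must be representable in 26 signed bits.
static std::optional<int32_t> blaImmediate(int32_t Addr) {
  if ((Addr & 3) != 0)
    return std::nullopt; // must be 4-byte aligned
  if (signExtend32<26>(static_cast<uint32_t>(Addr)) != Addr)
    return std::nullopt; // top 6 bits must be a sign extension
  return Addr >> 2;      // hardware stores the word offset
}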
4736
4737
namespace {
4738
4739
struct TailCallArgumentInfo {
4740
  SDValue Arg;
4741
  SDValue FrameIdxOp;
4742
  int FrameIdx = 0;
4743
4744
64
  TailCallArgumentInfo() = default;
4745
};
4746
4747
} // end anonymous namespace
4748
4749
/// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
4750
static void StoreTailCallArgumentsToStackSlot(
4751
    SelectionDAG &DAG, SDValue Chain,
4752
    const SmallVectorImpl<TailCallArgumentInfo> &TailCallArgs,
4753
3
    SmallVectorImpl<SDValue> &MemOpChains, const SDLoc &dl) {
4754
3
  for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
4755
0
    SDValue Arg = TailCallArgs[i].Arg;
4756
0
    SDValue FIN = TailCallArgs[i].FrameIdxOp;
4757
0
    int FI = TailCallArgs[i].FrameIdx;
4758
0
    // Store relative to the frame pointer.
4759
0
    MemOpChains.push_back(DAG.getStore(
4760
0
        Chain, dl, Arg, FIN,
4761
0
        MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)));
4762
0
  }
4763
3
}
4764
4765
/// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to
4766
/// the appropriate stack slot for the tail call optimized function call.
4767
static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain,
4768
                                             SDValue OldRetAddr, SDValue OldFP,
4769
3
                                             int SPDiff, const SDLoc &dl) {
4770
3
  if (SPDiff) {
4771
3
    // Calculate the new stack slot for the return address.
4772
3
    MachineFunction &MF = DAG.getMachineFunction();
4773
3
    const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
4774
3
    const PPCFrameLowering *FL = Subtarget.getFrameLowering();
4775
3
    bool isPPC64 = Subtarget.isPPC64();
4776
3
    int SlotSize = isPPC64 ? 8 : 4;
4777
3
    int NewRetAddrLoc = SPDiff + FL->getReturnSaveOffset();
4778
3
    int NewRetAddr = MF.getFrameInfo().CreateFixedObject(SlotSize,
4779
3
                                                         NewRetAddrLoc, true);
4780
3
    EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
4781
3
    SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT);
4782
3
    Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,
4783
3
                         MachinePointerInfo::getFixedStack(MF, NewRetAddr));
4784
3
4785
3
    // When using the 32/64-bit SVR4 ABI there is no need to move the FP stack
4786
3
    // slot as the FP is never overwritten.
4787
3
    if (Subtarget.isDarwinABI()) {
4788
0
      int NewFPLoc = SPDiff + FL->getFramePointerSaveOffset();
4789
0
      int NewFPIdx = MF.getFrameInfo().CreateFixedObject(SlotSize, NewFPLoc,
4790
0
                                                         true);
4791
0
      SDValue NewFramePtrIdx = DAG.getFrameIndex(NewFPIdx, VT);
4792
0
      Chain = DAG.getStore(Chain, dl, OldFP, NewFramePtrIdx,
4793
0
                           MachinePointerInfo::getFixedStack(
4794
0
                               DAG.getMachineFunction(), NewFPIdx));
4795
0
    }
4796
3
  }
4797
3
  return Chain;
4798
3
}
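
The slot arithmetic above is an offset shift: the saved return address moves by SPDiff, the signed difference between the caller's and callee's argument areas. A small worked sketch; the return-save offset of 16 bytes is an assumption for 64-bit ELFv2 illustration only, the real value comes from PPCFrameLowering.

#include <cassert>

// New location of the return-address slot after the stack is adjusted for a
// tail call. SPDiff is the signed argument-area delta (negative when the
// callee needs more space than the caller).
static int newRetAddrLoc(int SPDiff, int ReturnSaveOffset) {
  return SPDiff + ReturnSaveOffset;
}

int main() {
  // Illustrative numbers only: assume LR is saved at SP+16.
  assert(newRetAddrLoc(0, 16) == 16);    // same-size frames: slot unchanged
  assert(newRetAddrLoc(-32, 16) == -16); // callee needs 32 more bytes
  return 0;
}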
4799
4800
/// CalculateTailCallArgDest - Remember the argument for later processing. Calculate
4801
/// the position of the argument.
4802
static void
4803
CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64,
4804
                         SDValue Arg, int SPDiff, unsigned ArgOffset,
4805
64
                     SmallVectorImpl<TailCallArgumentInfo>& TailCallArguments) {
4806
64
  int Offset = ArgOffset + SPDiff;
4807
64
  uint32_t OpSize = (Arg.getValueSizeInBits() + 7) / 8;
4808
64
  int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
4809
64
  EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
4810
64
  SDValue FIN = DAG.getFrameIndex(FI, VT);
4811
64
  TailCallArgumentInfo Info;
4812
64
  Info.Arg = Arg;
4813
64
  Info.FrameIdxOp = FIN;
4814
64
  Info.FrameIdx = FI;
4815
64
  TailCallArguments.push_back(Info);
4816
64
}
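
One detail worth calling out in the function above: the byte size is the bit size rounded up, so sub-byte-width values still occupy a whole slot. A one-line check of that rounding, using plain integers:

#include <cassert>
#include <cstdint>

// Round a size in bits up to whole bytes, as CalculateTailCallArgDest does
// when computing OpSize.
static uint32_t bytesForBits(uint32_t Bits) { return (Bits + 7) / 8; }

int main() {
  assert(bytesForBits(1) == 1);  // i1 still occupies a full byte
  assert(bytesForBits(64) == 8); // i64 is exactly 8 bytes
  assert(bytesForBits(65) == 9); // anything over rounds up
  return 0;
}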
4817
4818
/// EmitTailCallLoadFPAndRetAddr - Emit loads from the frame pointer and
4820
/// return address stack slots. Returns the chain as result and the loaded
4821
/// values in LROpOut/FPOpOut. Used when tail calling.
4821
SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(
4822
    SelectionDAG &DAG, int SPDiff, SDValue Chain, SDValue &LROpOut,
4823
2.19k
    SDValue &FPOpOut, const SDLoc &dl) const {
4824
2.19k
  if (SPDiff) {
4825
3
    // Load the LR and FP stack slot for later adjusting.
4826
3
    EVT VT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
4827
3
    LROpOut = getReturnAddrFrameIndex(DAG);
4828
3
    LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo());
4829
3
    Chain = SDValue(LROpOut.getNode(), 1);
4830
3
4831
3
    // When using the 32/64-bit SVR4 ABI there is no need to load the FP stack
4832
3
    // slot as the FP is never overwritten.
4833
3
    if (Subtarget.isDarwinABI()) {
4834
0
      FPOpOut = getFramePointerFrameIndex(DAG);
4835
0
      FPOpOut = DAG.getLoad(VT, dl, Chain, FPOpOut, MachinePointerInfo());
4836
0
      Chain = SDValue(FPOpOut.getNode(), 1);
4837
0
    }
4838
3
  }
4839
2.19k
  return Chain;
4840
2.19k
}
4841
4842
/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
4843
/// by "Src" to address "Dst" of size "Size".  Alignment information is
4844
/// specified by the specific parameter attribute. The copy will be passed as
4845
/// a byval function parameter.
4846
/// Sometimes what we are copying is the end of a larger object, the part that
4847
/// does not fit in registers.
4848
static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
4849
                                         SDValue Chain, ISD::ArgFlagsTy Flags,
4850
54
                                         SelectionDAG &DAG, const SDLoc &dl) {
4851
54
  SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
4852
54
  return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
4853
54
                       false, false, false, MachinePointerInfo(),
4854
54
                       MachinePointerInfo());
4855
54
}
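
The byval lowering above reduces to a fixed-length copy whose size and alignment both come from the argument's byval attribute. A hedged sketch of that pairing in standard C++, with a hypothetical flags struct standing in for ISD::ArgFlagsTy:

#include <cstring>

// Hypothetical stand-in for ISD::ArgFlagsTy: the byval attribute carries
// both the number of bytes to copy and the guaranteed alignment.
struct ByValFlags {
  unsigned Size;
  unsigned Align;
};

// Copies Size bytes from Src to Dst, as CreateCopyOfByValArgument lowers to
// a memcpy node; the alignment is a guarantee the memcpy may exploit.
static void copyByVal(void *Dst, const void *Src, const ByValFlags &F) {
  std::memcpy(Dst, Src, F.Size);
}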
4856
4857
/// LowerMemOpCallTo - Store the argument to the stack or remember it in case of
4858
/// tail calls.
4859
static void LowerMemOpCallTo(
4860
    SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg,
4861
    SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64,
4862
    bool isTailCall, bool isVector, SmallVectorImpl<SDValue> &MemOpChains,
4863
512
    SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments, const SDLoc &dl) {
4864
512
  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
4865
512
  if (!isTailCall) {
4866
448
    if (isVector) {
4867
88
      SDValue StackPtr;
4868
88
      if (isPPC64)
4869
88
        StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
4870
0
      else
4871
0
        StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
4872
88
      PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
4873
88
                           DAG.getConstant(ArgOffset, dl, PtrVT));
4874
88
    }
4875
448
    MemOpChains.push_back(
4876
448
        DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
4877
448
    // Calculate and remember argument location.
4878
448
  } else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset,
4879
64
                                  TailCallArguments);
4880
512
}
4881
4882
static void
4883
PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain,
4884
                const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp,
4885
                SDValue FPOp,
4886
3
                SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {