Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the PPCISelLowering class.
//
//===----------------------------------------------------------------------===//

#include "PPCISelLowering.h"
#include "MCTargetDesc/PPCPredicates.h"
#include "PPC.h"
#include "PPCCCState.h"
#include "PPCCallingConv.h"
#include "PPCFrameLowering.h"
#include "PPCInstrInfo.h"
#include "PPCMachineFunctionInfo.h"
#include "PPCPerfectShuffle.h"
#include "PPCRegisterInfo.h"
#include "PPCSubtarget.h"
#include "PPCTargetMachine.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/Value.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSymbolXCOFF.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <iterator>
#include <list>
#include <utility>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "ppc-lowering"

static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
    cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);

static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
    cl::desc("disable setting the node scheduling preference to ILP on PPC"),
    cl::Hidden);

static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
    cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);

static cl::opt<bool> DisableSCO("disable-ppc-sco",
    cl::desc("disable sibling call optimization on ppc"), cl::Hidden);

static cl::opt<bool> DisableInnermostLoopAlign32("disable-ppc-innermost-loop-align32",
    cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden);

static cl::opt<bool> EnableQuadPrecision("enable-ppc-quad-precision",
    cl::desc("enable quad precision float support on ppc"), cl::Hidden);

STATISTIC(NumTailCalls, "Number of tail calls");
STATISTIC(NumSiblingCalls, "Number of sibling calls");

static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int);

static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl);

// FIXME: Remove this once the bug has been fixed!
extern cl::opt<bool> ANDIGlueBug;

PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
                                     const PPCSubtarget &STI)
    : TargetLowering(TM), Subtarget(STI) {
  // Use _setjmp/_longjmp instead of setjmp/longjmp.
  setUseUnderscoreSetJmp(true);
  setUseUnderscoreLongJmp(true);

  // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
  // arguments are at least 4/8 bytes aligned.
  bool isPPC64 = Subtarget.isPPC64();
  setMinStackArgumentAlignment(isPPC64 ? 8 : 4);

  // Set up the register classes.
  addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
  if (!useSoftFloat()) {
    if (hasSPE()) {
      addRegisterClass(MVT::f32, &PPC::SPE4RCRegClass);
      addRegisterClass(MVT::f64, &PPC::SPERCRegClass);
    } else {
      addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
      addRegisterClass(MVT::f64, &PPC::F8RCRegClass);
    }
  }

  // Match BITREVERSE to customized fast code sequence in the td file.
  setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
  setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);

  // Sub-word ATOMIC_CMP_SWAP needs to ensure that the input is zero-extended.
  setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Custom);
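  // (The load-reserved/store-conditional expansion compares the value in a
  // full register, which is why a narrow cmpxchg compare operand has to be
  // zero-extended first.)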

  // PowerPC has an i16 but no i8 (or i1) SEXTLOAD.
  for (MVT VT : MVT::integer_valuetypes()) {
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand);
  }

  setTruncStoreAction(MVT::f64, MVT::f32, Expand);

  // PowerPC has pre-inc loads and stores.
  setIndexedLoadAction(ISD::PRE_INC, MVT::i1, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i8, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i16, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i32, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i64, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i1, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i8, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i16, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal);
  if (!Subtarget.hasSPE()) {
    setIndexedLoadAction(ISD::PRE_INC, MVT::f32, Legal);
    setIndexedLoadAction(ISD::PRE_INC, MVT::f64, Legal);
    setIndexedStoreAction(ISD::PRE_INC, MVT::f32, Legal);
    setIndexedStoreAction(ISD::PRE_INC, MVT::f64, Legal);
  }
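  // (The pre-increment forms above correspond to the PowerPC update-form
  // memory instructions, e.g. lwzu/stwu, which write the computed effective
  // address back into the base register.)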

  // PowerPC uses ADDC/ADDE/SUBC/SUBE to propagate carry.
  const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
  for (MVT VT : ScalarIntVTs) {
    setOperationAction(ISD::ADDC, VT, Legal);
    setOperationAction(ISD::ADDE, VT, Legal);
    setOperationAction(ISD::SUBC, VT, Legal);
    setOperationAction(ISD::SUBE, VT, Legal);
  }

  if (Subtarget.useCRBits()) {
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

    if (isPPC64 || Subtarget.hasFPCVT()) {
      setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote);
      AddPromotedToType (ISD::SINT_TO_FP, MVT::i1,
                         isPPC64 ? MVT::i64 : MVT::i32);
      setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote);
      AddPromotedToType(ISD::UINT_TO_FP, MVT::i1,
                        isPPC64 ? MVT::i64 : MVT::i32);
    } else {
      setOperationAction(ISD::SINT_TO_FP, MVT::i1, Custom);
      setOperationAction(ISD::UINT_TO_FP, MVT::i1, Custom);
    }

    // PowerPC does not support direct load/store of condition registers.
    setOperationAction(ISD::LOAD, MVT::i1, Custom);
    setOperationAction(ISD::STORE, MVT::i1, Custom);

    // FIXME: Remove this once the ANDI glue bug is fixed:
    if (ANDIGlueBug)
      setOperationAction(ISD::TRUNCATE, MVT::i1, Custom);

    for (MVT VT : MVT::integer_valuetypes()) {
      setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
      setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
      setTruncStoreAction(VT, MVT::i1, Expand);
    }

    addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass);
  }

  // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
  // PPC (the libcall is not available).
  setOperationAction(ISD::FP_TO_SINT, MVT::ppcf128, Custom);
  setOperationAction(ISD::FP_TO_UINT, MVT::ppcf128, Custom);

  // We do not currently implement these libm ops for PowerPC.
  setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand);
  setOperationAction(ISD::FCEIL,  MVT::ppcf128, Expand);
  setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand);
  setOperationAction(ISD::FRINT,  MVT::ppcf128, Expand);
  setOperationAction(ISD::FNEARBYINT, MVT::ppcf128, Expand);
  setOperationAction(ISD::FREM, MVT::ppcf128, Expand);

  // PowerPC has no SREM/UREM instructions unless we are on P9.
  // On P9 we may use a hardware instruction to compute the remainder.
  // The instructions are not legalized directly because in the cases where the
  // result of both the remainder and the division is required it is more
  // efficient to compute the remainder from the result of the division rather
  // than use the remainder instruction.
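  // (That is, X % Y is rebuilt as X - (X / Y) * Y when the quotient is
  // needed anyway, which is why SREM/UREM are only made Custom rather than
  // Legal below.)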
  if (Subtarget.isISA3_0()) {
    setOperationAction(ISD::SREM, MVT::i32, Custom);
    setOperationAction(ISD::UREM, MVT::i32, Custom);
    setOperationAction(ISD::SREM, MVT::i64, Custom);
    setOperationAction(ISD::UREM, MVT::i64, Custom);
  } else {
    setOperationAction(ISD::SREM, MVT::i32, Expand);
    setOperationAction(ISD::UREM, MVT::i32, Expand);
    setOperationAction(ISD::SREM, MVT::i64, Expand);
    setOperationAction(ISD::UREM, MVT::i64, Expand);
  }

  // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
  setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i64, Expand);

  // We don't support sin/cos/sqrt/fmod/pow.
  setOperationAction(ISD::FSIN , MVT::f64, Expand);
  setOperationAction(ISD::FCOS , MVT::f64, Expand);
  setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
  setOperationAction(ISD::FREM , MVT::f64, Expand);
  setOperationAction(ISD::FPOW , MVT::f64, Expand);
  setOperationAction(ISD::FSIN , MVT::f32, Expand);
  setOperationAction(ISD::FCOS , MVT::f32, Expand);
  setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
  setOperationAction(ISD::FREM , MVT::f32, Expand);
  setOperationAction(ISD::FPOW , MVT::f32, Expand);
  if (Subtarget.hasSPE()) {
    setOperationAction(ISD::FMA  , MVT::f64, Expand);
    setOperationAction(ISD::FMA  , MVT::f32, Expand);
  } else {
    setOperationAction(ISD::FMA  , MVT::f64, Legal);
    setOperationAction(ISD::FMA  , MVT::f32, Legal);
  }

  setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);

  // If we're enabling GP optimizations, use hardware square root.
  if (!Subtarget.hasFSQRT() &&
      !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTE() &&
        Subtarget.hasFRE()))
    setOperationAction(ISD::FSQRT, MVT::f64, Expand);

  if (!Subtarget.hasFSQRT() &&
      !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTES() &&
        Subtarget.hasFRES()))
    setOperationAction(ISD::FSQRT, MVT::f32, Expand);
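  // (When unsafe FP math is enabled and both the reciprocal and the
  // reciprocal square-root estimate instructions are available, FSQRT is
  // deliberately left non-Expand so it can be lowered with an
  // estimate-plus-refinement sequence instead of a libcall.)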

  if (Subtarget.hasFCPSGN()) {
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Legal);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Legal);
  } else {
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
  }

  if (Subtarget.hasFPRND()) {
    setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
    setOperationAction(ISD::FCEIL,  MVT::f64, Legal);
    setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
    setOperationAction(ISD::FROUND, MVT::f64, Legal);

    setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
    setOperationAction(ISD::FCEIL,  MVT::f32, Legal);
    setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
    setOperationAction(ISD::FROUND, MVT::f32, Legal);
  }

  // PowerPC does not have BSWAP, but we can use the vector BSWAP instruction
  // xxbrd to speed up scalar BSWAP64.
  // CTPOP and CTTZ were introduced in P8 and P9, respectively.
  setOperationAction(ISD::BSWAP, MVT::i32  , Expand);
  if (Subtarget.hasP9Vector())
    setOperationAction(ISD::BSWAP, MVT::i64  , Custom);
  else
    setOperationAction(ISD::BSWAP, MVT::i64  , Expand);
  if (Subtarget.isISA3_0()) {
    setOperationAction(ISD::CTTZ , MVT::i32  , Legal);
    setOperationAction(ISD::CTTZ , MVT::i64  , Legal);
  } else {
    setOperationAction(ISD::CTTZ , MVT::i32  , Expand);
    setOperationAction(ISD::CTTZ , MVT::i64  , Expand);
  }

  if (Subtarget.hasPOPCNTD() == PPCSubtarget::POPCNTD_Fast) {
    setOperationAction(ISD::CTPOP, MVT::i32  , Legal);
    setOperationAction(ISD::CTPOP, MVT::i64  , Legal);
  } else {
    setOperationAction(ISD::CTPOP, MVT::i32  , Expand);
    setOperationAction(ISD::CTPOP, MVT::i64  , Expand);
  }

  // PowerPC does not have ROTR.
  setOperationAction(ISD::ROTR, MVT::i32   , Expand);
  setOperationAction(ISD::ROTR, MVT::i64   , Expand);

  if (!Subtarget.useCRBits()) {
    // PowerPC does not have Select.
    setOperationAction(ISD::SELECT, MVT::i32, Expand);
    setOperationAction(ISD::SELECT, MVT::i64, Expand);
    setOperationAction(ISD::SELECT, MVT::f32, Expand);
    setOperationAction(ISD::SELECT, MVT::f64, Expand);
  }

  // PowerPC wants to turn select_cc of FP into fsel when possible.
  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);

  // PowerPC wants to optimize integer setcc a bit.
  if (!Subtarget.useCRBits())
    setOperationAction(ISD::SETCC, MVT::i32, Custom);

  // PowerPC does not have BRCOND, which requires SetCC.
  if (!Subtarget.useCRBits())
    setOperationAction(ISD::BRCOND, MVT::Other, Expand);

  setOperationAction(ISD::BR_JT,  MVT::Other, Expand);

  if (Subtarget.hasSPE()) {
    // SPE has built-in conversions.
    setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
    setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
    setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
  } else {
    // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
    setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);

    // PowerPC does not have [U|S]INT_TO_FP.
    setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
    setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
  }

  if (Subtarget.hasDirectMove() && isPPC64) {
    setOperationAction(ISD::BITCAST, MVT::f32, Legal);
    setOperationAction(ISD::BITCAST, MVT::i32, Legal);
    setOperationAction(ISD::BITCAST, MVT::i64, Legal);
    setOperationAction(ISD::BITCAST, MVT::f64, Legal);
  } else {
    setOperationAction(ISD::BITCAST, MVT::f32, Expand);
    setOperationAction(ISD::BITCAST, MVT::i32, Expand);
    setOperationAction(ISD::BITCAST, MVT::i64, Expand);
    setOperationAction(ISD::BITCAST, MVT::f64, Expand);
  }

  // We cannot sextinreg(i1).  Expand to shifts.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
  // SjLj exception handling but a light-weight setjmp/longjmp replacement to
  // support continuation, user-level threading, etc. As a result, no other
  // SjLj exception interfaces are implemented; please don't build your own
  // exception handling based on them.
  // LLVM/Clang supports zero-cost DWARF exception handling.
  setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
  setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);

  // We want to legalize GlobalAddress and ConstantPool nodes into the
  // appropriate instructions to materialize the address.
  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
  setOperationAction(ISD::BlockAddress,  MVT::i32, Custom);
  setOperationAction(ISD::ConstantPool,  MVT::i32, Custom);
  setOperationAction(ISD::JumpTable,     MVT::i32, Custom);
  setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
  setOperationAction(ISD::BlockAddress,  MVT::i64, Custom);
  setOperationAction(ISD::ConstantPool,  MVT::i64, Custom);
  setOperationAction(ISD::JumpTable,     MVT::i64, Custom);

  // TRAP is legal.
  setOperationAction(ISD::TRAP, MVT::Other, Legal);

  // TRAMPOLINE is custom lowered.
  setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
  setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex.
  setOperationAction(ISD::VASTART           , MVT::Other, Custom);

  if (Subtarget.isSVR4ABI()) {
    if (isPPC64) {
      // VAARG always uses double-word chunks, so promote anything smaller.
      setOperationAction(ISD::VAARG, MVT::i1, Promote);
      AddPromotedToType (ISD::VAARG, MVT::i1, MVT::i64);
      setOperationAction(ISD::VAARG, MVT::i8, Promote);
      AddPromotedToType (ISD::VAARG, MVT::i8, MVT::i64);
      setOperationAction(ISD::VAARG, MVT::i16, Promote);
      AddPromotedToType (ISD::VAARG, MVT::i16, MVT::i64);
      setOperationAction(ISD::VAARG, MVT::i32, Promote);
      AddPromotedToType (ISD::VAARG, MVT::i32, MVT::i64);
      setOperationAction(ISD::VAARG, MVT::Other, Expand);
    } else {
      // VAARG is custom lowered with the 32-bit SVR4 ABI.
      setOperationAction(ISD::VAARG, MVT::Other, Custom);
      setOperationAction(ISD::VAARG, MVT::i64, Custom);
    }
  } else
    setOperationAction(ISD::VAARG, MVT::Other, Expand);
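  // (Under the 64-bit SVR4 ABI each va_arg slot is a full double-word, so a
  // narrower va_arg value is read as i64 and then truncated; the 32-bit ABI
  // instead needs fully custom lowering.)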

  if (Subtarget.isSVR4ABI() && !isPPC64)
    // VACOPY is custom lowered with the 32-bit SVR4 ABI.
    setOperationAction(ISD::VACOPY            , MVT::Other, Custom);
  else
    setOperationAction(ISD::VACOPY            , MVT::Other, Expand);

  // Use the default implementation.
  setOperationAction(ISD::VAEND             , MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE         , MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE      , MVT::Other, Custom);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32  , Custom);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64  , Custom);
  setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i32, Custom);
  setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i64, Custom);
  setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom);
  setOperationAction(ISD::EH_DWARF_CFA, MVT::i64, Custom);

  // We want to custom lower some of our intrinsics.
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  // To handle counter-based loop conditions.
  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i1, Custom);

  setOperationAction(ISD::INTRINSIC_VOID, MVT::i8, Custom);
  setOperationAction(ISD::INTRINSIC_VOID, MVT::i16, Custom);
  setOperationAction(ISD::INTRINSIC_VOID, MVT::i32, Custom);
  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);

  // Comparisons that require checking two conditions.
  if (Subtarget.hasSPE()) {
    setCondCodeAction(ISD::SETO, MVT::f32, Expand);
    setCondCodeAction(ISD::SETO, MVT::f64, Expand);
    setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
    setCondCodeAction(ISD::SETUO, MVT::f64, Expand);
  }
  setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETULT, MVT::f64, Expand);
  setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUGT, MVT::f64, Expand);
  setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUEQ, MVT::f64, Expand);
  setCondCodeAction(ISD::SETOGE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOGE, MVT::f64, Expand);
  setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOLE, MVT::f64, Expand);
  setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETONE, MVT::f64, Expand);

  if (Subtarget.has64BitSupport()) {
    // They also have instructions for converting between i64 and fp.
    setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
    setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
    setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
    setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
    // This is just the low 32 bits of a (signed) fp->i64 conversion.
    // We cannot do this with Promote because i64 is not a legal type.
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);

    if (Subtarget.hasLFIWAX() || Subtarget.isPPC64())
      setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
  } else {
    // PowerPC does not have FP_TO_UINT on 32-bit implementations.
    if (Subtarget.hasSPE())
      setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
    else
      setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
  }

  // With the instructions enabled under FPCVT, we can do everything.
  if (Subtarget.hasFPCVT()) {
    if (Subtarget.has64BitSupport()) {
      setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
      setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
      setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
      setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
    }

    setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
    setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
    setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
  }

  if (Subtarget.use64BitRegs()) {
    // 64-bit PowerPC implementations can support i64 types directly.
    addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
    // BUILD_PAIR can't be handled natively, and should be expanded to shl/or.
    setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
    // 64-bit PowerPC wants to expand i128 shifts itself.
    setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
    setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
    setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
  } else {
    // 32-bit PowerPC wants to expand i64 shifts itself.
    setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
    setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
    setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
  }

  if (Subtarget.hasAltivec()) {
    // First set operation action for all vector types to expand. Then we
    // will selectively turn on ones that can be effectively codegen'd.
    for (MVT VT : MVT::vector_valuetypes()) {
      // add/sub are legal for all supported vector VT's.
      setOperationAction(ISD::ADD, VT, Legal);
      setOperationAction(ISD::SUB, VT, Legal);

      // For v2i64, these are only valid with P8Vector. This is corrected after
      // the loop.
      setOperationAction(ISD::SMAX, VT, Legal);
      setOperationAction(ISD::SMIN, VT, Legal);
      setOperationAction(ISD::UMAX, VT, Legal);
      setOperationAction(ISD::UMIN, VT, Legal);

      if (Subtarget.hasVSX()) {
        setOperationAction(ISD::FMAXNUM, VT, Legal);
        setOperationAction(ISD::FMINNUM, VT, Legal);
      }

      // Vector instructions introduced in P8.
      if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) {
        setOperationAction(ISD::CTPOP, VT, Legal);
        setOperationAction(ISD::CTLZ, VT, Legal);
      }
      else {
        setOperationAction(ISD::CTPOP, VT, Expand);
        setOperationAction(ISD::CTLZ, VT, Expand);
      }

      // Vector instructions introduced in P9.
      if (Subtarget.hasP9Altivec() && (VT.SimpleTy != MVT::v1i128))
        setOperationAction(ISD::CTTZ, VT, Legal);
      else
        setOperationAction(ISD::CTTZ, VT, Expand);

      // We promote all shuffles to v16i8.
      setOperationAction(ISD::VECTOR_SHUFFLE, VT, Promote);
      AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8);

      // We promote all non-typed operations to v4i32.
      setOperationAction(ISD::AND   , VT, Promote);
      AddPromotedToType (ISD::AND   , VT, MVT::v4i32);
      setOperationAction(ISD::OR    , VT, Promote);
      AddPromotedToType (ISD::OR    , VT, MVT::v4i32);
      setOperationAction(ISD::XOR   , VT, Promote);
      AddPromotedToType (ISD::XOR   , VT, MVT::v4i32);
      setOperationAction(ISD::LOAD  , VT, Promote);
      AddPromotedToType (ISD::LOAD  , VT, MVT::v4i32);
      setOperationAction(ISD::SELECT, VT, Promote);
      AddPromotedToType (ISD::SELECT, VT, MVT::v4i32);
      setOperationAction(ISD::VSELECT, VT, Legal);
      setOperationAction(ISD::SELECT_CC, VT, Promote);
      AddPromotedToType (ISD::SELECT_CC, VT, MVT::v4i32);
      setOperationAction(ISD::STORE, VT, Promote);
      AddPromotedToType (ISD::STORE, VT, MVT::v4i32);
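      // (Promote plus AddPromotedToType makes the legalizer bitcast the
      // operands to the promoted type, so e.g. an AND of v8i16 is simply
      // performed as an AND of v4i32.)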

      // No other operations are legal.
      setOperationAction(ISD::MUL , VT, Expand);
      setOperationAction(ISD::SDIV, VT, Expand);
      setOperationAction(ISD::SREM, VT, Expand);
      setOperationAction(ISD::UDIV, VT, Expand);
      setOperationAction(ISD::UREM, VT, Expand);
      setOperationAction(ISD::FDIV, VT, Expand);
      setOperationAction(ISD::FREM, VT, Expand);
      setOperationAction(ISD::FNEG, VT, Expand);
      setOperationAction(ISD::FSQRT, VT, Expand);
      setOperationAction(ISD::FLOG, VT, Expand);
      setOperationAction(ISD::FLOG10, VT, Expand);
      setOperationAction(ISD::FLOG2, VT, Expand);
      setOperationAction(ISD::FEXP, VT, Expand);
      setOperationAction(ISD::FEXP2, VT, Expand);
      setOperationAction(ISD::FSIN, VT, Expand);
      setOperationAction(ISD::FCOS, VT, Expand);
      setOperationAction(ISD::FABS, VT, Expand);
      setOperationAction(ISD::FFLOOR, VT, Expand);
      setOperationAction(ISD::FCEIL,  VT, Expand);
      setOperationAction(ISD::FTRUNC, VT, Expand);
      setOperationAction(ISD::FRINT,  VT, Expand);
      setOperationAction(ISD::FNEARBYINT, VT, Expand);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand);
      setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
      setOperationAction(ISD::BUILD_VECTOR, VT, Expand);
      setOperationAction(ISD::MULHU, VT, Expand);
      setOperationAction(ISD::MULHS, VT, Expand);
      setOperationAction(ISD::UMUL_LOHI, VT, Expand);
      setOperationAction(ISD::SMUL_LOHI, VT, Expand);
      setOperationAction(ISD::UDIVREM, VT, Expand);
      setOperationAction(ISD::SDIVREM, VT, Expand);
      setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
      setOperationAction(ISD::FPOW, VT, Expand);
      setOperationAction(ISD::BSWAP, VT, Expand);
      setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
      setOperationAction(ISD::ROTL, VT, Expand);
      setOperationAction(ISD::ROTR, VT, Expand);

      for (MVT InnerVT : MVT::vector_valuetypes()) {
        setTruncStoreAction(VT, InnerVT, Expand);
        setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
        setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
        setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
      }
    }
    if (!Subtarget.hasP8Vector()) {
      setOperationAction(ISD::SMAX, MVT::v2i64, Expand);
      setOperationAction(ISD::SMIN, MVT::v2i64, Expand);
      setOperationAction(ISD::UMAX, MVT::v2i64, Expand);
      setOperationAction(ISD::UMIN, MVT::v2i64, Expand);
    }

    for (auto VT : {MVT::v2i64, MVT::v4i32, MVT::v8i16, MVT::v16i8})
      setOperationAction(ISD::ABS, VT, Custom);

    // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
    // with merges, splats, etc.
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);

    // Vector truncates to sub-word integers that fit in an Altivec/VSX
    // register are cheap, so handle them before they get expanded to scalar.
    setOperationAction(ISD::TRUNCATE, MVT::v8i8, Custom);
    setOperationAction(ISD::TRUNCATE, MVT::v4i8, Custom);
    setOperationAction(ISD::TRUNCATE, MVT::v2i8, Custom);
    setOperationAction(ISD::TRUNCATE, MVT::v4i16, Custom);
    setOperationAction(ISD::TRUNCATE, MVT::v2i16, Custom);

    setOperationAction(ISD::AND   , MVT::v4i32, Legal);
    setOperationAction(ISD::OR    , MVT::v4i32, Legal);
    setOperationAction(ISD::XOR   , MVT::v4i32, Legal);
    setOperationAction(ISD::LOAD  , MVT::v4i32, Legal);
    setOperationAction(ISD::SELECT, MVT::v4i32,
                       Subtarget.useCRBits() ? Legal : Expand);
    setOperationAction(ISD::STORE , MVT::v4i32, Legal);
    setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
    setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
    setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
    setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
    setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
    setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
    setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
    setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);

    // Without hasP8Altivec set, v2i64 SMAX isn't available.
    // But ABS custom lowering requires SMAX support.
    if (!Subtarget.hasP8Altivec())
      setOperationAction(ISD::ABS, MVT::v2i64, Expand);

    addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
    addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
    addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass);
    addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass);

    setOperationAction(ISD::MUL, MVT::v4f32, Legal);
    setOperationAction(ISD::FMA, MVT::v4f32, Legal);

    if (TM.Options.UnsafeFPMath || Subtarget.hasVSX()) {
      setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
      setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
    }

    if (Subtarget.hasP8Altivec())
      setOperationAction(ISD::MUL, MVT::v4i32, Legal);
    else
      setOperationAction(ISD::MUL, MVT::v4i32, Custom);

    setOperationAction(ISD::MUL, MVT::v8i16, Custom);
    setOperationAction(ISD::MUL, MVT::v16i8, Custom);

    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom);

    setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);

    // Altivec does not contain unordered floating-point compare instructions.
    setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand);
    setCondCodeAction(ISD::SETUEQ, MVT::v4f32, Expand);
    setCondCodeAction(ISD::SETO,   MVT::v4f32, Expand);
    setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand);

    if (Subtarget.hasVSX()) {
      setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);
      if (Subtarget.hasP8Vector()) {
        setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);
        setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Legal);
      }
      if (Subtarget.hasDirectMove() && isPPC64) {
        setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Legal);
        setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Legal);
        setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Legal);
        setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2i64, Legal);
        setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i8, Legal);
        setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i16, Legal);
        setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Legal);
        setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Legal);
      }
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);

      setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
      setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
      setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
      setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
      setOperationAction(ISD::FROUND, MVT::v2f64, Legal);

      setOperationAction(ISD::FROUND, MVT::v4f32, Legal);

      setOperationAction(ISD::MUL, MVT::v2f64, Legal);
      setOperationAction(ISD::FMA, MVT::v2f64, Legal);

      setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
      setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);

      // Share the Altivec comparison restrictions.
      setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand);
      setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand);
      setCondCodeAction(ISD::SETO,   MVT::v2f64, Expand);
      setCondCodeAction(ISD::SETONE, MVT::v2f64, Expand);

      setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
      setOperationAction(ISD::STORE, MVT::v2f64, Legal);

      setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Legal);

      if (Subtarget.hasP8Vector())
        addRegisterClass(MVT::f32, &PPC::VSSRCRegClass);

      addRegisterClass(MVT::f64, &PPC::VSFRCRegClass);

      addRegisterClass(MVT::v4i32, &PPC::VSRCRegClass);
      addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);
      addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);

      if (Subtarget.hasP8Altivec()) {
        setOperationAction(ISD::SHL, MVT::v2i64, Legal);
        setOperationAction(ISD::SRA, MVT::v2i64, Legal);
        setOperationAction(ISD::SRL, MVT::v2i64, Legal);

        // 128 bit shifts can be accomplished via 3 instructions for SHL and
        // SRL, but not for SRA because of the instructions available:
        // VS{RL} and VS{RL}O. However due to direct move costs, it's not
        // worth doing.
        setOperationAction(ISD::SHL, MVT::v1i128, Expand);
        setOperationAction(ISD::SRL, MVT::v1i128, Expand);
        setOperationAction(ISD::SRA, MVT::v1i128, Expand);

        setOperationAction(ISD::SETCC, MVT::v2i64, Legal);
      }
      else {
        setOperationAction(ISD::SHL, MVT::v2i64, Expand);
        setOperationAction(ISD::SRA, MVT::v2i64, Expand);
        setOperationAction(ISD::SRL, MVT::v2i64, Expand);

        setOperationAction(ISD::SETCC, MVT::v2i64, Custom);

        // VSX v2i64 only supports non-arithmetic operations.
        setOperationAction(ISD::ADD, MVT::v2i64, Expand);
        setOperationAction(ISD::SUB, MVT::v2i64, Expand);
      }

      setOperationAction(ISD::LOAD, MVT::v2i64, Promote);
      AddPromotedToType (ISD::LOAD, MVT::v2i64, MVT::v2f64);
      setOperationAction(ISD::STORE, MVT::v2i64, Promote);
      AddPromotedToType (ISD::STORE, MVT::v2i64, MVT::v2f64);

      setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Legal);

      setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal);
      setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal);
      setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal);
      setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal);

      // Custom handling for partial vectors of integers converted to
      // floating point. We already have optimal handling for v2i32 through
      // the DAG combine, so those aren't necessary.
      setOperationAction(ISD::UINT_TO_FP, MVT::v2i8, Custom);
      setOperationAction(ISD::UINT_TO_FP, MVT::v4i8, Custom);
      setOperationAction(ISD::UINT_TO_FP, MVT::v2i16, Custom);
      setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
      setOperationAction(ISD::SINT_TO_FP, MVT::v2i8, Custom);
      setOperationAction(ISD::SINT_TO_FP, MVT::v4i8, Custom);
      setOperationAction(ISD::SINT_TO_FP, MVT::v2i16, Custom);
      setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);

      setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
      setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
      setOperationAction(ISD::FABS, MVT::v4f32, Legal);
      setOperationAction(ISD::FABS, MVT::v2f64, Legal);
      setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Legal);
      setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Legal);

      if (Subtarget.hasDirectMove())
        setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
      setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom);

      addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass);
    }

    if (Subtarget.hasP8Altivec()) {
      addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass);
      addRegisterClass(MVT::v1i128, &PPC::VRRCRegClass);
    }

    if (Subtarget.hasP9Vector()) {
      setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
      setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);

      // 128 bit shifts can be accomplished via 3 instructions for SHL and
      // SRL, but not for SRA because of the instructions available:
      // VS{RL} and VS{RL}O.
      setOperationAction(ISD::SHL, MVT::v1i128, Legal);
      setOperationAction(ISD::SRL, MVT::v1i128, Legal);
      setOperationAction(ISD::SRA, MVT::v1i128, Expand);

      if (EnableQuadPrecision) {
        addRegisterClass(MVT::f128, &PPC::VRRCRegClass);
        setOperationAction(ISD::FADD, MVT::f128, Legal);
        setOperationAction(ISD::FSUB, MVT::f128, Legal);
        setOperationAction(ISD::FDIV, MVT::f128, Legal);
        setOperationAction(ISD::FMUL, MVT::f128, Legal);
        setOperationAction(ISD::FP_EXTEND, MVT::f128, Legal);
        // No extending loads to f128 on PPC.
        for (MVT FPT : MVT::fp_valuetypes())
          setLoadExtAction(ISD::EXTLOAD, MVT::f128, FPT, Expand);
        setOperationAction(ISD::FMA, MVT::f128, Legal);
        setCondCodeAction(ISD::SETULT, MVT::f128, Expand);
        setCondCodeAction(ISD::SETUGT, MVT::f128, Expand);
        setCondCodeAction(ISD::SETUEQ, MVT::f128, Expand);
        setCondCodeAction(ISD::SETOGE, MVT::f128, Expand);
        setCondCodeAction(ISD::SETOLE, MVT::f128, Expand);
        setCondCodeAction(ISD::SETONE, MVT::f128, Expand);

        setOperationAction(ISD::FTRUNC, MVT::f128, Legal);
        setOperationAction(ISD::FRINT, MVT::f128, Legal);
        setOperationAction(ISD::FFLOOR, MVT::f128, Legal);
        setOperationAction(ISD::FCEIL, MVT::f128, Legal);
        setOperationAction(ISD::FNEARBYINT, MVT::f128, Legal);
        setOperationAction(ISD::FROUND, MVT::f128, Legal);

        setOperationAction(ISD::SELECT, MVT::f128, Expand);
        setOperationAction(ISD::FP_ROUND, MVT::f64, Legal);
        setOperationAction(ISD::FP_ROUND, MVT::f32, Legal);
        setTruncStoreAction(MVT::f128, MVT::f64, Expand);
        setTruncStoreAction(MVT::f128, MVT::f32, Expand);
        setOperationAction(ISD::BITCAST, MVT::i128, Custom);
        // No implementation for these ops for PowerPC.
        setOperationAction(ISD::FSIN , MVT::f128, Expand);
        setOperationAction(ISD::FCOS , MVT::f128, Expand);
        setOperationAction(ISD::FPOW, MVT::f128, Expand);
        setOperationAction(ISD::FPOWI, MVT::f128, Expand);
        setOperationAction(ISD::FREM, MVT::f128, Expand);
      }
      setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom);
    }

    if (Subtarget.hasP9Altivec()) {
      setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
      setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom);
    }
  }

  if (Subtarget.hasQPX()) {
    setOperationAction(ISD::FADD, MVT::v4f64, Legal);
    setOperationAction(ISD::FSUB, MVT::v4f64, Legal);
    setOperationAction(ISD::FMUL, MVT::v4f64, Legal);
    setOperationAction(ISD::FREM, MVT::v4f64, Expand);

    setOperationAction(ISD::FCOPYSIGN, MVT::v4f64, Legal);
    setOperationAction(ISD::FGETSIGN, MVT::v4f64, Expand);

    setOperationAction(ISD::LOAD  , MVT::v4f64, Custom);
    setOperationAction(ISD::STORE , MVT::v4f64, Custom);

    setTruncStoreAction(MVT::v4f64, MVT::v4f32, Custom);
    setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f32, Custom);

    if (!Subtarget.useCRBits())
      setOperationAction(ISD::SELECT, MVT::v4f64, Expand);
    setOperationAction(ISD::VSELECT, MVT::v4f64, Legal);

    setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4f64, Legal);
    setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4f64, Expand);
    setOperationAction(ISD::CONCAT_VECTORS , MVT::v4f64, Expand);
    setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4f64, Expand);
    setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4f64, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f64, Legal);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4f64, Custom);

    setOperationAction(ISD::FP_TO_SINT , MVT::v4f64, Legal);
    setOperationAction(ISD::FP_TO_UINT , MVT::v4f64, Expand);

    setOperationAction(ISD::FP_ROUND , MVT::v4f32, Legal);
    setOperationAction(ISD::FP_ROUND_INREG , MVT::v4f32, Expand);
    setOperationAction(ISD::FP_EXTEND, MVT::v4f64, Legal);

    setOperationAction(ISD::FNEG , MVT::v4f64, Legal);
    setOperationAction(ISD::FABS , MVT::v4f64, Legal);
    setOperationAction(ISD::FSIN , MVT::v4f64, Expand);
    setOperationAction(ISD::FCOS , MVT::v4f64, Expand);
    setOperationAction(ISD::FPOW , MVT::v4f64, Expand);
    setOperationAction(ISD::FLOG , MVT::v4f64, Expand);
    setOperationAction(ISD::FLOG2 , MVT::v4f64, Expand);
    setOperationAction(ISD::FLOG10 , MVT::v4f64, Expand);
    setOperationAction(ISD::FEXP , MVT::v4f64, Expand);
    setOperationAction(ISD::FEXP2 , MVT::v4f64, Expand);

    setOperationAction(ISD::FMINNUM, MVT::v4f64, Legal);
    setOperationAction(ISD::FMAXNUM, MVT::v4f64, Legal);

    setIndexedLoadAction(ISD::PRE_INC, MVT::v4f64, Legal);
    setIndexedStoreAction(ISD::PRE_INC, MVT::v4f64, Legal);

    addRegisterClass(MVT::v4f64, &PPC::QFRCRegClass);

    setOperationAction(ISD::FADD, MVT::v4f32, Legal);
    setOperationAction(ISD::FSUB, MVT::v4f32, Legal);
    setOperationAction(ISD::FMUL, MVT::v4f32, Legal);
    setOperationAction(ISD::FREM, MVT::v4f32, Expand);

    setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Legal);
    setOperationAction(ISD::FGETSIGN, MVT::v4f32, Expand);

    setOperationAction(ISD::LOAD  , MVT::v4f32, Custom);
    setOperationAction(ISD::STORE , MVT::v4f32, Custom);

    if (!Subtarget.useCRBits())
      setOperationAction(ISD::SELECT, MVT::v4f32, Expand);
    setOperationAction(ISD::VSELECT, MVT::v4f32, Legal);

    setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4f32, Legal);
    setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4f32, Expand);
    setOperationAction(ISD::CONCAT_VECTORS , MVT::v4f32, Expand);
    setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4f32, Expand);
    setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4f32, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);

    setOperationAction(ISD::FP_TO_SINT , MVT::v4f32, Legal);
    setOperationAction(ISD::FP_TO_UINT , MVT::v4f32, Expand);

    setOperationAction(ISD::FNEG , MVT::v4f32, Legal);
    setOperationAction(ISD::FABS , MVT::v4f32, Legal);
    setOperationAction(ISD::FSIN , MVT::v4f32, Expand);
    setOperationAction(ISD::FCOS , MVT::v4f32, Expand);
    setOperationAction(ISD::FPOW , MVT::v4f32, Expand);
    setOperationAction(ISD::FLOG , MVT::v4f32, Expand);
    setOperationAction(ISD::FLOG2 , MVT::v4f32, Expand);
    setOperationAction(ISD::FLOG10 , MVT::v4f32, Expand);
    setOperationAction(ISD::FEXP , MVT::v4f32, Expand);
    setOperationAction(ISD::FEXP2 , MVT::v4f32, Expand);

    setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal);
    setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal);

    setIndexedLoadAction(ISD::PRE_INC, MVT::v4f32, Legal);
    setIndexedStoreAction(ISD::PRE_INC, MVT::v4f32, Legal);

    addRegisterClass(MVT::v4f32, &PPC::QSRCRegClass);

    setOperationAction(ISD::AND , MVT::v4i1, Legal);
    setOperationAction(ISD::OR , MVT::v4i1, Legal);
    setOperationAction(ISD::XOR , MVT::v4i1, Legal);

    if (!Subtarget.useCRBits())
      setOperationAction(ISD::SELECT, MVT::v4i1, Expand);
    setOperationAction(ISD::VSELECT, MVT::v4i1, Legal);

    setOperationAction(ISD::LOAD  , MVT::v4i1, Custom);
    setOperationAction(ISD::STORE , MVT::v4i1, Custom);

    setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4i1, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4i1, Expand);
    setOperationAction(ISD::CONCAT_VECTORS , MVT::v4i1, Expand);
    setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4i1, Expand);
    setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4i1, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i1, Expand);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i1, Custom);

    setOperationAction(ISD::SINT_TO_FP, MVT::v4i1, Custom);
    setOperationAction(ISD::UINT_TO_FP, MVT::v4i1, Custom);

    addRegisterClass(MVT::v4i1, &PPC::QBRCRegClass);

    setOperationAction(ISD::FFLOOR, MVT::v4f64, Legal);
    setOperationAction(ISD::FCEIL,  MVT::v4f64, Legal);
    setOperationAction(ISD::FTRUNC, MVT::v4f64, Legal);
    setOperationAction(ISD::FROUND, MVT::v4f64, Legal);

    setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
    setOperationAction(ISD::FCEIL,  MVT::v4f32, Legal);
    setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
    setOperationAction(ISD::FROUND, MVT::v4f32, Legal);

    setOperationAction(ISD::FNEARBYINT, MVT::v4f64, Expand);
    setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Expand);

    // These need to set FE_INEXACT, and so cannot be vectorized here.
    setOperationAction(ISD::FRINT, MVT::v4f64, Expand);
    setOperationAction(ISD::FRINT, MVT::v4f32, Expand);

    if (TM.Options.UnsafeFPMath) {
      setOperationAction(ISD::FDIV, MVT::v4f64, Legal);
      setOperationAction(ISD::FSQRT, MVT::v4f64, Legal);

      setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
      setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
    } else {
      setOperationAction(ISD::FDIV, MVT::v4f64, Expand);
      setOperationAction(ISD::FSQRT, MVT::v4f64, Expand);

      setOperationAction(ISD::FDIV, MVT::v4f32, Expand);
      setOperationAction(ISD::FSQRT, MVT::v4f32, Expand);
    }
  }

  if (Subtarget.has64BitSupport())
    setOperationAction(ISD::PREFETCH, MVT::Other, Legal);

  setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, isPPC64 ? Legal : Custom);

  if (!isPPC64) {
    setOperationAction(ISD::ATOMIC_LOAD,  MVT::i64, Expand);
    setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand);
  }

  setBooleanContents(ZeroOrOneBooleanContent);

  if (Subtarget.hasAltivec()) {
    // Altivec instructions set fields to all zeros or all ones.
    setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
  }

  if (!isPPC64) {
    // These libcalls are not available in 32-bit.
    setLibcallName(RTLIB::SHL_I128, nullptr);
    setLibcallName(RTLIB::SRL_I128, nullptr);
    setLibcallName(RTLIB::SRA_I128, nullptr);
  }

  setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1);

  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::ADD);
  setTargetDAGCombine(ISD::SHL);
  setTargetDAGCombine(ISD::SRA);
  setTargetDAGCombine(ISD::SRL);
  setTargetDAGCombine(ISD::MUL);
  setTargetDAGCombine(ISD::SINT_TO_FP);
  setTargetDAGCombine(ISD::BUILD_VECTOR);
  if (Subtarget.hasFPCVT())
    setTargetDAGCombine(ISD::UINT_TO_FP);
  setTargetDAGCombine(ISD::LOAD);
  setTargetDAGCombine(ISD::STORE);
  setTargetDAGCombine(ISD::BR_CC);
  if (Subtarget.useCRBits())
    setTargetDAGCombine(ISD::BRCOND);
  setTargetDAGCombine(ISD::BSWAP);
  setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
  setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
  setTargetDAGCombine(ISD::INTRINSIC_VOID);

  setTargetDAGCombine(ISD::SIGN_EXTEND);
  setTargetDAGCombine(ISD::ZERO_EXTEND);
  setTargetDAGCombine(ISD::ANY_EXTEND);

  setTargetDAGCombine(ISD::TRUNCATE);

  if (Subtarget.useCRBits()) {
    setTargetDAGCombine(ISD::TRUNCATE);
    setTargetDAGCombine(ISD::SETCC);
    setTargetDAGCombine(ISD::SELECT_CC);
  }
1127
1.85k
1128
1.85k
  // Use reciprocal estimates.
1129
1.85k
  if (TM.Options.UnsafeFPMath) {
1130
17
    setTargetDAGCombine(ISD::FDIV);
1131
17
    setTargetDAGCombine(ISD::FSQRT);
1132
17
  }
1133
1.85k
1134
1.85k
  if (Subtarget.hasP9Altivec()) {
1135
213
    setTargetDAGCombine(ISD::ABS);
1136
213
    setTargetDAGCombine(ISD::VSELECT);
1137
213
  }
1138
1.85k
1139
1.85k
  // Darwin long double math library functions have $LDBL128 appended.
1140
1.85k
  if (Subtarget.isDarwin()) {
1141
0
    setLibcallName(RTLIB::COS_PPCF128, "cosl$LDBL128");
1142
0
    setLibcallName(RTLIB::POW_PPCF128, "powl$LDBL128");
1143
0
    setLibcallName(RTLIB::REM_PPCF128, "fmodl$LDBL128");
1144
0
    setLibcallName(RTLIB::SIN_PPCF128, "sinl$LDBL128");
1145
0
    setLibcallName(RTLIB::SQRT_PPCF128, "sqrtl$LDBL128");
1146
0
    setLibcallName(RTLIB::LOG_PPCF128, "logl$LDBL128");
1147
0
    setLibcallName(RTLIB::LOG2_PPCF128, "log2l$LDBL128");
1148
0
    setLibcallName(RTLIB::LOG10_PPCF128, "log10l$LDBL128");
1149
0
    setLibcallName(RTLIB::EXP_PPCF128, "expl$LDBL128");
1150
0
    setLibcallName(RTLIB::EXP2_PPCF128, "exp2l$LDBL128");
1151
0
  }
1152
1.85k
1153
1.85k
  if (EnableQuadPrecision) {
1154
15
    setLibcallName(RTLIB::LOG_F128, "logf128");
1155
15
    setLibcallName(RTLIB::LOG2_F128, "log2f128");
1156
15
    setLibcallName(RTLIB::LOG10_F128, "log10f128");
1157
15
    setLibcallName(RTLIB::EXP_F128, "expf128");
1158
15
    setLibcallName(RTLIB::EXP2_F128, "exp2f128");
1159
15
    setLibcallName(RTLIB::SIN_F128, "sinf128");
1160
15
    setLibcallName(RTLIB::COS_F128, "cosf128");
1161
15
    setLibcallName(RTLIB::POW_F128, "powf128");
1162
15
    setLibcallName(RTLIB::FMIN_F128, "fminf128");
1163
15
    setLibcallName(RTLIB::FMAX_F128, "fmaxf128");
1164
15
    setLibcallName(RTLIB::POWI_F128, "__powikf2");
1165
15
    setLibcallName(RTLIB::REM_F128, "fmodf128");
1166
15
  }
1167
1.85k
1168
1.85k
  // With 32 condition bits, we don't need to sink (and duplicate) compares
1169
1.85k
  // aggressively in CodeGenPrep.
1170
1.85k
  if (Subtarget.useCRBits()) {
1171
1.62k
    setHasMultipleConditionRegisters();
1172
1.62k
    setJumpIsExpensive();
1173
1.62k
  }
1174
1.85k
1175
1.85k
  setMinFunctionAlignment(2);
1176
1.85k
  if (Subtarget.isDarwin())
1177
0
    setPrefFunctionAlignment(4);
1178
1.85k
1179
1.85k
  switch (Subtarget.getDarwinDirective()) {
1180
1.85k
  
default: break595
;
1181
1.85k
  case PPC::DIR_970:
1182
1.25k
  case PPC::DIR_A2:
1183
1.25k
  case PPC::DIR_E500:
1184
1.25k
  case PPC::DIR_E500mc:
1185
1.25k
  case PPC::DIR_E5500:
1186
1.25k
  case PPC::DIR_PWR4:
1187
1.25k
  case PPC::DIR_PWR5:
1188
1.25k
  case PPC::DIR_PWR5X:
1189
1.25k
  case PPC::DIR_PWR6:
1190
1.25k
  case PPC::DIR_PWR6X:
1191
1.25k
  case PPC::DIR_PWR7:
1192
1.25k
  case PPC::DIR_PWR8:
1193
1.25k
  case PPC::DIR_PWR9:
1194
1.25k
    setPrefFunctionAlignment(4);
1195
1.25k
    setPrefLoopAlignment(4);
1196
1.25k
    break;
1197
1.85k
  }
1198
1.85k
1199
1.85k
  if (Subtarget.enableMachineScheduler())
1200
1.85k
    setSchedulingPreference(Sched::Source);
1201
0
  else
1202
0
    setSchedulingPreference(Sched::Hybrid);
1203
1.85k
1204
1.85k
  computeRegisterProperties(STI.getRegisterInfo());
1205
1.85k
1206
1.85k
  // The Freescale cores do better with aggressive inlining of memcpy and
  // friends. GCC uses the same threshold of 128 bytes (= 32 word stores).
  if (Subtarget.getDarwinDirective() == PPC::DIR_E500mc ||
      Subtarget.getDarwinDirective() == PPC::DIR_E5500) {
    MaxStoresPerMemset = 32;
    MaxStoresPerMemsetOptSize = 16;
    MaxStoresPerMemcpy = 32;
    MaxStoresPerMemcpyOptSize = 8;
    MaxStoresPerMemmove = 32;
    MaxStoresPerMemmoveOptSize = 8;
  } else if (Subtarget.getDarwinDirective() == PPC::DIR_A2) {
    // The A2 also benefits from (very) aggressive inlining of memcpy and
    // friends. The overhead of a function call, even when warm, can be
    // over one hundred cycles.
    MaxStoresPerMemset = 128;
    MaxStoresPerMemcpy = 128;
    MaxStoresPerMemmove = 128;
    MaxLoadsPerMemcmp = 128;
  } else {
    MaxLoadsPerMemcmp = 8;
    MaxLoadsPerMemcmpOptSize = 4;
  }
}
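
// Editorial note (illustrative, not part of the original source): these
// thresholds bound how many scalar memory operations SelectionDAG may emit
// when it inlines a memory intrinsic instead of calling the library routine.
// For example, with MaxStoresPerMemcpy = 32 a 128-byte memcpy can still be
// expanded inline as 32 four-byte word stores, matching the 128-byte figure
// cited in the comment above, while a 256-byte copy would fall back to a
// call to memcpy.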

/// getMaxByValAlign - Helper for getByValTypeAlignment to determine
/// the desired ByVal argument alignment.
static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign,
                             unsigned MaxMaxAlign) {
  if (MaxAlign == MaxMaxAlign)
    return;
  if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
    if (MaxMaxAlign >= 32 && VTy->getBitWidth() >= 256)
      MaxAlign = 32;
    else if (VTy->getBitWidth() >= 128 && MaxAlign < 16)
      MaxAlign = 16;
  } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
    unsigned EltAlign = 0;
    getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign);
    if (EltAlign > MaxAlign)
      MaxAlign = EltAlign;
  } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
    for (auto *EltTy : STy->elements()) {
      unsigned EltAlign = 0;
      getMaxByValAlign(EltTy, EltAlign, MaxMaxAlign);
      if (EltAlign > MaxAlign)
        MaxAlign = EltAlign;
      if (MaxAlign == MaxMaxAlign)
        break;
    }
  }
}

/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
/// function arguments in the caller parameter area.
unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty,
                                                  const DataLayout &DL) const {
  // Darwin passes everything on 4 byte boundary.
  if (Subtarget.isDarwin())
    return 4;

  // 16byte and wider vectors are passed on 16byte boundary.
  // The rest is 8 on PPC64 and 4 on PPC32 boundary.
  unsigned Align = Subtarget.isPPC64() ? 8 : 4;
  if (Subtarget.hasAltivec() || Subtarget.hasQPX())
    getMaxByValAlign(Ty, Align, Subtarget.hasQPX() ? 32 : 16);
  return Align;
}
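
// Illustrative example (editorial, not in the original file): on a PPC64
// target with Altivec, a by-value aggregate such as
//   struct S { int a; __vector int v; };   // v4i32 member, 128 bits wide
// starts at Align = 8, and getMaxByValAlign raises it to 16 because the
// struct contains a vector of at least 128 bits; a struct of plain ints
// keeps the default 8-byte boundary.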

bool PPCTargetLowering::useSoftFloat() const {
  return Subtarget.useSoftFloat();
}

bool PPCTargetLowering::hasSPE() const {
  return Subtarget.hasSPE();
}

bool PPCTargetLowering::preferIncOfAddToSubOfNot(EVT VT) const {
  return VT.isScalarInteger();
}

const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch ((PPCISD::NodeType)Opcode) {
  case PPCISD::FIRST_NUMBER:    break;
  case PPCISD::FSEL:            return "PPCISD::FSEL";
  case PPCISD::FCFID:           return "PPCISD::FCFID";
  case PPCISD::FCFIDU:          return "PPCISD::FCFIDU";
  case PPCISD::FCFIDS:          return "PPCISD::FCFIDS";
  case PPCISD::FCFIDUS:         return "PPCISD::FCFIDUS";
  case PPCISD::FCTIDZ:          return "PPCISD::FCTIDZ";
  case PPCISD::FCTIWZ:          return "PPCISD::FCTIWZ";
  case PPCISD::FCTIDUZ:         return "PPCISD::FCTIDUZ";
  case PPCISD::FCTIWUZ:         return "PPCISD::FCTIWUZ";
  case PPCISD::FP_TO_UINT_IN_VSR:
                                return "PPCISD::FP_TO_UINT_IN_VSR";
  case PPCISD::FP_TO_SINT_IN_VSR:
                                return "PPCISD::FP_TO_SINT_IN_VSR";
  case PPCISD::FRE:             return "PPCISD::FRE";
  case PPCISD::FRSQRTE:         return "PPCISD::FRSQRTE";
  case PPCISD::STFIWX:          return "PPCISD::STFIWX";
  case PPCISD::VMADDFP:         return "PPCISD::VMADDFP";
  case PPCISD::VNMSUBFP:        return "PPCISD::VNMSUBFP";
  case PPCISD::VPERM:           return "PPCISD::VPERM";
  case PPCISD::XXSPLT:          return "PPCISD::XXSPLT";
  case PPCISD::VECINSERT:       return "PPCISD::VECINSERT";
  case PPCISD::XXREVERSE:       return "PPCISD::XXREVERSE";
  case PPCISD::XXPERMDI:        return "PPCISD::XXPERMDI";
  case PPCISD::VECSHL:          return "PPCISD::VECSHL";
  case PPCISD::CMPB:            return "PPCISD::CMPB";
  case PPCISD::Hi:              return "PPCISD::Hi";
  case PPCISD::Lo:              return "PPCISD::Lo";
  case PPCISD::TOC_ENTRY:       return "PPCISD::TOC_ENTRY";
  case PPCISD::ATOMIC_CMP_SWAP_8: return "PPCISD::ATOMIC_CMP_SWAP_8";
  case PPCISD::ATOMIC_CMP_SWAP_16: return "PPCISD::ATOMIC_CMP_SWAP_16";
  case PPCISD::DYNALLOC:        return "PPCISD::DYNALLOC";
  case PPCISD::DYNAREAOFFSET:   return "PPCISD::DYNAREAOFFSET";
  case PPCISD::GlobalBaseReg:   return "PPCISD::GlobalBaseReg";
  case PPCISD::SRL:             return "PPCISD::SRL";
  case PPCISD::SRA:             return "PPCISD::SRA";
  case PPCISD::SHL:             return "PPCISD::SHL";
  case PPCISD::SRA_ADDZE:       return "PPCISD::SRA_ADDZE";
  case PPCISD::CALL:            return "PPCISD::CALL";
  case PPCISD::CALL_NOP:        return "PPCISD::CALL_NOP";
  case PPCISD::MTCTR:           return "PPCISD::MTCTR";
  case PPCISD::BCTRL:           return "PPCISD::BCTRL";
  case PPCISD::BCTRL_LOAD_TOC:  return "PPCISD::BCTRL_LOAD_TOC";
  case PPCISD::RET_FLAG:        return "PPCISD::RET_FLAG";
  case PPCISD::READ_TIME_BASE:  return "PPCISD::READ_TIME_BASE";
  case PPCISD::EH_SJLJ_SETJMP:  return "PPCISD::EH_SJLJ_SETJMP";
  case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";
  case PPCISD::MFOCRF:          return "PPCISD::MFOCRF";
  case PPCISD::MFVSR:           return "PPCISD::MFVSR";
  case PPCISD::MTVSRA:          return "PPCISD::MTVSRA";
  case PPCISD::MTVSRZ:          return "PPCISD::MTVSRZ";
  case PPCISD::SINT_VEC_TO_FP:  return "PPCISD::SINT_VEC_TO_FP";
  case PPCISD::UINT_VEC_TO_FP:  return "PPCISD::UINT_VEC_TO_FP";
  case PPCISD::ANDIo_1_EQ_BIT:  return "PPCISD::ANDIo_1_EQ_BIT";
  case PPCISD::ANDIo_1_GT_BIT:  return "PPCISD::ANDIo_1_GT_BIT";
  case PPCISD::VCMP:            return "PPCISD::VCMP";
  case PPCISD::VCMPo:           return "PPCISD::VCMPo";
  case PPCISD::LBRX:            return "PPCISD::LBRX";
  case PPCISD::STBRX:           return "PPCISD::STBRX";
  case PPCISD::LFIWAX:          return "PPCISD::LFIWAX";
  case PPCISD::LFIWZX:          return "PPCISD::LFIWZX";
  case PPCISD::LXSIZX:          return "PPCISD::LXSIZX";
  case PPCISD::STXSIX:          return "PPCISD::STXSIX";
  case PPCISD::VEXTS:           return "PPCISD::VEXTS";
  case PPCISD::SExtVElems:      return "PPCISD::SExtVElems";
  case PPCISD::LXVD2X:          return "PPCISD::LXVD2X";
  case PPCISD::STXVD2X:         return "PPCISD::STXVD2X";
  case PPCISD::ST_VSR_SCAL_INT:
                                return "PPCISD::ST_VSR_SCAL_INT";
  case PPCISD::COND_BRANCH:     return "PPCISD::COND_BRANCH";
  case PPCISD::BDNZ:            return "PPCISD::BDNZ";
  case PPCISD::BDZ:             return "PPCISD::BDZ";
  case PPCISD::MFFS:            return "PPCISD::MFFS";
  case PPCISD::FADDRTZ:         return "PPCISD::FADDRTZ";
  case PPCISD::TC_RETURN:       return "PPCISD::TC_RETURN";
  case PPCISD::CR6SET:          return "PPCISD::CR6SET";
  case PPCISD::CR6UNSET:        return "PPCISD::CR6UNSET";
  case PPCISD::PPC32_GOT:       return "PPCISD::PPC32_GOT";
  case PPCISD::PPC32_PICGOT:    return "PPCISD::PPC32_PICGOT";
  case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA";
  case PPCISD::LD_GOT_TPREL_L:  return "PPCISD::LD_GOT_TPREL_L";
  case PPCISD::ADD_TLS:         return "PPCISD::ADD_TLS";
  case PPCISD::ADDIS_TLSGD_HA:  return "PPCISD::ADDIS_TLSGD_HA";
  case PPCISD::ADDI_TLSGD_L:    return "PPCISD::ADDI_TLSGD_L";
  case PPCISD::GET_TLS_ADDR:    return "PPCISD::GET_TLS_ADDR";
  case PPCISD::ADDI_TLSGD_L_ADDR: return "PPCISD::ADDI_TLSGD_L_ADDR";
  case PPCISD::ADDIS_TLSLD_HA:  return "PPCISD::ADDIS_TLSLD_HA";
  case PPCISD::ADDI_TLSLD_L:    return "PPCISD::ADDI_TLSLD_L";
  case PPCISD::GET_TLSLD_ADDR:  return "PPCISD::GET_TLSLD_ADDR";
  case PPCISD::ADDI_TLSLD_L_ADDR: return "PPCISD::ADDI_TLSLD_L_ADDR";
  case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
  case PPCISD::ADDI_DTPREL_L:   return "PPCISD::ADDI_DTPREL_L";
  case PPCISD::VADD_SPLAT:      return "PPCISD::VADD_SPLAT";
  case PPCISD::SC:              return "PPCISD::SC";
  case PPCISD::CLRBHRB:         return "PPCISD::CLRBHRB";
  case PPCISD::MFBHRBE:         return "PPCISD::MFBHRBE";
  case PPCISD::RFEBB:           return "PPCISD::RFEBB";
  case PPCISD::XXSWAPD:         return "PPCISD::XXSWAPD";
  case PPCISD::SWAP_NO_CHAIN:   return "PPCISD::SWAP_NO_CHAIN";
  case PPCISD::VABSD:           return "PPCISD::VABSD";
  case PPCISD::QVFPERM:         return "PPCISD::QVFPERM";
  case PPCISD::QVGPCI:          return "PPCISD::QVGPCI";
  case PPCISD::QVALIGNI:        return "PPCISD::QVALIGNI";
  case PPCISD::QVESPLATI:       return "PPCISD::QVESPLATI";
  case PPCISD::QBFLT:           return "PPCISD::QBFLT";
  case PPCISD::QVLFSb:          return "PPCISD::QVLFSb";
  case PPCISD::BUILD_FP128:     return "PPCISD::BUILD_FP128";
  case PPCISD::BUILD_SPE64:     return "PPCISD::BUILD_SPE64";
  case PPCISD::EXTRACT_SPE:     return "PPCISD::EXTRACT_SPE";
  case PPCISD::EXTSWSLI:        return "PPCISD::EXTSWSLI";
  case PPCISD::LD_VSX_LH:       return "PPCISD::LD_VSX_LH";
  case PPCISD::FP_EXTEND_LH:    return "PPCISD::FP_EXTEND_LH";
  }
  return nullptr;
}

EVT PPCTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &C,
                                          EVT VT) const {
  if (!VT.isVector())
    return Subtarget.useCRBits() ? MVT::i1 : MVT::i32;

  if (Subtarget.hasQPX())
    return EVT::getVectorVT(C, MVT::i1, VT.getVectorNumElements());

  return VT.changeVectorElementTypeToInteger();
}

bool PPCTargetLowering::enableAggressiveFMAFusion(EVT VT) const {
  assert(VT.isFloatingPoint() && "Non-floating-point FMA?");
  return true;
}

//===----------------------------------------------------------------------===//
// Node matching predicates, for use by the tblgen matching code.
//===----------------------------------------------------------------------===//

/// isFloatingPointZero - Return true if this is 0.0 or -0.0.
static bool isFloatingPointZero(SDValue Op) {
  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
    return CFP->getValueAPF().isZero();
  else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
    // Maybe this has already been legalized into the constant pool?
    if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
      if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
        return CFP->getValueAPF().isZero();
  }
  return false;
}

/// isConstantOrUndef - Op is either an undef node or a ConstantSDNode.  Return
/// true if Op is undef or if it matches the specified value.
static bool isConstantOrUndef(int Op, int Val) {
  return Op < 0 || Op == Val;
}

/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUHUM instruction.
/// The ShuffleKind distinguishes between big-endian operations with
/// two different inputs (0), either-endian operations with two identical
/// inputs (1), and little-endian operations with two different inputs (2).
/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
                               SelectionDAG &DAG) {
  bool IsLE = DAG.getDataLayout().isLittleEndian();
  if (ShuffleKind == 0) {
    if (IsLE)
      return false;
    for (unsigned i = 0; i != 16; ++i)
      if (!isConstantOrUndef(N->getMaskElt(i), i*2+1))
        return false;
  } else if (ShuffleKind == 2) {
    if (!IsLE)
      return false;
    for (unsigned i = 0; i != 16; ++i)
      if (!isConstantOrUndef(N->getMaskElt(i), i*2))
        return false;
  } else if (ShuffleKind == 1) {
    unsigned j = IsLE ? 0 : 1;
    for (unsigned i = 0; i != 8; ++i)
      if (!isConstantOrUndef(N->getMaskElt(i),    i*2+j) ||
          !isConstantOrUndef(N->getMaskElt(i+8), i*2+j))
        return false;
  }
  return true;
}

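// Illustrative example (editorial, not in the original source): for the
// big-endian two-input form (ShuffleKind == 0) the accepted v16i8 mask is
//   <1,3,5,7,9,11,13,15,17,19,21,23,25,27,29,31>
// i.e. MaskElt(i) == i*2+1, selecting the low-order byte of each halfword
// of the two concatenated inputs -- exactly the result vpkuhum produces.
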
/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUWUM instruction.
/// The ShuffleKind distinguishes between big-endian operations with
/// two different inputs (0), either-endian operations with two identical
/// inputs (1), and little-endian operations with two different inputs (2).
/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
                               SelectionDAG &DAG) {
  bool IsLE = DAG.getDataLayout().isLittleEndian();
  if (ShuffleKind == 0) {
    if (IsLE)
      return false;
    for (unsigned i = 0; i != 16; i += 2)
      if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+2) ||
          !isConstantOrUndef(N->getMaskElt(i+1), i*2+3))
        return false;
  } else if (ShuffleKind == 2) {
    if (!IsLE)
      return false;
    for (unsigned i = 0; i != 16; i += 2)
      if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2) ||
          !isConstantOrUndef(N->getMaskElt(i+1), i*2+1))
        return false;
  } else if (ShuffleKind == 1) {
    unsigned j = IsLE ? 0 : 2;
    for (unsigned i = 0; i != 8; i += 2)
      if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+j)   ||
          !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||
          !isConstantOrUndef(N->getMaskElt(i+8), i*2+j)   ||
          !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1))
        return false;
  }
  return true;
}

/// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUDUM instruction, AND the VPKUDUM instruction exists for the
/// current subtarget.
///
/// The ShuffleKind distinguishes between big-endian operations with
/// two different inputs (0), either-endian operations with two identical
/// inputs (1), and little-endian operations with two different inputs (2).
/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
                               SelectionDAG &DAG) {
  const PPCSubtarget& Subtarget =
    static_cast<const PPCSubtarget&>(DAG.getSubtarget());
  if (!Subtarget.hasP8Vector())
    return false;

  bool IsLE = DAG.getDataLayout().isLittleEndian();
  if (ShuffleKind == 0) {
    if (IsLE)
      return false;
    for (unsigned i = 0; i != 16; i += 4)
      if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+4) ||
          !isConstantOrUndef(N->getMaskElt(i+1), i*2+5) ||
          !isConstantOrUndef(N->getMaskElt(i+2), i*2+6) ||
          !isConstantOrUndef(N->getMaskElt(i+3), i*2+7))
        return false;
  } else if (ShuffleKind == 2) {
    if (!IsLE)
      return false;
    for (unsigned i = 0; i != 16; i += 4)
      if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2) ||
          !isConstantOrUndef(N->getMaskElt(i+1), i*2+1) ||
          !isConstantOrUndef(N->getMaskElt(i+2), i*2+2) ||
          !isConstantOrUndef(N->getMaskElt(i+3), i*2+3))
        return false;
  } else if (ShuffleKind == 1) {
    unsigned j = IsLE ? 0 : 4;
    for (unsigned i = 0; i != 8; i += 4)
      if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+j)   ||
          !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||
          !isConstantOrUndef(N->getMaskElt(i+2), i*2+j+2) ||
          !isConstantOrUndef(N->getMaskElt(i+3), i*2+j+3) ||
          !isConstantOrUndef(N->getMaskElt(i+8), i*2+j)   ||
          !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1) ||
          !isConstantOrUndef(N->getMaskElt(i+10), i*2+j+2) ||
          !isConstantOrUndef(N->getMaskElt(i+11), i*2+j+3))
        return false;
  }
  return true;
}

/// isVMerge - Common function, used to match vmrg* shuffles.
///
static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
                     unsigned LHSStart, unsigned RHSStart) {
  if (N->getValueType(0) != MVT::v16i8)
    return false;
  assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
         "Unsupported merge size!");

  for (unsigned i = 0; i != 8/UnitSize; ++i)     // Step over units
    for (unsigned j = 0; j != UnitSize; ++j) {   // Step over bytes within unit
      if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j),
                             LHSStart+j+i*UnitSize) ||
          !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j),
                             RHSStart+j+i*UnitSize))
        return false;
    }
  return true;
}

/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
/// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes).
/// The ShuffleKind distinguishes between big-endian merges with two
/// different inputs (0), either-endian merges with two identical inputs (1),
/// and little-endian merges with two different inputs (2).  For the latter,
/// the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
                             unsigned ShuffleKind, SelectionDAG &DAG) {
  if (DAG.getDataLayout().isLittleEndian()) {
    if (ShuffleKind == 1) // unary
      return isVMerge(N, UnitSize, 0, 0);
    else if (ShuffleKind == 2) // swapped
      return isVMerge(N, UnitSize, 0, 16);
    else
      return false;
  } else {
    if (ShuffleKind == 1) // unary
      return isVMerge(N, UnitSize, 8, 8);
    else if (ShuffleKind == 0) // normal
      return isVMerge(N, UnitSize, 8, 24);
    else
      return false;
  }
}

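// Illustrative example (editorial, not in the original source): on a
// big-endian target with two different inputs (ShuffleKind == 0) and
// UnitSize == 1, isVMerge(N, 1, 8, 24) accepts the v16i8 mask
//   <8,24,9,25,10,26,11,27,12,28,13,29,14,30,15,31>
// which interleaves the low halves of the two inputs byte by byte -- the
// vmrglb result.
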
/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
/// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes).
/// The ShuffleKind distinguishes between big-endian merges with two
/// different inputs (0), either-endian merges with two identical inputs (1),
/// and little-endian merges with two different inputs (2).  For the latter,
/// the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
                             unsigned ShuffleKind, SelectionDAG &DAG) {
  if (DAG.getDataLayout().isLittleEndian()) {
    if (ShuffleKind == 1) // unary
      return isVMerge(N, UnitSize, 8, 8);
    else if (ShuffleKind == 2) // swapped
      return isVMerge(N, UnitSize, 8, 24);
    else
      return false;
  } else {
    if (ShuffleKind == 1) // unary
      return isVMerge(N, UnitSize, 0, 0);
    else if (ShuffleKind == 0) // normal
      return isVMerge(N, UnitSize, 0, 16);
    else
      return false;
  }
}

/**
 * Common function used to match vmrgew and vmrgow shuffles
 *
 * The indexOffset determines whether to look for even or odd words in
 * the shuffle mask. This is based on the endianness of the target
 * machine.
 *   - Little Endian:
 *     - Use offset of 0 to check for odd elements
 *     - Use offset of 4 to check for even elements
 *   - Big Endian:
 *     - Use offset of 0 to check for even elements
 *     - Use offset of 4 to check for odd elements
 * A detailed description of the vector element ordering for little endian and
 * big endian can be found at
 * http://www.ibm.com/developerworks/library/l-ibm-xl-c-cpp-compiler/index.html
 * Targeting your applications - what little endian and big endian IBM XL C/C++
 * compiler differences mean to you
 *
 * The mask to the shuffle vector instruction specifies the indices of the
 * elements from the two input vectors to place in the result. The elements are
 * numbered in array-access order, starting with the first vector. These
 * vectors are always of type v16i8, so each vector contains 16 byte-sized
 * elements. More info on the shuffle vector can be found in the
 * http://llvm.org/docs/LangRef.html#shufflevector-instruction
 * Language Reference.
 *
 * The RHSStartValue indicates whether the same input vectors are used (unary)
 * or two different input vectors are used, based on the following:
 *   - If the instruction uses the same vector for both inputs, the range of the
 *     indices will be 0 to 15. In this case, the RHSStart value passed should
 *     be 0.
 *   - If the instruction has two different vectors then the range of the
 *     indices will be 0 to 31. In this case, the RHSStart value passed should
 *     be 16 (indices 0-15 specify elements in the first vector while indices 16
 *     to 31 specify elements in the second vector).
 *
 * \param[in] N The shuffle vector SD Node to analyze
 * \param[in] IndexOffset Specifies whether to look for even or odd elements
 * \param[in] RHSStartValue Specifies the starting index for the righthand input
 * vector to the shuffle_vector instruction
 * \return true iff this shuffle vector represents an even or odd word merge
 */
static bool isVMerge(ShuffleVectorSDNode *N, unsigned IndexOffset,
                     unsigned RHSStartValue) {
  if (N->getValueType(0) != MVT::v16i8)
    return false;

  for (unsigned i = 0; i < 2; ++i)
    for (unsigned j = 0; j < 4; ++j)
      if (!isConstantOrUndef(N->getMaskElt(i*4+j),
                             i*RHSStartValue+j+IndexOffset) ||
          !isConstantOrUndef(N->getMaskElt(i*4+j+8),
                             i*RHSStartValue+j+IndexOffset+8))
        return false;
  return true;
}

/**
 * Determine if the specified shuffle mask is suitable for the vmrgew or
 * vmrgow instructions.
 *
 * \param[in] N The shuffle vector SD Node to analyze
 * \param[in] CheckEven Check for an even merge (true) or an odd merge (false)
 * \param[in] ShuffleKind Identify the type of merge:
 *   - 0 = big-endian merge with two different inputs;
 *   - 1 = either-endian merge with two identical inputs;
 *   - 2 = little-endian merge with two different inputs (inputs are swapped for
 *     little-endian merges).
 * \param[in] DAG The current SelectionDAG
 * \return true iff this shuffle mask is suitable for a vmrgew or vmrgow
 * instruction
 */
bool PPC::isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven,
                              unsigned ShuffleKind, SelectionDAG &DAG) {
  if (DAG.getDataLayout().isLittleEndian()) {
    unsigned indexOffset = CheckEven ? 4 : 0;
    if (ShuffleKind == 1) // Unary
      return isVMerge(N, indexOffset, 0);
    else if (ShuffleKind == 2) // swapped
      return isVMerge(N, indexOffset, 16);
    else
      return false;
  }
  else {
    unsigned indexOffset = CheckEven ? 0 : 4;
    if (ShuffleKind == 1) // Unary
      return isVMerge(N, indexOffset, 0);
    else if (ShuffleKind == 0) // Normal
      return isVMerge(N, indexOffset, 16);
    else
      return false;
  }
  return false;
}

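// Illustrative example (editorial, not in the original source): for a
// big-endian even merge with two different inputs (CheckEven == true,
// ShuffleKind == 0), isVMerge(N, 0, 16) accepts the v16i8 mask
//   <0,1,2,3, 16,17,18,19, 8,9,10,11, 24,25,26,27>
// which interleaves the even words (0 and 2) of both inputs -- the
// vmrgew result.
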
/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
/// amount, otherwise return -1.
/// The ShuffleKind distinguishes between big-endian operations with two
/// different inputs (0), either-endian operations with two identical inputs
/// (1), and little-endian operations with two different inputs (2).  For the
/// latter, the input operands are swapped (see PPCInstrAltivec.td).
int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
                             SelectionDAG &DAG) {
  if (N->getValueType(0) != MVT::v16i8)
    return -1;

  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);

  // Find the first non-undef value in the shuffle mask.
  unsigned i;
  for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i)
    /*search*/;

  if (i == 16) return -1;  // all undef.

  // Otherwise, check to see if the rest of the elements are consecutively
  // numbered from this value.
  unsigned ShiftAmt = SVOp->getMaskElt(i);
  if (ShiftAmt < i) return -1;

  ShiftAmt -= i;
  bool isLE = DAG.getDataLayout().isLittleEndian();

  if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) {
    // Check the rest of the elements to see if they are consecutive.
    for (++i; i != 16; ++i)
      if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
        return -1;
  } else if (ShuffleKind == 1) {
    // Check the rest of the elements to see if they are consecutive.
    for (++i; i != 16; ++i)
      if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
        return -1;
  } else
    return -1;

  if (isLE)
    ShiftAmt = 16 - ShiftAmt;

  return ShiftAmt;
}

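// Illustrative example (editorial, not in the original source): on a
// big-endian target with two different inputs (ShuffleKind == 0), the mask
//   <3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18>
// is consecutive starting at 3, so the function returns 3, matching a
// "vsldoi vD, vA, vB, 3". On little-endian the returned amount is rebiased
// to 16 - ShiftAmt because the operands are swapped.
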
/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a splat of a single element that is suitable for input to
/// VSPLTB/VSPLTH/VSPLTW.
bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
  assert(N->getValueType(0) == MVT::v16i8 &&
         (EltSize == 1 || EltSize == 2 || EltSize == 4));

  // The consecutive indices need to specify an element, not part of two
  // different elements.  So abandon ship early if this isn't the case.
  if (N->getMaskElt(0) % EltSize != 0)
    return false;

  // This is a splat operation if each element of the permute is the same, and
  // if the value doesn't reference the second vector.
  unsigned ElementBase = N->getMaskElt(0);

  // FIXME: Handle UNDEF elements too!
  if (ElementBase >= 16)
    return false;

  // Check that the indices are consecutive, in the case of a multi-byte element
  // splatted with a v16i8 mask.
  for (unsigned i = 1; i != EltSize; ++i)
    if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase))
      return false;

  for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
    if (N->getMaskElt(i) < 0) continue;
    for (unsigned j = 0; j != EltSize; ++j)
      if (N->getMaskElt(i+j) != N->getMaskElt(j))
        return false;
  }
  return true;
}

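// Illustrative example (editorial, not in the original source): with
// EltSize == 4, the v16i8 mask
//   <4,5,6,7, 4,5,6,7, 4,5,6,7, 4,5,6,7>
// passes every check above (ElementBase == 4, consecutive within the
// element, repeated across the vector), i.e. it splats word element 1 of
// the first input, which vspltw can do directly.
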
/// Check that the mask is shuffling N byte elements. Within each N byte
/// element of the mask, the indices could be either in increasing or
/// decreasing order as long as they are consecutive.
/// \param[in] N the shuffle vector SD Node to analyze
/// \param[in] Width the element width in bytes, could be 2/4/8/16 (HalfWord/
/// Word/DoubleWord/QuadWord).
/// \param[in] StepLen the index delta between adjacent bytes within each
/// element: 1 if the mask is in increasing order, -1 if decreasing.
/// \return true iff the mask is shuffling N byte elements.
static bool isNByteElemShuffleMask(ShuffleVectorSDNode *N, unsigned Width,
                                   int StepLen) {
  assert((Width == 2 || Width == 4 || Width == 8 || Width == 16) &&
         "Unexpected element width.");
  assert((StepLen == 1 || StepLen == -1) && "Unexpected step length.");

  unsigned NumOfElem = 16 / Width;
  unsigned MaskVal[16]; //  Width is never greater than 16
  for (unsigned i = 0; i < NumOfElem; ++i) {
    MaskVal[0] = N->getMaskElt(i * Width);
    if ((StepLen == 1) && (MaskVal[0] % Width)) {
      return false;
    } else if ((StepLen == -1) && ((MaskVal[0] + 1) % Width)) {
      return false;
    }

    for (unsigned int j = 1; j < Width; ++j) {
      MaskVal[j] = N->getMaskElt(i * Width + j);
      if (MaskVal[j] != MaskVal[j-1] + StepLen) {
        return false;
      }
    }
  }

  return true;
}

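// Illustrative example (editorial, not in the original source): with
// Width == 4 and StepLen == -1, the mask
//   <3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12>
// is accepted: each word's bytes are consecutive in decreasing order. This
// is the byte-reversed-word pattern that isXXBRWShuffleMask (below) checks
// for when matching xxbrw.
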
bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
                          unsigned &InsertAtByte, bool &Swap, bool IsLE) {
  if (!isNByteElemShuffleMask(N, 4, 1))
    return false;

  // Now we look at mask elements 0,4,8,12
  unsigned M0 = N->getMaskElt(0) / 4;
  unsigned M1 = N->getMaskElt(4) / 4;
  unsigned M2 = N->getMaskElt(8) / 4;
  unsigned M3 = N->getMaskElt(12) / 4;
  unsigned LittleEndianShifts[] = { 2, 1, 0, 3 };
  unsigned BigEndianShifts[] = { 3, 0, 1, 2 };

  // Below, let H and L be arbitrary elements of the shuffle mask
  // where H is in the range [4,7] and L is in the range [0,3].
  // H, 1, 2, 3 or L, 5, 6, 7
  if ((M0 > 3 && M1 == 1 && M2 == 2 && M3 == 3) ||
      (M0 < 4 && M1 == 5 && M2 == 6 && M3 == 7)) {
    ShiftElts = IsLE ? LittleEndianShifts[M0 & 0x3] : BigEndianShifts[M0 & 0x3];
    InsertAtByte = IsLE ? 12 : 0;
    Swap = M0 < 4;
    return true;
  }
  // 0, H, 2, 3 or 4, L, 6, 7
  if ((M1 > 3 && M0 == 0 && M2 == 2 && M3 == 3) ||
      (M1 < 4 && M0 == 4 && M2 == 6 && M3 == 7)) {
    ShiftElts = IsLE ? LittleEndianShifts[M1 & 0x3] : BigEndianShifts[M1 & 0x3];
    InsertAtByte = IsLE ? 8 : 4;
    Swap = M1 < 4;
    return true;
  }
  // 0, 1, H, 3 or 4, 5, L, 7
  if ((M2 > 3 && M0 == 0 && M1 == 1 && M3 == 3) ||
      (M2 < 4 && M0 == 4 && M1 == 5 && M3 == 7)) {
    ShiftElts = IsLE ? LittleEndianShifts[M2 & 0x3] : BigEndianShifts[M2 & 0x3];
    InsertAtByte = IsLE ? 4 : 8;
    Swap = M2 < 4;
    return true;
  }
  // 0, 1, 2, H or 4, 5, 6, L
  if ((M3 > 3 && M0 == 0 && M1 == 1 && M2 == 2) ||
      (M3 < 4 && M0 == 4 && M1 == 5 && M2 == 6)) {
    ShiftElts = IsLE ? LittleEndianShifts[M3 & 0x3] : BigEndianShifts[M3 & 0x3];
    InsertAtByte = IsLE ? 0 : 12;
    Swap = M3 < 4;
    return true;
  }

  // If both vector operands for the shuffle are the same vector, the mask will
  // contain only elements from the first one and the second one will be undef.
  if (N->getOperand(1).isUndef()) {
    ShiftElts = 0;
    Swap = true;
    unsigned XXINSERTWSrcElem = IsLE ? 2 : 1;
    if (M0 == XXINSERTWSrcElem && M1 == 1 && M2 == 2 && M3 == 3) {
      InsertAtByte = IsLE ? 12 : 0;
      return true;
    }
    if (M0 == 0 && M1 == XXINSERTWSrcElem && M2 == 2 && M3 == 3) {
      InsertAtByte = IsLE ? 8 : 4;
      return true;
    }
    if (M0 == 0 && M1 == 1 && M2 == XXINSERTWSrcElem && M3 == 3) {
      InsertAtByte = IsLE ? 4 : 8;
      return true;
    }
    if (M0 == 0 && M1 == 1 && M2 == 2 && M3 == XXINSERTWSrcElem) {
      InsertAtByte = IsLE ? 0 : 12;
      return true;
    }
  }

  return false;
}

bool PPC::isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
                               bool &Swap, bool IsLE) {
  assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
  // Ensure each byte index of the word is consecutive.
  if (!isNByteElemShuffleMask(N, 4, 1))
    return false;

  // Now we look at mask elements 0,4,8,12, which are the beginning of words.
  unsigned M0 = N->getMaskElt(0) / 4;
  unsigned M1 = N->getMaskElt(4) / 4;
  unsigned M2 = N->getMaskElt(8) / 4;
  unsigned M3 = N->getMaskElt(12) / 4;

  // If both vector operands for the shuffle are the same vector, the mask will
  // contain only elements from the first one and the second one will be undef.
  if (N->getOperand(1).isUndef()) {
    assert(M0 < 4 && "Indexing into an undef vector?");
    if (M1 != (M0 + 1) % 4 || M2 != (M1 + 1) % 4 || M3 != (M2 + 1) % 4)
      return false;

    ShiftElts = IsLE ? (4 - M0) % 4 : M0;
    Swap = false;
    return true;
  }

  // Ensure each word index of the ShuffleVector Mask is consecutive.
  if (M1 != (M0 + 1) % 8 || M2 != (M1 + 1) % 8 || M3 != (M2 + 1) % 8)
    return false;

  if (IsLE) {
    if (M0 == 0 || M0 == 7 || M0 == 6 || M0 == 5) {
      // Input vectors don't need to be swapped if the leading element
      // of the result is one of the 3 left elements of the second vector
      // (or if there is no shift to be done at all).
      Swap = false;
      ShiftElts = (8 - M0) % 8;
    } else if (M0 == 4 || M0 == 3 || M0 == 2 || M0 == 1) {
      // Input vectors need to be swapped if the leading element
      // of the result is one of the 3 left elements of the first vector
      // (or if we're shifting by 4 - thereby simply swapping the vectors).
      Swap = true;
      ShiftElts = (4 - M0) % 4;
    }

    return true;
  } else {                                          // BE
    if (M0 == 0 || M0 == 1 || M0 == 2 || M0 == 3) {
      // Input vectors don't need to be swapped if the leading element
      // of the result is one of the 4 elements of the first vector.
      Swap = false;
      ShiftElts = M0;
    } else if (M0 == 4 || M0 == 5 || M0 == 6 || M0 == 7) {
      // Input vectors need to be swapped if the leading element
      // of the result is one of the 4 elements of the right vector.
      Swap = true;
      ShiftElts = M0 - 4;
    }

    return true;
  }
}

static bool isXXBRShuffleMaskHelper(ShuffleVectorSDNode *N, int Width) {
  assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");

  if (!isNByteElemShuffleMask(N, Width, -1))
    return false;

  for (int i = 0; i < 16; i += Width)
    if (N->getMaskElt(i) != i + Width - 1)
      return false;

  return true;
}

bool PPC::isXXBRHShuffleMask(ShuffleVectorSDNode *N) {
  return isXXBRShuffleMaskHelper(N, 2);
}

bool PPC::isXXBRWShuffleMask(ShuffleVectorSDNode *N) {
  return isXXBRShuffleMaskHelper(N, 4);
}

bool PPC::isXXBRDShuffleMask(ShuffleVectorSDNode *N) {
  return isXXBRShuffleMaskHelper(N, 8);
}

bool PPC::isXXBRQShuffleMask(ShuffleVectorSDNode *N) {
  return isXXBRShuffleMaskHelper(N, 16);
}

/// Can node \p N be lowered to an XXPERMDI instruction? If so, set \p Swap
/// if the inputs to the instruction should be swapped and set \p DM to the
/// value for the immediate.
/// Specifically, set \p Swap to true only if \p N can be lowered to XXPERMDI
/// AND element 0 of the result comes from the first input (LE) or second input
/// (BE). Set \p DM to the calculated result (0-3) only if \p N can be lowered.
/// \return true iff the given mask of shuffle node \p N is a XXPERMDI shuffle
/// mask.
bool PPC::isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &DM,
                               bool &Swap, bool IsLE) {
  assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");

  // Ensure each byte index of the double word is consecutive.
  if (!isNByteElemShuffleMask(N, 8, 1))
    return false;

  unsigned M0 = N->getMaskElt(0) / 8;
  unsigned M1 = N->getMaskElt(8) / 8;
  assert(((M0 | M1) < 4) && "A mask element out of bounds?");

  // If both vector operands for the shuffle are the same vector, the mask will
  // contain only elements from the first one and the second one will be undef.
  if (N->getOperand(1).isUndef()) {
    if ((M0 | M1) < 2) {
      DM = IsLE ? (((~M1) & 1) << 1) + ((~M0) & 1) : (M0 << 1) + (M1 & 1);
      Swap = false;
      return true;
    } else
      return false;
  }

  if (IsLE) {
    if (M0 > 1 && M1 < 2) {
      Swap = false;
    } else if (M0 < 2 && M1 > 1) {
      M0 = (M0 + 2) % 4;
      M1 = (M1 + 2) % 4;
      Swap = true;
    } else
      return false;

    // Note: if control flow comes here that means Swap is already set above
    DM = (((~M1) & 1) << 1) + ((~M0) & 1);
    return true;
  } else { // BE
    if (M0 < 2 && M1 > 1) {
      Swap = false;
    } else if (M0 > 1 && M1 < 2) {
      M0 = (M0 + 2) % 4;
      M1 = (M1 + 2) % 4;
      Swap = true;
    } else
      return false;

    // Note: if control flow comes here that means Swap is already set above
    DM = (M0 << 1) + (M1 & 1);
    return true;
  }
}

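// Illustrative example (editorial, not in the original source): on a
// big-endian target with two different inputs, the v16i8 mask
//   <0,1,2,3,4,5,6,7, 24,25,26,27,28,29,30,31>
// gives M0 = 0 and M1 = 3, so Swap = false and DM = (0 << 1) + (3 & 1) = 1:
// doubleword 0 of the first input followed by doubleword 1 of the second,
// i.e. "xxpermdi vD, vA, vB, 1".
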
/// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
/// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize,
                                SelectionDAG &DAG) {
  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
  assert(isSplatShuffleMask(SVOp, EltSize));
  if (DAG.getDataLayout().isLittleEndian())
    return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize);
  else
    return SVOp->getMaskElt(0) / EltSize;
}

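// Illustrative example (editorial, not in the original source): for the
// EltSize == 4 splat mask <4,5,6,7, ...> (word element 1 of the input), the
// immediate is 1 on big-endian but (16/4) - 1 - 1 = 2 on little-endian,
// because vspltw numbers elements from the other end of the register.
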
/// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
/// by using a vspltis[bhw] instruction of the specified element size, return
/// the constant being splatted.  The ByteSize field indicates the number of
/// bytes of each element [124] -> [bhw].
SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
  SDValue OpVal(nullptr, 0);

  // If ByteSize of the splat is bigger than the element size of the
  // build_vector, then we have a case where we are checking for a splat where
  // multiple elements of the buildvector are folded together into a single
  // logical element of the splat (e.g. "vspltish 1" to splat {0,1}*8).
  unsigned EltSize = 16/N->getNumOperands();
  if (EltSize < ByteSize) {
    unsigned Multiple = ByteSize/EltSize;   // Number of BV entries per spltval.
    SDValue UniquedVals[4];
    assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");

    // See if all of the elements in the buildvector agree across.
    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
      if (N->getOperand(i).isUndef()) continue;
      // If the element isn't a constant, bail fully out.
      if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();

      if (!UniquedVals[i&(Multiple-1)].getNode())
        UniquedVals[i&(Multiple-1)] = N->getOperand(i);
      else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
        return SDValue();  // no match.
    }

    // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
    // either constant or undef values that are identical for each chunk.  See
    // if these chunks can form into a larger vspltis*.

    // Check to see if all of the leading entries are either 0 or -1.  If
    // neither, then this won't fit into the immediate field.
    bool LeadingZero = true;
    bool LeadingOnes = true;
    for (unsigned i = 0; i != Multiple-1; ++i) {
      if (!UniquedVals[i].getNode()) continue;  // Must have been undefs.

      LeadingZero &= isNullConstant(UniquedVals[i]);
      LeadingOnes &= isAllOnesConstant(UniquedVals[i]);
    }
    // Finally, check the least significant entry.
    if (LeadingZero) {
      if (!UniquedVals[Multiple-1].getNode())
        return DAG.getTargetConstant(0, SDLoc(N), MVT::i32);  // 0,0,0,undef
      int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getZExtValue();
      if (Val < 16)                                   // 0,0,0,4 -> vspltisw(4)
        return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
    }
    if (LeadingOnes) {
      if (!UniquedVals[Multiple-1].getNode())
        return DAG.getTargetConstant(~0U, SDLoc(N), MVT::i32); // -1,-1,-1,undef
      int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();
      if (Val >= -16)                            // -1,-1,-1,-2 -> vspltisw(-2)
        return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
    }

    return SDValue();
  }

  // Check to see if this buildvec has a single non-undef value in its elements.
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    if (N->getOperand(i).isUndef()) continue;
    if (!OpVal.getNode())
      OpVal = N->getOperand(i);
    else if (OpVal != N->getOperand(i))
      return SDValue();
  }

  if (!OpVal.getNode()) return SDValue();  // All UNDEF: use implicit def.

  unsigned ValSizeInBytes = EltSize;
  uint64_t Value = 0;
  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
    Value = CN->getZExtValue();
  } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
    assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
    Value = FloatToBits(CN->getValueAPF().convertToFloat());
  }

  // If the splat value is larger than the element value, then we can never do
  // this splat.  The only case that we could fit the replicated bits into our
  // immediate field for would be zero, and we prefer to use vxor for it.
  if (ValSizeInBytes < ByteSize) return SDValue();

  // If the element value is larger than the splat value, check if it consists
  // of a repeated bit pattern of size ByteSize.
  if (!APInt(ValSizeInBytes * 8, Value).isSplat(ByteSize * 8))
    return SDValue();

  // Properly sign extend the value.
  int MaskVal = SignExtend32(Value, ByteSize * 8);

  // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
  if (MaskVal == 0) return SDValue();

  // Finally, if this value fits in a 5 bit sext field, return it
  if (SignExtend32<5>(MaskVal) == MaskVal)
    return DAG.getTargetConstant(MaskVal, SDLoc(N), MVT::i32);
  return SDValue();
}

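// Illustrative example (editorial, not in the original source): a v16i8
// build_vector whose sixteen operands are all the i8 constant 3 reaches the
// single-value path above with EltSize == ByteSize == 1; APInt(8, 3) is
// trivially a splat of itself, MaskVal == 3 fits the 5-bit signed immediate
// field, so the result is the constant 3 -- realizable as "vspltisb 3".
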
/// isQVALIGNIShuffleMask - If this is a qvaligni shuffle mask, return the shift
/// amount, otherwise return -1.
int PPC::isQVALIGNIShuffleMask(SDNode *N) {
  EVT VT = N->getValueType(0);
  if (VT != MVT::v4f64 && VT != MVT::v4f32 && VT != MVT::v4i1)
    return -1;

  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);

  // Find the first non-undef value in the shuffle mask.
  unsigned i;
  for (i = 0; i != 4 && SVOp->getMaskElt(i) < 0; ++i)
    /*search*/;

  if (i == 4) return -1;  // all undef.

  // Otherwise, check to see if the rest of the elements are consecutively
  // numbered from this value.
  unsigned ShiftAmt = SVOp->getMaskElt(i);
  if (ShiftAmt < i) return -1;
  ShiftAmt -= i;

  // Check the rest of the elements to see if they are consecutive.
  for (++i; i != 4; ++i)
    if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
      return -1;

  return ShiftAmt;
}

//===----------------------------------------------------------------------===//
//  Addressing Mode Selection
//===----------------------------------------------------------------------===//

/// isIntS16Immediate - This method tests to see if the node is either a 32-bit
/// or 64-bit immediate, and if the value can be accurately represented as a
/// sign extension from a 16-bit value.  If so, this returns true and the
/// immediate.
bool llvm::isIntS16Immediate(SDNode *N, int16_t &Imm) {
  if (!isa<ConstantSDNode>(N))
    return false;

  Imm = (int16_t)cast<ConstantSDNode>(N)->getZExtValue();
  if (N->getValueType(0) == MVT::i32)
    return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue();
  else
    return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
}

bool llvm::isIntS16Immediate(SDValue Op, int16_t &Imm) {
  return isIntS16Immediate(Op.getNode(), Imm);
}

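// Illustrative example (editorial, not in the original source): for an i32
// constant -5, Imm becomes (int16_t)0xFFFB == -5 and the round-trip
// comparison succeeds, so the value fits a signed 16-bit displacement. For
// an i32 constant 70000 (0x11170), Imm becomes 4464 != 70000, so the
// function returns false and the constant cannot serve as a D-form offset.
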
/// SelectAddressEVXRegReg - Given the specified address, check to see if it
/// can be represented as an indexed [r+r] operation.
bool PPCTargetLowering::SelectAddressEVXRegReg(SDValue N, SDValue &Base,
                                               SDValue &Index,
                                               SelectionDAG &DAG) const {
  for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end();
      UI != E; ++UI) {
    if (MemSDNode *Memop = dyn_cast<MemSDNode>(*UI)) {
      if (Memop->getMemoryVT() == MVT::f64) {
          Base = N.getOperand(0);
          Index = N.getOperand(1);
          return true;
      }
    }
  }
  return false;
}

/// SelectAddressRegReg - Given the specified addressed, check to see if it
2256
/// can be represented as an indexed [r+r] operation.  Returns false if it
2257
/// can be more efficiently represented as [r+imm]. If \p EncodingAlignment is
2258
/// non-zero and N can be represented by a base register plus a signed 16-bit
2259
/// displacement, make a more precise judgement by checking (displacement % \p
2260
/// EncodingAlignment).
2261
bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base,
2262
                                            SDValue &Index, SelectionDAG &DAG,
2263
18.9k
                                            unsigned EncodingAlignment) const {
2264
18.9k
  int16_t imm = 0;
2265
18.9k
  if (N.getOpcode() == ISD::ADD) {
2266
7.85k
    // Is there any SPE load/store (f64), which can't handle 16bit offset?
2267
7.85k
    // SPE load/store can only handle 8-bit offsets.
2268
7.85k
    if (hasSPE() && 
SelectAddressEVXRegReg(N, Base, Index, DAG)43
)
2269
6
        return true;
2270
7.84k
    if (isIntS16Immediate(N.getOperand(1), imm) &&
2271
7.84k
        
(6.96k
!EncodingAlignment6.96k
||
!(imm % EncodingAlignment)2.89k
))
2272
6.93k
      return false; // r+i
2273
911
    if (N.getOperand(1).getOpcode() == PPCISD::Lo)
2274
256
      return false;    // r+i
2275
655
2276
655
    Base = N.getOperand(0);
2277
655
    Index = N.getOperand(1);
2278
655
    return true;
2279
11.1k
  } else if (N.getOpcode() == ISD::OR) {
2280
923
    if (isIntS16Immediate(N.getOperand(1), imm) &&
2281
923
        (!EncodingAlignment || !(imm % EncodingAlignment)))
2282
903
      return false; // r+i can fold it if we can.
2283
20
2284
20
    // If this is an or of disjoint bitfields, we can codegen this as an add
2285
20
    // (for better address arithmetic) if the LHS and RHS of the OR are provably
2286
20
    // disjoint.
2287
20
    KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2288
20
2289
20
    if (LHSKnown.Zero.getBoolValue()) {
2290
20
      KnownBits RHSKnown = DAG.computeKnownBits(N.getOperand(1));
2291
20
      // If all of the bits are known zero on the LHS or RHS, the add won't
2292
20
      // carry.
2293
20
      if (~(LHSKnown.Zero | RHSKnown.Zero) == 0) {
2294
20
        Base = N.getOperand(0);
2295
20
        Index = N.getOperand(1);
2296
20
        return true;
2297
20
      }
2298
10.2k
    }
2299
20
  }
2300
10.2k
2301
10.2k
  return false;
2302
10.2k
}
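Why the disjoint-OR case above may be lowered as an add: when KnownBits proves that no bit position is set in both operands, the addition cannot carry, so a | b and a + b compute the same address. A plain-integer sketch of the identity (illustrative only, independent of the DAG):

#include <cassert>
#include <cstdint>

int main() {
  uint64_t Lhs = 0xFFFF0000, Rhs = 0x0000FFFF; // provably disjoint bitfields
  assert((Lhs & Rhs) == 0);                    // no common set bit -> no carry
  assert((Lhs | Rhs) == Lhs + Rhs);            // OR and ADD coincide
  return 0;
}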
2303
2304
// If we happen to be doing an i64 load or store into a stack slot that has
2305
// less than a 4-byte alignment, then the frame-index elimination may need to
2306
// use an indexed load or store instruction (because the offset may not be a
2307
// multiple of 4). The extra register needed to hold the offset comes from the
2308
// register scavenger, and it is possible that the scavenger will need to use
2309
// an emergency spill slot. As a result, we need to make sure that a spill slot
2310
// is allocated when doing an i64 load/store into a less-than-4-byte-aligned
2311
// stack slot.
2312
3.48k
static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {
2313
3.48k
  // FIXME: This does not handle the LWA case.
2314
3.48k
  if (VT != MVT::i64)
2315
1.39k
    return;
2316
2.09k
2317
2.09k
  // NOTE: We'll exclude negative FIs here, which come from argument
2318
2.09k
  // lowering, because there are no known test cases triggering this problem
2319
2.09k
  // using packed structures (or similar). We can remove this exclusion if
2320
2.09k
  // we find such a test case. The reason why this is so test-case driven is
2321
2.09k
  // because this entire 'fixup' is only to prevent crashes (from the
2322
2.09k
  // register scavenger) on not-really-valid inputs. For example, if we have:
2323
2.09k
  //   %a = alloca i1
2324
2.09k
  //   %b = bitcast i1* %a to i64*
2325
2.09k
  //   store i64* a, i64 b
2326
2.09k
  // then the store should really be marked as 'align 1', but is not. If it
2327
2.09k
  // were marked as 'align 1' then the indexed form would have been
2328
2.09k
  // instruction-selected initially, and the problem this 'fixup' is preventing
2329
2.09k
  // won't happen regardless.
2330
2.09k
  if (FrameIdx < 0)
2331
586
    return;
2332
1.50k
2333
1.50k
  MachineFunction &MF = DAG.getMachineFunction();
2334
1.50k
  MachineFrameInfo &MFI = MF.getFrameInfo();
2335
1.50k
2336
1.50k
  unsigned Align = MFI.getObjectAlignment(FrameIdx);
2337
1.50k
  if (Align >= 4)
2338
1.44k
    return;
2339
56
2340
56
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2341
56
  FuncInfo->setHasNonRISpills();
2342
56
}
2343
2344
/// Returns true if the address N can be represented by a base register plus
2345
/// a signed 16-bit displacement [r+imm], and if it is not better
2346
/// represented as reg+reg.  If \p EncodingAlignment is non-zero, only accept
2347
/// displacements that are multiples of that value.
2348
bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
2349
                                            SDValue &Base,
2350
                                            SelectionDAG &DAG,
2351
11.1k
                                            unsigned EncodingAlignment) const {
2352
11.1k
  // FIXME dl should come from parent load or store, not from address
2353
11.1k
  SDLoc dl(N);
2354
11.1k
  // If this can be more profitably realized as r+r, fail.
2355
11.1k
  if (SelectAddressRegReg(N, Disp, Base, DAG, EncodingAlignment))
2356
232
    return false;
2357
10.8k
2358
10.8k
  if (N.getOpcode() == ISD::ADD) {
2359
4.49k
    int16_t imm = 0;
2360
4.49k
    if (isIntS16Immediate(N.getOperand(1), imm) &&
2361
4.49k
        (!EncodingAlignment || (imm % EncodingAlignment) == 0)) {
2362
4.32k
      Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
2363
4.32k
      if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
2364
317
        Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2365
317
        fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2366
4.00k
      } else {
2367
4.00k
        Base = N.getOperand(0);
2368
4.00k
      }
2369
4.32k
      return true; // [r+i]
2370
4.32k
    } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
2371
176
      // Match LOAD (ADD (X, Lo(G))).
2372
176
      assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()
2373
176
             && "Cannot handle constant offsets yet!");
2374
176
      Disp = N.getOperand(1).getOperand(0);  // The global address.
2375
176
      assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
2376
176
             Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
2377
176
             Disp.getOpcode() == ISD::TargetConstantPool ||
2378
176
             Disp.getOpcode() == ISD::TargetJumpTable);
2379
176
      Base = N.getOperand(0);
2380
176
      return true;  // [&g+r]
2381
176
    }
2382
6.37k
  } else if (N.getOpcode() == ISD::OR) {
2383
896
    int16_t imm = 0;
2384
896
    if (isIntS16Immediate(N.getOperand(1), imm) &&
2385
896
        (!EncodingAlignment || (imm % EncodingAlignment) == 0)) {
2386
896
      // If this is an or of disjoint bitfields, we can codegen this as an add
2387
896
      // (for better address arithmetic) if the LHS and RHS of the OR are
2388
896
      // provably disjoint.
2389
896
      KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2390
896
2391
896
      if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
2392
896
        // If all of the bits are known zero on the LHS or RHS, the add won't
2393
896
        // carry.
2394
896
        if (FrameIndexSDNode *FI =
2395
893
              dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
2396
893
          Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2397
893
          fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2398
893
        } else {
2399
3
          Base = N.getOperand(0);
2400
3
        }
2401
896
        Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
2402
896
        return true;
2403
896
      }
2404
5.48k
    }
2405
5.48k
  } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
2406
127
    // Loading from a constant address.
2407
127
2408
127
    // If this address fits entirely in a 16-bit sext immediate field, codegen
2409
127
    // this as "d, 0"
2410
127
    int16_t Imm;
2411
127
    if (isIntS16Immediate(CN, Imm) &&
2412
127
        (!EncodingAlignment || (Imm % EncodingAlignment) == 0)) {
2413
117
      Disp = DAG.getTargetConstant(Imm, dl, CN->getValueType(0));
2414
117
      Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2415
117
                             CN->getValueType(0));
2416
117
      return true;
2417
117
    }
2418
10
2419
10
    // Handle 32-bit sext immediates with LIS + addr mode.
2420
10
    if ((CN->getValueType(0) == MVT::i32 ||
2421
10
         (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) &&
2422
10
        (!EncodingAlignment || (CN->getZExtValue() % EncodingAlignment) == 0)) {
2423
7
      int Addr = (int)CN->getZExtValue();
2424
7
2425
7
      // Otherwise, break this down into an LIS + disp.
2426
7
      Disp = DAG.getTargetConstant((short)Addr, dl, MVT::i32);
2427
7
2428
7
      Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, dl,
2429
7
                                   MVT::i32);
2430
7
      unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
2431
7
      Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0);
2432
7
      return true;
2433
7
    }
2434
5.35k
  }
2435
5.35k
2436
5.35k
  Disp = DAG.getTargetConstant(0, dl, getPointerTy(DAG.getDataLayout()));
2437
5.35k
  if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
2438
2.27k
    Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2439
2.27k
    fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2440
2.27k
  } else
2441
3.08k
    Base = N;
2442
5.35k
  return true;      // [r+0]
2443
5.35k
}
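The LIS + displacement split above, worked through on a concrete constant. Subtracting the sign-extended low half before shifting compensates for displacements with bit 15 set; a sketch of the same arithmetic on plain integers:

#include <cassert>

int main() {
  int Addr = 0x12348000;
  short Lo = (short)Addr;          // -32768: the low half, sign-extended
  int Hi = (Addr - Lo) >> 16;      // 0x1235, one above the raw top half
  assert((Hi << 16) + Lo == Addr); // LIS Hi, then use Lo as the displacement
  return 0;
}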
2444
2445
/// SelectAddressRegRegOnly - Given the specified address, force it to be
2446
/// represented as an indexed [r+r] operation.
2447
bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
2448
                                                SDValue &Index,
2449
5.77k
                                                SelectionDAG &DAG) const {
2450
5.77k
  // Check to see if we can easily represent this as an [r+r] address.  This
2451
5.77k
  // will fail if it thinks that the address is more profitably represented as
2452
5.77k
  // reg+imm, e.g. where imm = 0.
2453
5.77k
  if (SelectAddressRegReg(N, Base, Index, DAG))
2454
124
    return true;
2455
5.64k
2456
5.64k
  // If the address is the result of an add, we will utilize the fact that the
2457
5.64k
  // address calculation includes an implicit add.  However, we can reduce
2458
5.64k
  // register pressure if we do not materialize a constant just for use as the
2459
5.64k
  // index register.  We only get rid of the add if it is not an add of a
2460
5.64k
  // value and a 16-bit signed constant and both have a single use.
2461
5.64k
  int16_t imm = 0;
2462
5.64k
  if (N.getOpcode() == ISD::ADD &&
2463
5.64k
      (!isIntS16Immediate(N.getOperand(1), imm) ||
2464
1.28k
       !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
2465
1.15k
    Base = N.getOperand(0);
2466
1.15k
    Index = N.getOperand(1);
2467
1.15k
    return true;
2468
1.15k
  }
2469
4.49k
2470
4.49k
  // Otherwise, do it the hard way, using R0 as the base register.
2471
4.49k
  Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2472
4.49k
                         N.getValueType());
2473
4.49k
  Index = N;
2474
4.49k
  return true;
2475
4.49k
}
2476
2477
/// Returns true if we should use a direct load into vector instruction
2478
/// (such as lxsd or lfd), instead of a load into gpr + direct move sequence.
2479
518
static bool usePartialVectorLoads(SDNode *N, const PPCSubtarget& ST) {
2480
518
2481
518
  // If there are any other uses other than scalar to vector, then we should
2482
518
  // keep it as a scalar load -> direct move pattern to prevent multiple
2483
518
  // loads.
2484
518
  LoadSDNode *LD = dyn_cast<LoadSDNode>(N);
2485
518
  if (!LD)
2486
0
    return false;
2487
518
2488
518
  EVT MemVT = LD->getMemoryVT();
2489
518
  if (!MemVT.isSimple())
2490
0
    return false;
2491
518
  switch(MemVT.getSimpleVT().SimpleTy) {
2492
518
  case MVT::i64:
2493
100
    break;
2494
518
  case MVT::i32:
2495
195
    if (!ST.hasP8Vector())
2496
163
      return false;
2497
32
    break;
2498
122
  case MVT::i16:
2499
122
  case MVT::i8:
2500
122
    if (!ST.hasP9Vector())
2501
118
      return false;
2502
4
    break;
2503
101
  default:
2504
101
    return false;
2505
136
  }
2506
136
2507
136
  SDValue LoadedVal(N, 0);
2508
136
  if (!LoadedVal.hasOneUse())
2509
9
    return false;
2510
127
2511
127
  for (SDNode::use_iterator UI = LD->use_begin(), UE = LD->use_end();
2512
211
       UI != UE; ++UI)
2513
203
    if (UI.getUse().get().getResNo() == 0 &&
2514
203
        UI->getOpcode() != ISD::SCALAR_TO_VECTOR)
2515
119
      return false;
2516
127
2517
127
  return true;
2518
127
}
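The shape this gate accepts, sketched as IR (illustrative only): a single-use load whose sole non-chain user is a scalar_to_vector, which can then be selected as one direct vector load such as lxsd instead of a GPR load followed by a direct move:

//   %s = load i64, i64* %p
//   %v = insertelement <2 x i64> undef, i64 %s, i32 0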
2519
2520
/// getPreIndexedAddressParts - Returns true by value, and sets the base
2521
/// pointer, offset pointer, and addressing mode by reference, if the node's
2522
/// address can be legally represented as a pre-indexed load/store address.
2523
bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
2524
                                                  SDValue &Offset,
2525
                                                  ISD::MemIndexedMode &AM,
2526
929
                                                  SelectionDAG &DAG) const {
2527
929
  if (DisablePPCPreinc) return false;
2528
929
2529
929
  bool isLoad = true;
2530
929
  SDValue Ptr;
2531
929
  EVT VT;
2532
929
  unsigned Alignment;
2533
929
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
2534
518
    Ptr = LD->getBasePtr();
2535
518
    VT = LD->getMemoryVT();
2536
518
    Alignment = LD->getAlignment();
2537
518
  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
2538
411
    Ptr = ST->getBasePtr();
2539
411
    VT  = ST->getMemoryVT();
2540
411
    Alignment = ST->getAlignment();
2541
411
    isLoad = false;
2542
411
  } else
2543
0
    return false;
2544
929
2545
929
  // Do not generate pre-inc forms for specific loads that feed scalar_to_vector
2546
929
  // instructions because we can fold these into a more efficient instruction
2547
929
  // instead (such as LXSD).
2548
929
  if (isLoad && usePartialVectorLoads(N, Subtarget)) {
2549
8
    return false;
2550
8
  }
2551
921
2552
921
  // PowerPC doesn't have preinc load/store instructions for vectors (except
2553
921
  // for QPX, which does have preinc r+r forms).
2554
921
  if (VT.isVector()) {
2555
1
    if (!Subtarget.hasQPX() || (VT != MVT::v4f64 && VT != MVT::v4f32)) {
2556
0
      return false;
2557
1
    } else if (SelectAddressRegRegOnly(Ptr, Offset, Base, DAG)) {
2558
1
      AM = ISD::PRE_INC;
2559
1
      return true;
2560
1
    }
2561
920
  }
2562
920
2563
920
  if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) {
2564
77
    // Common code will reject creating a pre-inc form if the base pointer
2565
77
    // is a frame index, or if N is a store and the base pointer is either
2566
77
    // the same as or a predecessor of the value being stored.  Check for
2567
77
    // those situations here, and try with swapped Base/Offset instead.
2568
77
    bool Swap = false;
2569
77
2570
77
    if (isa<FrameIndexSDNode>(Base) || isa<RegisterSDNode>(Base))
2571
0
      Swap = true;
2572
77
    else if (!isLoad) {
2573
17
      SDValue Val = cast<StoreSDNode>(N)->getValue();
2574
17
      if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode()))
2575
8
        Swap = true;
2576
17
    }
2577
77
2578
77
    if (Swap)
2579
8
      std::swap(Base, Offset);
2580
77
2581
77
    AM = ISD::PRE_INC;
2582
77
    return true;
2583
77
  }
2584
843
2585
843
  // LDU/STU can only handle immediates that are a multiple of 4.
2586
843
  if (VT != MVT::i64) {
2587
646
    if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, 0))
2588
0
      return false;
2589
197
  } else {
2590
197
    // LDU/STU need an address with at least 4-byte alignment.
2591
197
    if (Alignment < 4)
2592
4
      return false;
2593
193
2594
193
    if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, 4))
2595
0
      return false;
2596
839
  }
2597
839
2598
839
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
2599
447
    // PPC64 doesn't have lwau, but it does have lwaux.  Reject preinc load of
2600
447
    // sext i32 to i64 when addr mode is r+i.
2601
447
    if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&
2602
447
        LD->getExtensionType() == ISD::SEXTLOAD &&
2603
447
        isa<ConstantSDNode>(Offset))
2604
0
      return false;
2605
839
  }
2606
839
2607
839
  AM = ISD::PRE_INC;
2608
839
  return true;
2609
839
}
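For context, a pre-indexed (update) form folds the pointer bump into the memory access itself. A conceptual C++ loop of the kind that can benefit; whether the update form is actually chosen depends on the checks above:

long sumBytes(const char *P, int N) {
  long S = 0;
  for (int I = 0; I < N; ++I)
    S += P[I]; // the increment and the load can merge into one lbzu-style access
  return S;
}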
2610
2611
//===----------------------------------------------------------------------===//
2612
//  LowerOperation implementation
2613
//===----------------------------------------------------------------------===//
2614
2615
/// Sets HiOpFlags and LoOpFlags to the target MO flags for a label reference,
2616
/// adding the PIC flag when labels should be referenced via a PICBase.
2617
static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget,
2618
                               unsigned &HiOpFlags, unsigned &LoOpFlags,
2619
334
                               const GlobalValue *GV = nullptr) {
2620
334
  HiOpFlags = PPCII::MO_HA;
2621
334
  LoOpFlags = PPCII::MO_LO;
2622
334
2623
334
  // Don't use the pic base if not in PIC relocation model.
2624
334
  if (IsPIC) {
2625
29
    HiOpFlags |= PPCII::MO_PIC_FLAG;
2626
29
    LoOpFlags |= PPCII::MO_PIC_FLAG;
2627
29
  }
2628
334
2629
334
  // If this is a reference to a global value that requires a non-lazy-ptr, make
2630
334
  // sure that instruction lowering adds it.
2631
334
  if (GV && Subtarget.hasLazyResolverStub(GV)) {
2632
0
    HiOpFlags |= PPCII::MO_NLP_FLAG;
2633
0
    LoOpFlags |= PPCII::MO_NLP_FLAG;
2634
0
2635
0
    if (GV->hasHiddenVisibility()) {
2636
0
      HiOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG;
2637
0
      LoOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG;
2638
0
    }
2639
0
  }
2640
334
}
2641
2642
static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC,
2643
305
                             SelectionDAG &DAG) {
2644
305
  SDLoc DL(HiPart);
2645
305
  EVT PtrVT = HiPart.getValueType();
2646
305
  SDValue Zero = DAG.getConstant(0, DL, PtrVT);
2647
305
2648
305
  SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero);
2649
305
  SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero);
2650
305
2651
305
  // With PIC, the first instruction is actually "GR+hi(&G)".
2652
305
  if (isPIC)
2653
0
    Hi = DAG.getNode(ISD::ADD, DL, PtrVT,
2654
0
                     DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT), Hi);
2655
305
2656
305
  // Generate non-pic code that has direct accesses to the constant pool.
2657
305
  // The address of the global is just (hi(&g)+lo(&g)).
2658
305
  return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo);
2659
305
}
2660
2661
5.47k
static void setUsesTOCBasePtr(MachineFunction &MF) {
2662
5.47k
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2663
5.47k
  FuncInfo->setUsesTOCBasePtr();
2664
5.47k
}
2665
2666
5.46k
static void setUsesTOCBasePtr(SelectionDAG &DAG) {
2667
5.46k
  setUsesTOCBasePtr(DAG.getMachineFunction());
2668
5.46k
}
2669
2670
static SDValue getTOCEntry(SelectionDAG &DAG, const SDLoc &dl, bool Is64Bit,
2671
3.54k
                           SDValue GA) {
2672
3.54k
  EVT VT = Is64Bit ? MVT::i64 : MVT::i32;
2673
3.54k
  SDValue Reg = Is64Bit ? DAG.getRegister(PPC::X2, VT) :
2674
3.54k
                DAG.getNode(PPCISD::GlobalBaseReg, dl, VT);
2675
3.54k
2676
3.54k
  SDValue Ops[] = { GA, Reg };
2677
3.54k
  return DAG.getMemIntrinsicNode(
2678
3.54k
      PPCISD::TOC_ENTRY, dl, DAG.getVTList(VT, MVT::Other), Ops, VT,
2679
3.54k
      MachinePointerInfo::getGOT(DAG.getMachineFunction()), 0,
2680
3.54k
      MachineMemOperand::MOLoad);
2681
3.54k
}
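On 64-bit ELF, the TOC_ENTRY node built here is typically materialized as an addis/ld pair against the TOC pointer; a conceptual sketch, shown as comments since the exact sequence depends on the code model:

//   addis r3, r2, .LC0@toc@ha   // r2 is X2, the TOC base requested above
//   ld    r3, .LC0@toc@l(r3)    // .LC0 is the TOC slot holding the address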
2682
2683
SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
2684
1.79k
                                             SelectionDAG &DAG) const {
2685
1.79k
  EVT PtrVT = Op.getValueType();
2686
1.79k
  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
2687
1.79k
  const Constant *C = CP->getConstVal();
2688
1.79k
2689
1.79k
  // 64-bit SVR4 ABI code is always position-independent.
2690
1.79k
  // The actual address of the GlobalValue is stored in the TOC.
2691
1.79k
  if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
2692
1.69k
    setUsesTOCBasePtr(DAG);
2693
1.69k
    SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0);
2694
1.69k
    return getTOCEntry(DAG, SDLoc(CP), true, GA);
2695
1.69k
  }
2696
96
2697
96
  unsigned MOHiFlag, MOLoFlag;
2698
96
  bool IsPIC = isPositionIndependent();
2699
96
  getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
2700
96
2701
96
  if (IsPIC && Subtarget.isSVR4ABI()) {
2702
11
    SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(),
2703
11
                                           PPCII::MO_PIC_FLAG);
2704
11
    return getTOCEntry(DAG, SDLoc(CP), false, GA);
2705
11
  }
2706
85
2707
85
  SDValue CPIHi =
2708
85
    DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOHiFlag);
2709
85
  SDValue CPILo =
2710
85
    DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOLoFlag);
2711
85
  return LowerLabelRef(CPIHi, CPILo, IsPIC, DAG);
2712
85
}
2713
2714
// For 64-bit PowerPC, prefer the more compact relative encodings.
2715
// This trades 32 bits per jump table entry for one or two instructions
2716
// on the jump site.
2717
19
unsigned PPCTargetLowering::getJumpTableEncoding() const {
2718
19
  if (isJumpTableRelative())
2719
16
    return MachineJumpTableInfo::EK_LabelDifference32;
2720
3
2721
3
  return TargetLowering::getJumpTableEncoding();
2722
3
}
2723
2724
32
bool PPCTargetLowering::isJumpTableRelative() const {
2725
32
  if (Subtarget.isPPC64())
2726
26
    return true;
2727
6
  return TargetLowering::isJumpTableRelative();
2728
6
}
2729
2730
SDValue PPCTargetLowering::getPICJumpTableRelocBase(SDValue Table,
2731
10
                                                    SelectionDAG &DAG) const {
2732
10
  if (!Subtarget.isPPC64())
2733
0
    return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
2734
10
2735
10
  switch (getTargetMachine().getCodeModel()) {
2736
10
  case CodeModel::Small:
2737
6
  case CodeModel::Medium:
2738
6
    return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
2739
6
  default:
2740
4
    return DAG.getNode(PPCISD::GlobalBaseReg, SDLoc(),
2741
4
                       getPointerTy(DAG.getDataLayout()));
2742
10
  }
2743
10
}
2744
2745
const MCExpr *
2746
PPCTargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
2747
                                                unsigned JTI,
2748
56
                                                MCContext &Ctx) const {
2749
56
  if (!Subtarget.isPPC64())
2750
0
    return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
2751
56
2752
56
  switch (getTargetMachine().getCodeModel()) {
2753
56
  case CodeModel::Small:
2754
38
  case CodeModel::Medium:
2755
38
    return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
2756
38
  default:
2757
18
    return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
2758
56
  }
2759
56
}
2760
2761
13
SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
2762
13
  EVT PtrVT = Op.getValueType();
2763
13
  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
2764
13
2765
13
  // 64-bit SVR4 ABI code is always position-independent.
2766
13
  // The actual address of the GlobalValue is stored in the TOC.
2767
13
  if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
2768
10
    setUsesTOCBasePtr(DAG);
2769
10
    SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
2770
10
    return getTOCEntry(DAG, SDLoc(JT), true, GA);
2771
10
  }
2772
3
2773
3
  unsigned MOHiFlag, MOLoFlag;
2774
3
  bool IsPIC = isPositionIndependent();
2775
3
  getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
2776
3
2777
3
  if (IsPIC && Subtarget.isSVR4ABI()) {
2778
0
    SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
2779
0
                                        PPCII::MO_PIC_FLAG);
2780
0
    return getTOCEntry(DAG, SDLoc(GA), false, GA);
2781
0
  }
2782
3
2783
3
  SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag);
2784
3
  SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag);
2785
3
  return LowerLabelRef(JTIHi, JTILo, IsPIC, DAG);
2786
3
}
2787
2788
SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
2789
14
                                             SelectionDAG &DAG) const {
2790
14
  EVT PtrVT = Op.getValueType();
2791
14
  BlockAddressSDNode *BASDN = cast<BlockAddressSDNode>(Op);
2792
14
  const BlockAddress *BA = BASDN->getBlockAddress();
2793
14
2794
14
  // 64-bit SVR4 ABI code is always position-independent.
2795
14
  // The actual BlockAddress is stored in the TOC.
2796
14
  if (Subtarget.isSVR4ABI() &&
2797
14
      (Subtarget.isPPC64() || isPositionIndependent())) {
2798
12
    if (Subtarget.isPPC64())
2799
9
      setUsesTOCBasePtr(DAG);
2800
12
    SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset());
2801
12
    return getTOCEntry(DAG, SDLoc(BASDN), Subtarget.isPPC64(), GA);
2802
12
  }
2803
2
2804
2
  unsigned MOHiFlag, MOLoFlag;
2805
2
  bool IsPIC = isPositionIndependent();
2806
2
  getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
2807
2
  SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag);
2808
2
  SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag);
2809
2
  return LowerLabelRef(TgtBAHi, TgtBALo, IsPIC, DAG);
2810
2
}
2811
2812
SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
2813
63
                                              SelectionDAG &DAG) const {
2814
63
  // FIXME: TLS addresses currently use medium model code sequences,
2815
63
  // which is the most useful form.  Eventually support for small and
2816
63
  // large models could be added if users need it, at the cost of
2817
63
  // additional complexity.
2818
63
  GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
2819
63
  if (DAG.getTarget().useEmulatedTLS())
2820
6
    return LowerToTLSEmulatedModel(GA, DAG);
2821
57
2822
57
  SDLoc dl(GA);
2823
57
  const GlobalValue *GV = GA->getGlobal();
2824
57
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
2825
57
  bool is64bit = Subtarget.isPPC64();
2826
57
  const Module *M = DAG.getMachineFunction().getFunction().getParent();
2827
57
  PICLevel::Level picLevel = M->getPICLevel();
2828
57
2829
57
  const TargetMachine &TM = getTargetMachine();
2830
57
  TLSModel::Model Model = TM.getTLSModel(GV);
2831
57
2832
57
  if (Model == TLSModel::LocalExec) {
2833
10
    SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
2834
10
                                               PPCII::MO_TPREL_HA);
2835
10
    SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
2836
10
                                               PPCII::MO_TPREL_LO);
2837
10
    SDValue TLSReg = is64bit ? DAG.getRegister(PPC::X13, MVT::i64)
2838
10
                             : DAG.getRegister(PPC::R2, MVT::i32);
2839
10
2840
10
    SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
2841
10
    return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
2842
10
  }
2843
47
2844
47
  if (Model == TLSModel::InitialExec) {
2845
17
    SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
2846
17
    SDValue TGATLS = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
2847
17
                                                PPCII::MO_TLS);
2848
17
    SDValue GOTPtr;
2849
17
    if (is64bit) {
2850
15
      setUsesTOCBasePtr(DAG);
2851
15
      SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
2852
15
      GOTPtr = DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl,
2853
15
                           PtrVT, GOTReg, TGA);
2854
15
    } else {
2855
2
      if (!TM.isPositionIndependent())
2856
1
        GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT);
2857
1
      else if (picLevel == PICLevel::SmallPIC)
2858
0
        GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
2859
1
      else
2860
1
        GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
2861
2
    }
2862
17
    SDValue TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl,
2863
17
                                   PtrVT, TGA, GOTPtr);
2864
17
    return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS);
2865
17
  }
2866
30
2867
30
  if (Model == TLSModel::GeneralDynamic) {
2868
20
    SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
2869
20
    SDValue GOTPtr;
2870
20
    if (is64bit) {
2871
13
      setUsesTOCBasePtr(DAG);
2872
13
      SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
2873
13
      GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT,
2874
13
                                   GOTReg, TGA);
2875
13
    } else {
2876
7
      if (picLevel == PICLevel::SmallPIC)
2877
1
        GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
2878
6
      else
2879
6
        GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
2880
7
    }
2881
20
    return DAG.getNode(PPCISD::ADDI_TLSGD_L_ADDR, dl, PtrVT,
2882
20
                       GOTPtr, TGA, TGA);
2883
20
  }
2884
10
2885
10
  if (Model == TLSModel::LocalDynamic) {
2886
10
    SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
2887
10
    SDValue GOTPtr;
2888
10
    if (is64bit) {
2889
7
      setUsesTOCBasePtr(DAG);
2890
7
      SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
2891
7
      GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT,
2892
7
                           GOTReg, TGA);
2893
7
    } else {
2894
3
      if (picLevel == PICLevel::SmallPIC)
2895
0
        GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
2896
3
      else
2897
3
        GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
2898
3
    }
2899
10
    SDValue TLSAddr = DAG.getNode(PPCISD::ADDI_TLSLD_L_ADDR, dl,
2900
10
                                  PtrVT, GOTPtr, TGA, TGA);
2901
10
    SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl,
2902
10
                                      PtrVT, TLSAddr, TGA);
2903
10
    return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA);
2904
10
  }
2905
0
2906
0
  llvm_unreachable("Unknown TLS model!");
2907
0
}
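For orientation, the local-exec Hi/Lo pair built at the top of this function conventionally prints as the following ppc64 sequence (a conceptual sketch; r13 is the thread pointer claimed via PPC::X13):

//   addis r4, r13, x@tprel@ha   // PPCISD::Hi with the MO_TPREL_HA flag
//   addi  r4, r4,  x@tprel@l    // PPCISD::Lo with the MO_TPREL_LO flag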
2908
2909
SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
2910
2.03k
                                              SelectionDAG &DAG) const {
2911
2.03k
  EVT PtrVT = Op.getValueType();
2912
2.03k
  GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
2913
2.03k
  SDLoc DL(GSDN);
2914
2.03k
  const GlobalValue *GV = GSDN->getGlobal();
2915
2.03k
2916
2.03k
  // 64-bit SVR4 ABI code is always position-independent.
2917
2.03k
  // The actual address of the GlobalValue is stored in the TOC.
2918
2.03k
  if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
2919
1.79k
    setUsesTOCBasePtr(DAG);
2920
1.79k
    SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset());
2921
1.79k
    return getTOCEntry(DAG, DL, true, GA);
2922
1.79k
  }
2923
233
2924
233
  unsigned MOHiFlag, MOLoFlag;
2925
233
  bool IsPIC = isPositionIndependent();
2926
233
  getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag, GV);
2927
233
2928
233
  if (IsPIC && Subtarget.isSVR4ABI()) {
2929
18
    SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT,
2930
18
                                            GSDN->getOffset(),
2931
18
                                            PPCII::MO_PIC_FLAG);
2932
18
    return getTOCEntry(DAG, DL, false, GA);
2933
18
  }
2934
215
2935
215
  SDValue GAHi =
2936
215
    DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag);
2937
215
  SDValue GALo =
2938
215
    DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag);
2939
215
2940
215
  SDValue Ptr = LowerLabelRef(GAHi, GALo, IsPIC, DAG);
2941
215
2942
215
  // If the global reference is actually to a non-lazy-pointer, we have to do an
2943
215
  // extra load to get the address of the global.
2944
215
  if (MOHiFlag & PPCII::MO_NLP_FLAG)
2945
0
    Ptr = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
2946
215
  return Ptr;
2947
215
}
2948
2949
38
SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
2950
38
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
2951
38
  SDLoc dl(Op);
2952
38
2953
38
  if (Op.getValueType() == MVT::v2i64) {
2954
15
    // When the operands themselves are v2i64 values, we need to do something
2955
15
    // special because VSX has no underlying comparison operations for these.
2956
15
    if (Op.getOperand(0).getValueType() == MVT::v2i64) {
2957
12
      // Equality can be handled by casting to the legal type for Altivec
2958
12
      // comparisons, everything else needs to be expanded.
2959
12
      if (CC == ISD::SETEQ || CC == ISD::SETNE) {
2960
6
        return DAG.getNode(ISD::BITCAST, dl, MVT::v2i64,
2961
6
                 DAG.getSetCC(dl, MVT::v4i32,
2962
6
                   DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0)),
2963
6
                   DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(1)),
2964
6
                   CC));
2965
6
      }
2966
6
2967
6
      return SDValue();
2968
6
    }
2969
3
2970
3
    // We handle most of these in the usual way.
2971
3
    return Op;
2972
3
  }
2973
23
2974
23
  // If we're comparing for equality to zero, expose the fact that this is
2975
23
  // implemented as a ctlz/srl pair on ppc, so that the dag combiner can
2976
23
  // fold the new nodes.
2977
23
  if (SDValue V = lowerCmpEqZeroToCtlzSrl(Op, DAG))
2978
6
    return V;
2979
17
2980
17
  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
2981
15
    // Leave comparisons against 0 and -1 alone for now, since they're usually
2982
15
    // optimized.  FIXME: revisit this when we can custom lower all setcc
2983
15
    // optimizations.
2984
15
    if (C->isAllOnesValue() || C->isNullValue())
2985
10
      return SDValue();
2986
7
  }
2987
7
2988
7
  // If we have an integer seteq/setne, turn it into a compare against zero
2989
7
  // by xor'ing the rhs with the lhs, which is faster than setting a
2990
7
  // condition register, reading it back out, and masking the correct bit.  The
2991
7
  // normal approach here uses sub to do this instead of xor.  Using xor exposes
2992
7
  // the result to other bit-twiddling opportunities.
2993
7
  EVT LHSVT = Op.getOperand(0).getValueType();
2994
7
  if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
2995
2
    EVT VT = Op.getValueType();
2996
2
    SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, Op.getOperand(0),
2997
2
                                Op.getOperand(1));
2998
2
    return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, dl, LHSVT), CC);
2999
2
  }
3000
5
  return SDValue();
3001
5
}
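The xor rewrite above rests on the identity that a == b exactly when a ^ b == 0, so the transformed setcc compares against zero while exposing the xor to further combines. A plain-integer sketch:

#include <cassert>
#include <cstdint>

int main() {
  uint32_t A = 0x1234, B = 0x1234, C = 0x1235;
  assert(((A ^ B) == 0) == (A == B)); // equal operands
  assert(((A ^ C) == 0) == (A == C)); // unequal operands
  return 0;
}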
3002
3003
2
SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
3004
2
  SDNode *Node = Op.getNode();
3005
2
  EVT VT = Node->getValueType(0);
3006
2
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3007
2
  SDValue InChain = Node->getOperand(0);
3008
2
  SDValue VAListPtr = Node->getOperand(1);
3009
2
  const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
3010
2
  SDLoc dl(Node);
3011
2
3012
2
  assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only");
3013
2
3014
2
  // gpr_index
3015
2
  SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
3016
2
                                    VAListPtr, MachinePointerInfo(SV), MVT::i8);
3017
2
  InChain = GprIndex.getValue(1);
3018
2
3019
2
  if (VT == MVT::i64) {
3020
0
    // Check if GprIndex is even
3021
0
    SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex,
3022
0
                                 DAG.getConstant(1, dl, MVT::i32));
3023
0
    SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd,
3024
0
                                DAG.getConstant(0, dl, MVT::i32), ISD::SETNE);
3025
0
    SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex,
3026
0
                                          DAG.getConstant(1, dl, MVT::i32));
3027
0
    // Align GprIndex to be even if it isn't
3028
0
    GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne,
3029
0
                           GprIndex);
3030
0
  }
3031
2
3032
2
  // fpr index is 1 byte after gpr
3033
2
  SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3034
2
                               DAG.getConstant(1, dl, MVT::i32));
3035
2
3036
2
  // fpr
3037
2
  SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
3038
2
                                    FprPtr, MachinePointerInfo(SV), MVT::i8);
3039
2
  InChain = FprIndex.getValue(1);
3040
2
3041
2
  SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3042
2
                                       DAG.getConstant(8, dl, MVT::i32));
3043
2
3044
2
  SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3045
2
                                        DAG.getConstant(4, dl, MVT::i32));
3046
2
3047
2
  // areas
3048
2
  SDValue OverflowArea =
3049
2
      DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr, MachinePointerInfo());
3050
2
  InChain = OverflowArea.getValue(1);
3051
2
3052
2
  SDValue RegSaveArea =
3053
2
      DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr, MachinePointerInfo());
3054
2
  InChain = RegSaveArea.getValue(1);
3055
2
3056
2
  // select overflow_area if index > 8
3057
2
  SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex,
3058
2
                            DAG.getConstant(8, dl, MVT::i32), ISD::SETLT);
3059
2
3060
2
  // adjustment constant gpr_index * 4/8
3061
2
  SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32,
3062
2
                                    VT.isInteger() ? GprIndex : FprIndex,
3063
2
                                    DAG.getConstant(VT.isInteger() ? 4 : 8, dl,
3064
2
                                                    MVT::i32));
3065
2
3066
2
  // OurReg = RegSaveArea + RegConstant
3067
2
  SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea,
3068
2
                               RegConstant);
3069
2
3070
2
  // Floating types are 32 bytes into RegSaveArea
3071
2
  if (VT.isFloatingPoint())
3072
0
    OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg,
3073
0
                         DAG.getConstant(32, dl, MVT::i32));
3074
2
3075
2
  // increase {f,g}pr_index by 1 (or 2 if VT is i64)
3076
2
  SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32,
3077
2
                                   VT.isInteger() ? GprIndex : FprIndex,
3078
2
                                   DAG.getConstant(VT == MVT::i64 ? 2 : 1, dl,
3079
2
                                                   MVT::i32));
3080
2
3081
2
  InChain = DAG.getTruncStore(InChain, dl, IndexPlus1,
3082
2
                              VT.isInteger() ? VAListPtr : FprPtr,
3083
2
                              MachinePointerInfo(SV), MVT::i8);
3084
2
3085
2
  // determine if we should load from reg_save_area or overflow_area
3086
2
  SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea);
3087
2
3088
2
  // increase overflow_area by 4/8 if gpr/fpr > 8
3089
2
  SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea,
3090
2
                                          DAG.getConstant(VT.isInteger() ? 4 : 8,
3091
2
                                          dl, MVT::i32));
3092
2
3093
2
  OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea,
3094
2
                             OverflowAreaPlusN);
3095
2
3096
2
  InChain = DAG.getTruncStore(InChain, dl, OverflowArea, OverflowAreaPtr,
3097
2
                              MachinePointerInfo(), MVT::i32);
3098
2
3099
2
  return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo());
3100
2
}
3101
3102
1
SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
3103
1
  assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only");
3104
1
3105
1
  // We have to copy the entire va_list struct:
3106
1
  // 2*sizeof(char) + 2 bytes of alignment + 2*sizeof(char*) = 12 bytes
3107
1
  return DAG.getMemcpy(Op.getOperand(0), Op,
3108
1
                       Op.getOperand(1), Op.getOperand(2),
3109
1
                       DAG.getConstant(12, SDLoc(Op), MVT::i32), 8, false, true,
3110
1
                       false, MachinePointerInfo(), MachinePointerInfo());
3111
1
}
3112
3113
SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
3114
1
                                                  SelectionDAG &DAG) const {
3115
1
  return Op.getOperand(0);
3116
1
}
3117
3118
SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
3119
1
                                                SelectionDAG &DAG) const {
3120
1
  SDValue Chain = Op.getOperand(0);
3121
1
  SDValue Trmp = Op.getOperand(1); // trampoline
3122
1
  SDValue FPtr = Op.getOperand(2); // nested function
3123
1
  SDValue Nest = Op.getOperand(3); // 'nest' parameter value
3124
1
  SDLoc dl(Op);
3125
1
3126
1
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3127
1
  bool isPPC64 = (PtrVT == MVT::i64);
3128
1
  Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
3129
1
3130
1
  TargetLowering::ArgListTy Args;
3131
1
  TargetLowering::ArgListEntry Entry;
3132
1
3133
1
  Entry.Ty = IntPtrTy;
3134
1
  Entry.Node = Trmp; Args.push_back(Entry);
3135
1
3136
1
  // TrampSize == (isPPC64 ? 48 : 40);
3137
1
  Entry.Node = DAG.getConstant(isPPC64 ? 48 : 40, dl,
3138
1
                               isPPC64 ? MVT::i64 : MVT::i32);
3139
1
  Args.push_back(Entry);
3140
1
3141
1
  Entry.Node = FPtr; Args.push_back(Entry);
3142
1
  Entry.Node = Nest; Args.push_back(Entry);
3143
1
3144
1
  // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
3145
1
  TargetLowering::CallLoweringInfo CLI(DAG);
3146
1
  CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
3147
1
      CallingConv::C, Type::getVoidTy(*DAG.getContext()),
3148
1
      DAG.getExternalSymbol("__trampoline_setup", PtrVT), std::move(Args));
3149
1
3150
1
  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
3151
1
  return CallResult.second;
3152
1
}
3153
3154
8
SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
3155
8
  MachineFunction &MF = DAG.getMachineFunction();
3156
8
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3157
8
  EVT PtrVT = getPointerTy(MF.getDataLayout());
3158
8
3159
8
  SDLoc dl(Op);
3160
8
3161
8
  if (Subtarget.isDarwinABI() || Subtarget.isPPC64()) {
3162
6
    // vastart just stores the address of the VarArgsFrameIndex slot into the
3163
6
    // memory location argument.
3164
6
    SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3165
6
    const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3166
6
    return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
3167
6
                        MachinePointerInfo(SV));
3168
6
  }
3169
2
3170
2
  // For the 32-bit SVR4 ABI we follow the layout of the va_list struct.
3171
2
  // We suppose the given va_list is already allocated.
3172
2
  //
3173
2
  // typedef struct {
3174
2
  //  char gpr;     /* index into the array of 8 GPRs
3175
2
  //                 * stored in the register save area
3176
2
  //                 * gpr=0 corresponds to r3,
3177
2
  //                 * gpr=1 to r4, etc.
3178
2
  //                 */
3179
2
  //  char fpr;     /* index into the array of 8 FPRs
3180
2
  //                 * stored in the register save area
3181
2
  //                 * fpr=0 corresponds to f1,
3182
2
  //                 * fpr=1 to f2, etc.
3183
2
  //                 */
3184
2
  //  char *overflow_arg_area;
3185
2
  //                /* location on stack that holds
3186
2
  //                 * the next overflow argument
3187
2
  //                 */
3188
2
  //  char *reg_save_area;
3189
2
  //               /* where r3:r10 and f1:f8 (if saved)
3190
2
  //                * are stored
3191
2
  //                */
3192
2
  // } va_list[1];
3193
2
3194
2
  SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), dl, MVT::i32);
3195
2
  SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), dl, MVT::i32);
3196
2
  SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(),
3197
2
                                            PtrVT);
3198
2
  SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
3199
2
                                 PtrVT);
3200
2
3201
2
  uint64_t FrameOffset = PtrVT.getSizeInBits()/8;
3202
2
  SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, dl, PtrVT);
3203
2
3204
2
  uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1;
3205
2
  SDValue ConstStackOffset = DAG.getConstant(StackOffset, dl, PtrVT);
3206
2
3207
2
  uint64_t FPROffset = 1;
3208
2
  SDValue ConstFPROffset = DAG.getConstant(FPROffset, dl, PtrVT);
3209
2
3210
2
  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3211
2
3212
2
  // Store first byte : number of int regs
3213
2
  SDValue firstStore =
3214
2
      DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR, Op.getOperand(1),
3215
2
                        MachinePointerInfo(SV), MVT::i8);
3216
2
  uint64_t nextOffset = FPROffset;
3217
2
  SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1),
3218
2
                                  ConstFPROffset);
3219
2
3220
2
  // Store second byte : number of float regs
3221
2
  SDValue secondStore =
3222
2
      DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr,
3223
2
                        MachinePointerInfo(SV, nextOffset), MVT::i8);
3224
2
  nextOffset += StackOffset;
3225
2
  nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset);
3226
2
3227
2
  // Store second word : arguments given on stack
3228
2
  SDValue thirdStore = DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr,
3229
2
                                    MachinePointerInfo(SV, nextOffset));
3230
2
  nextOffset += FrameOffset;
3231
2
  nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset);
3232
2
3233
2
  // Store third word : arguments given in registers
3234
2
  return DAG.getStore(thirdStore, dl, FR, nextPtr,
3235
2
                      MachinePointerInfo(SV, nextOffset));
3236
2
}
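The byte offsets stored above (0 and 1 for the index bytes, 4 and 8 for the two pointers) realize the struct from the comment. A plain C++ sketch of the layout; the 12-byte size assumes 32-bit pointers, matching the memcpy in LowerVACOPY:

struct VAList32 {
  char  gpr;               // offset 0: next GPR index (r3..r10)
  char  fpr;               // offset 1: next FPR index (f1..f8)
  // two bytes of alignment padding
  char *overflow_arg_area; // offset 4: next argument passed on the stack
  char *reg_save_area;     // offset 8: where r3:r10 / f1:f8 were spilled
};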
3237
3238
/// FPR - The set of FP registers that should be allocated for arguments,
3239
/// on Darwin.
3240
static const MCPhysReg FPR[] = {PPC::F1,  PPC::F2,  PPC::F3, PPC::F4, PPC::F5,
3241
                                PPC::F6,  PPC::F7,  PPC::F8, PPC::F9, PPC::F10,
3242
                                PPC::F11, PPC::F12, PPC::F13};
3243
3244
/// QFPR - The set of QPX registers that should be allocated for arguments.
3245
static const MCPhysReg QFPR[] = {
3246
    PPC::QF1, PPC::QF2, PPC::QF3,  PPC::QF4,  PPC::QF5,  PPC::QF6, PPC::QF7,
3247
    PPC::QF8, PPC::QF9, PPC::QF10, PPC::QF11, PPC::QF12, PPC::QF13};
3248
3249
/// CalculateStackSlotSize - Calculates the size reserved for this argument on
3250
/// the stack.
3251
static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
3252
26.1k
                                       unsigned PtrByteSize) {
3253
26.1k
  unsigned ArgSize = ArgVT.getStoreSize();
3254
26.1k
  if (Flags.isByVal())
3255
144
    ArgSize = Flags.getByValSize();
3256
26.1k
3257
26.1k
  // Round up to multiples of the pointer size, except for array members,
3258
26.1k
  // which are always packed.
3259
26.1k
  if (!Flags.isInConsecutiveRegs())
3260
23.5k
    ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
3261
26.1k
3262
26.1k
  return ArgSize;
3263
26.1k
}
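The rounding expression above, isolated as a hypothetical helper with one worked case (with 8-byte pointers, an 11-byte byval occupies two slots):

constexpr unsigned roundUpTo(unsigned Size, unsigned Align) {
  return ((Size + Align - 1) / Align) * Align; // same form as above
}
static_assert(roundUpTo(11, 8) == 16, "11-byte byval takes two 8-byte slots");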
3264
3265
/// CalculateStackSlotAlignment - Calculates the alignment of this argument
3266
/// on the stack.
3267
static unsigned CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT,
3268
                                            ISD::ArgFlagsTy Flags,
3269
48.6k
                                            unsigned PtrByteSize) {
3270
48.6k
  unsigned Align = PtrByteSize;
3271
48.6k
3272
48.6k
  // Altivec parameters are padded to a 16 byte boundary.
3273
48.6k
  if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
3274
48.6k
      ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
3275
48.6k
      ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
3276
48.6k
      ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
3277
9.96k
    Align = 16;
3278
38.6k
  // QPX vector types stored in double-precision are padded to a 32 byte
3279
38.6k
  // boundary.
3280
38.6k
  else if (ArgVT == MVT::v4f64 || ArgVT == MVT::v4i1)
3281
266
    Align = 32;
3282
48.6k
3283
48.6k
  // ByVal parameters are aligned as requested.
3284
48.6k
  if (Flags.isByVal()) {
3285
282
    unsigned BVAlign = Flags.getByValAlign();
3286
282
    if (BVAlign > PtrByteSize) {
3287
48
      if (BVAlign % PtrByteSize != 0)
3288
48
          llvm_unreachable(
3289
48
            "ByVal alignment is not a multiple of the pointer size");
3290
48
3291
48
      Align = BVAlign;
3292
48
    }
3293
282
  }
3294
48.6k
3295
48.6k
  // Array members are always packed to their original alignment.
3296
48.6k
  if (Flags.isInConsecutiveRegs()) {
3297
4.39k
    // If the array member was split into multiple registers, the first
3298
4.39k
    // needs to be aligned to the size of the full type.  (Except for
3299
4.39k
    // ppcf128, which is only aligned as its f64 components.)
3300
4.39k
    if (Flags.isSplit() && OrigVT != MVT::ppcf128)
3301
152
      Align = OrigVT.getStoreSize();
3302
4.24k
    else
3303
4.24k
      Align = ArgVT.getStoreSize();
3304
4.39k
  }
3305
48.6k
3306
48.6k
  return Align;
3307
48.6k
}
3308
3309
/// CalculateStackSlotUsed - Return whether this argument will use its
3310
/// stack slot (instead of being passed in registers).  ArgOffset,
3311
/// AvailableFPRs, and AvailableVRs must hold the current argument
3312
/// position, and will be updated to account for this argument.
3313
static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT,
3314
                                   ISD::ArgFlagsTy Flags,
3315
                                   unsigned PtrByteSize,
3316
                                   unsigned LinkageSize,
3317
                                   unsigned ParamAreaSize,
3318
                                   unsigned &ArgOffset,
3319
                                   unsigned &AvailableFPRs,
3320
22.7k
                                   unsigned &AvailableVRs, bool HasQPX) {
3321
22.7k
  bool UseMemory = false;
3322
22.7k
3323
22.7k
  // Respect alignment of argument on the stack.
3324
22.7k
  unsigned Align =
3325
22.7k
    CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
3326
22.7k
  ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
3327
22.7k
  // If there's no space left in the argument save area, we must
3328
22.7k
  // use memory (this check also catches zero-sized arguments).
3329
22.7k
  if (ArgOffset >= LinkageSize + ParamAreaSize)
3330
2.74k
    UseMemory = true;
3331
22.7k
3332
22.7k
  // Allocate argument on the stack.
3333
22.7k
  ArgOffset += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
3334
22.7k
  if (Flags.isInConsecutiveRegsLast())
3335
271
    ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
3336
22.7k
  // If we overran the argument save area, we must use memory
3337
22.7k
  // (this check catches arguments passed partially in memory)
3338
22.7k
  if (ArgOffset > LinkageSize + ParamAreaSize)
3339
2.79k
    UseMemory = true;
3340
22.7k
3341
22.7k
  // However, if the argument is actually passed in an FPR or a VR,
3342
22.7k
  // we don't use memory after all.
3343
22.7k
  if (!Flags.isByVal()) {
3344
22.6k
    if (ArgVT == MVT::f32 || ArgVT == MVT::f64 ||
3345
22.6k
        // QPX registers overlap with the scalar FP registers.
3346
22.6k
        (HasQPX && (ArgVT == MVT::v4f32 ||
3347
302
                    ArgVT == MVT::v4f64 ||
3348
302
                    ArgVT == MVT::v4i1)))
3349
4.71k
      if (AvailableFPRs > 0) {
3350
4.53k
        --AvailableFPRs;
3351
4.53k
        return false;
3352
4.53k
      }
3353
18.0k
    if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
3354
18.0k
        ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
3355
18.0k
        ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
3356
18.0k
        ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
3357
4.98k
      if (AvailableVRs > 0) {
3358
4.66k
        --AvailableVRs;
3359
4.66k
        return false;
3360
4.66k
      }
3361
13.5k
  }
3362
13.5k
3363
13.5k
  return UseMemory;
3364
13.5k
}
3365
3366
/// EnsureStackAlignment - Round stack frame size up from NumBytes to
3367
/// ensure minimum alignment required for target.
3368
static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering,
3369
11.1k
                                     unsigned NumBytes) {
3370
11.1k
  unsigned TargetAlign = Lowering->getStackAlignment();
3371
11.1k
  unsigned AlignMask = TargetAlign - 1;
3372
11.1k
  NumBytes = (NumBytes + AlignMask) & ~AlignMask;
3373
11.1k
  return NumBytes;
3374
11.1k
}
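EnsureStackAlignment uses the mask form of the same round-up, valid because the target stack alignment is a power of two; one worked instance:

static_assert(((40u + 15u) & ~15u) == 48u,
              "40 bytes rounded up to the next 16-byte boundary");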
3375
3376
SDValue PPCTargetLowering::LowerFormalArguments(
3377
    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3378
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3379
11.1k
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3380
11.1k
  if (Subtarget.isSVR4ABI()) {
3381
11.1k
    if (Subtarget.isPPC64())
3382
10.1k
      return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins,
3383
10.1k
                                         dl, DAG, InVals);
3384
1.04k
    else
3385
1.04k
      return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins,
3386
1.04k
                                         dl, DAG, InVals);
3387
24
  } else {
3388
24
    return LowerFormalArguments_Darwin(Chain, CallConv, isVarArg, Ins,
3389
24
                                       dl, DAG, InVals);
3390
24
  }
3391
11.1k
}
3392
3393
SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
3394
    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3395
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3396
1.04k
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3397
1.04k
3398
1.04k
  // 32-bit SVR4 ABI Stack Frame Layout:
3399
1.04k
  //              +-----------------------------------+
3400
1.04k
  //        +-->  |            Back chain             |
3401
1.04k
  //        |     +-----------------------------------+
3402
1.04k
  //        |     | Floating-point register save area |
3403
1.04k
  //        |     +-----------------------------------+
3404
1.04k
  //        |     |    General register save area     |
3405
1.04k
  //        |     +-----------------------------------+
3406
1.04k
  //        |     |          CR save word             |
3407
1.04k
  //        |     +-----------------------------------+
3408
1.04k
  //        |     |         VRSAVE save word          |
3409
1.04k
  //        |     +-----------------------------------+
3410
1.04k
  //        |     |         Alignment padding         |
3411
1.04k
  //        |     +-----------------------------------+
3412
1.04k
  //        |     |     Vector register save area     |
3413
1.04k
  //        |     +-----------------------------------+
3414
1.04k
  //        |     |       Local variable space        |
3415
1.04k
  //        |     +-----------------------------------+
3416
1.04k
  //        |     |        Parameter list area        |
3417
1.04k
  //        |     +-----------------------------------+
3418
1.04k
  //        |     |           LR save word            |
3419
1.04k
  //        |     +-----------------------------------+
3420
1.04k
  // SP-->  +---  |            Back chain             |
3421
1.04k
  //              +-----------------------------------+
3422
1.04k
  //
3423
1.04k
  // Specifications:
3424
1.04k
  //   System V Application Binary Interface PowerPC Processor Supplement
3425
1.04k
  //   AltiVec Technology Programming Interface Manual
3426
1.04k
3427
1.04k
  MachineFunction &MF = DAG.getMachineFunction();
3428
1.04k
  MachineFrameInfo &MFI = MF.getFrameInfo();
3429
1.04k
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3430
1.04k
3431
1.04k
  EVT PtrVT = getPointerTy(MF.getDataLayout());
3432
1.04k
  // Potential tail calls could cause overwriting of argument stack slots.
3433
1.04k
  bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
3434
1.04k
                       (CallConv == CallingConv::Fast));
3435
1.04k
  unsigned PtrByteSize = 4;
3436
1.04k
3437
1.04k
  // Assign locations to all of the incoming arguments.
3438
1.04k
  SmallVector<CCValAssign, 16> ArgLocs;
3439
1.04k
  PPCCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
3440
1.04k
                 *DAG.getContext());
3441
1.04k
3442
1.04k
  // Reserve space for the linkage area on the stack.
3443
1.04k
  unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
3444
1.04k
  CCInfo.AllocateStack(LinkageSize, PtrByteSize);
3445
1.04k
  if (useSoftFloat())
3446
24
    CCInfo.PreAnalyzeFormalArguments(Ins);
3447
1.04k
3448
1.04k
  CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);
3449
1.04k
  CCInfo.clearWasPPCF128();
3450
1.04k
3451
2.90k
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3452
1.85k
    CCValAssign &VA = ArgLocs[i];
3453
1.85k
3454
1.85k
    // Arguments stored in registers.
3455
1.85k
    if (VA.isRegLoc()) {
3456
1.76k
      const TargetRegisterClass *RC;
3457
1.76k
      EVT ValVT = VA.getValVT();
3458
1.76k
3459
1.76k
      switch (ValVT.getSimpleVT().SimpleTy) {
3460
1.76k
        default:
3461
0
          llvm_unreachable("ValVT not supported by formal arguments Lowering");
3462
1.76k
        case MVT::i1:
3463
1.23k
        case MVT::i32:
3464
1.23k
          RC = &PPC::GPRCRegClass;
3465
1.23k
          break;
3466
1.23k
        case MVT::f32:
3467
226
          if (Subtarget.hasP8Vector())
3468
0
            RC = &PPC::VSSRCRegClass;
3469
226
          else if (Subtarget.hasSPE())
3470
57
            RC = &PPC::SPE4RCRegClass;
3471
169
          else
3472
169
            RC = &PPC::F4RCRegClass;
3473
226
          break;
3474
1.23k
        case MVT::f64:
3475
281
          if (Subtarget.hasVSX())
3476
0
            RC = &PPC::VSFRCRegClass;
3477
281
          else if (Subtarget.hasSPE())
3478
60
            // SPE passes doubles in GPR pairs.
3479
60
            RC = &PPC::GPRCRegClass;
3480
221
          else
3481
221
            RC = &PPC::F8RCRegClass;
3482
281
          break;
3483
1.23k
        case MVT::v16i8:
3484
13
        case MVT::v8i16:
3485
13
        case MVT::v4i32:
3486
13
          RC = &PPC::VRRCRegClass;
3487
13
          break;
3488
13
        case MVT::v4f32:
3489
9
          RC = Subtarget.hasQPX() ? 
&PPC::QSRCRegClass0
: &PPC::VRRCRegClass;
3490
9
          break;
3491
13
        case MVT::v2f64:
3492
0
        case MVT::v2i64:
3493
0
          RC = &PPC::VRRCRegClass;
3494
0
          break;
3495
0
        case MVT::v4f64:
3496
0
          RC = &PPC::QFRCRegClass;
3497
0
          break;
3498
0
        case MVT::v4i1:
3499
0
          RC = &PPC::QBRCRegClass;
3500
0
          break;
3501
1.76k
      }
3502
1.76k
3503
1.76k
      SDValue ArgValue;
3504
1.76k
      // Transform the arguments stored in physical registers into
3505
1.76k
      // virtual ones.
3506
1.76k
      if (VA.getLocVT() == MVT::f64 && 
Subtarget.hasSPE()281
) {
3507
60
        assert(i + 1 < e && "No second half of double precision argument");
3508
60
        unsigned RegLo = MF.addLiveIn(VA.getLocReg(), RC);
3509
60
        unsigned RegHi = MF.addLiveIn(ArgLocs[++i].getLocReg(), RC);
3510
60
        SDValue ArgValueLo = DAG.getCopyFromReg(Chain, dl, RegLo, MVT::i32);
3511
60
        SDValue ArgValueHi = DAG.getCopyFromReg(Chain, dl, RegHi, MVT::i32);
3512
60
        if (!Subtarget.isLittleEndian())
3513
60
          std::swap (ArgValueLo, ArgValueHi);
3514
60
        ArgValue = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, ArgValueLo,
3515
60
                               ArgValueHi);
3516
1.70k
      } else {
3517
1.70k
        unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
3518
1.70k
        ArgValue = DAG.getCopyFromReg(Chain, dl, Reg,
3519
1.70k
                                      ValVT == MVT::i1 ? 
MVT::i323
:
ValVT1.69k
);
3520
1.70k
        if (ValVT == MVT::i1)
3521
3
          ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue);
3522
1.70k
      }
3523
1.76k
3524
1.76k
      InVals.push_back(ArgValue);
3525
1.76k
    } else {
3526
95
      // Argument stored in memory.
3527
95
      assert(VA.isMemLoc());
3528
95
3529
95
      // Get the extended size of the argument type in stack
3530
95
      unsigned ArgSize = VA.getLocVT().getStoreSize();
3531
95
      // Get the actual size of the argument type
3532
95
      unsigned ObjSize = VA.getValVT().getStoreSize();
3533
95
      unsigned ArgOffset = VA.getLocMemOffset();
3534
95
      // Stack objects in PPC32 are right justified.
3535
95
      ArgOffset += ArgSize - ObjSize;
3536
95
      int FI = MFI.CreateFixedObject(ArgSize, ArgOffset, isImmutable);
3537
95
3538
95
      // Create load nodes to retrieve arguments from the stack.
3539
95
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3540
95
      InVals.push_back(
3541
95
          DAG.getLoad(VA.getValVT(), dl, Chain, FIN, MachinePointerInfo()));
3542
95
    }
3543
1.85k
  }
3544
1.04k
3545
1.04k
  // Assign locations to all of the incoming aggregate by value arguments.
3546
1.04k
  // Aggregates passed by value are stored in the local variable space of the
3547
1.04k
  // caller's stack frame, right above the parameter list area.
3548
1.04k
  SmallVector<CCValAssign, 16> ByValArgLocs;
3549
1.04k
  CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
3550
1.04k
                      ByValArgLocs, *DAG.getContext());
3551
1.04k
3552
1.04k
  // Reserve stack space for the allocations in CCInfo.
3553
1.04k
  CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);
3554
1.04k
3555
1.04k
  CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal);
3556
1.04k
3557
1.04k
  // Area that is at least reserved in the caller of this function.
3558
1.04k
  unsigned MinReservedArea = CCByValInfo.getNextStackOffset();
3559
1.04k
  MinReservedArea = std::max(MinReservedArea, LinkageSize);
3560
1.04k
3561
1.04k
  // Set the size that is at least reserved in caller of this function.  Tail
3562
1.04k
  // call optimized function's reserved stack space needs to be aligned so that
3563
1.04k
  // taking the difference between two stack areas will result in an aligned
3564
1.04k
  // stack.
3565
1.04k
  MinReservedArea =
3566
1.04k
      EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
3567
1.04k
  FuncInfo->setMinReservedArea(MinReservedArea);
3568
1.04k
3569
1.04k
  SmallVector<SDValue, 8> MemOps;
3570
1.04k
3571
1.04k
  // If the function takes variable number of arguments, make a frame index for
3572
1.04k
  // the start of the first vararg value... for expansion of llvm.va_start.
3573
1.04k
  if (isVarArg) {
3574
3
    static const MCPhysReg GPArgRegs[] = {
3575
3
      PPC::R3, PPC::R4, PPC::R5, PPC::R6,
3576
3
      PPC::R7, PPC::R8, PPC::R9, PPC::R10,
3577
3
    };
3578
3
    const unsigned NumGPArgRegs = array_lengthof(GPArgRegs);
3579
3
3580
3
    static const MCPhysReg FPArgRegs[] = {
3581
3
      PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
3582
3
      PPC::F8
3583
3
    };
3584
3
    unsigned NumFPArgRegs = array_lengthof(FPArgRegs);
3585
3
3586
3
    if (useSoftFloat() || hasSPE())
3587
0
       NumFPArgRegs = 0;
3588
3
3589
3
    FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs));
3590
3
    FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs));
3591
3
3592
3
    // Make room for NumGPArgRegs and NumFPArgRegs.
3593
3
    int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 +
3594
3
                NumFPArgRegs * MVT(MVT::f64).getSizeInBits()/8;
3595
3
3596
3
    FuncInfo->setVarArgsStackOffset(
3597
3
      MFI.CreateFixedObject(PtrVT.getSizeInBits()/8,
3598
3
                            CCInfo.getNextStackOffset(), true));
3599
3
3600
3
    FuncInfo->setVarArgsFrameIndex(MFI.CreateStackObject(Depth, 8, false));
3601
3
    SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3602
3
3603
3
    // The fixed integer arguments of a variadic function are stored to the
3604
3
    // VarArgsFrameIndex on the stack so that they may be loaded by
3605
3
    // dereferencing the result of va_next.
3606
27
    for (unsigned GPRIndex = 0; GPRIndex != NumGPArgRegs; 
++GPRIndex24
) {
3607
24
      // Get an existing live-in vreg, or add a new one.
3608
24
      unsigned VReg = MF.getRegInfo().getLiveInVirtReg(GPArgRegs[GPRIndex]);
3609
24
      if (!VReg)
3610
14
        VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass);
3611
24
3612
24
      SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
3613
24
      SDValue Store =
3614
24
          DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
3615
24
      MemOps.push_back(Store);
3616
24
      // Increment the address by four for the next argument to store
3617
24
      SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
3618
24
      FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
3619
24
    }
3620
3
3621
3
    // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6
3622
3
    // is set.
3623
3
    // The double arguments are stored to the VarArgsFrameIndex
3624
3
    // on the stack.
3625
27
    for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; 
++FPRIndex24
) {
3626
24
      // Get an existing live-in vreg, or add a new one.
3627
24
      unsigned VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]);
3628
24
      if (!VReg)
3629
23
        VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass);
3630
24
3631
24
      SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64);
3632
24
      SDValue Store =
3633
24
          DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
3634
24
      MemOps.push_back(Store);
3635
24
      // Increment the address by eight for the next argument to store
3636
24
      SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8, dl,
3637
24
                                         PtrVT);
3638
24
      FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
3639
24
    }
3640
3
  }
3641
1.04k
3642
1.04k
  if (!MemOps.empty())
3643
3
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
3644
1.04k
3645
1.04k
  return Chain;
3646
1.04k
}
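
// --- Illustrative sketch (editorial addition, not part of this file) ---
// The memory-argument path above right-justifies small objects inside their
// PPC32 stack slots: the slot covers the extended store size (ArgSize) and
// the object of size ObjSize sits at the high end of it, so the load offset
// becomes ArgOffset + (ArgSize - ObjSize). The helper below is hypothetical
// and only demonstrates that arithmetic.

#include <cassert>

static unsigned rightJustifiedOffset(unsigned SlotOffset, unsigned SlotSize,
                                     unsigned ObjSize) {
  // Right-justified: the object occupies the last ObjSize bytes of the slot.
  return SlotOffset + (SlotSize - ObjSize);
}

static void rightJustifiedOffsetExample() {
  // An i8 promoted into a 4-byte slot at offset 8 is loaded from offset 11.
  assert(rightJustifiedOffset(8, 4, 1) == 11);
}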

// PPC64 passes i8, i16, and i32 values in i64 registers. Promote
// value to MVT::i64 and then truncate to the correct register size.
SDValue PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags,
                                             EVT ObjectVT, SelectionDAG &DAG,
                                             SDValue ArgVal,
                                             const SDLoc &dl) const {
  if (Flags.isSExt())
    ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
                         DAG.getValueType(ObjectVT));
  else if (Flags.isZExt())
    ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
                         DAG.getValueType(ObjectVT));

  return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal);
}
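
// --- Illustrative sketch (editorial addition, not part of this file) ---
// extendArgForPPC64 models, at the value level, recovering a sub-64-bit
// integer that arrived in a full 64-bit GPR: the signext/zeroext attribute
// says what the upper bits hold (AssertSext/AssertZext) and TRUNCATE drops
// them. In plain C++ the same recovery is just a narrowing cast; these
// helper names are hypothetical.

#include <cstdint>

// signext i32 argument: the upper 32 bits are a sign extension (AssertSext),
// so truncation loses no information.
static int32_t recoverSignExtended(int64_t GprValue) {
  return static_cast<int32_t>(GprValue); // AssertSext + TRUNCATE
}

// zeroext i32 argument: the upper 32 bits are zero (AssertZext).
static uint32_t recoverZeroExtended(uint64_t GprValue) {
  return static_cast<uint32_t>(GprValue); // AssertZext + TRUNCATE
}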

SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  // TODO: add description of PPC stack frame format, or at least some docs.
  //
  bool isELFv2ABI = Subtarget.isELFv2ABI();
  bool isLittleEndian = Subtarget.isLittleEndian();
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();

  assert(!(CallConv == CallingConv::Fast && isVarArg) &&
         "fastcc not supported on varargs functions");

  EVT PtrVT = getPointerTy(MF.getDataLayout());
  // Potential tail calls could cause overwriting of argument stack slots.
  bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
                       (CallConv == CallingConv::Fast));
  unsigned PtrByteSize = 8;
  unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();

  static const MCPhysReg GPR[] = {
    PPC::X3, PPC::X4, PPC::X5, PPC::X6,
    PPC::X7, PPC::X8, PPC::X9, PPC::X10,
  };
  static const MCPhysReg VR[] = {
    PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
    PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
  };

  const unsigned Num_GPR_Regs = array_lengthof(GPR);
  const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
  const unsigned Num_VR_Regs  = array_lengthof(VR);
  const unsigned Num_QFPR_Regs = Num_FPR_Regs;

  // Do a first pass over the arguments to determine whether the ABI
  // guarantees that our caller has allocated the parameter save area
  // on its stack frame.  In the ELFv1 ABI, this is always the case;
  // in the ELFv2 ABI, it is true if this is a vararg function or if
  // any parameter is located in a stack slot.

  bool HasParameterArea = !isELFv2ABI || isVarArg;
  unsigned ParamAreaSize = Num_GPR_Regs * PtrByteSize;
  unsigned NumBytes = LinkageSize;
  unsigned AvailableFPRs = Num_FPR_Regs;
  unsigned AvailableVRs = Num_VR_Regs;
  for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
    if (Ins[i].Flags.isNest())
      continue;

    if (CalculateStackSlotUsed(Ins[i].VT, Ins[i].ArgVT, Ins[i].Flags,
                               PtrByteSize, LinkageSize, ParamAreaSize,
                               NumBytes, AvailableFPRs, AvailableVRs,
                               Subtarget.hasQPX()))
      HasParameterArea = true;
  }

  // Add DAG nodes to load the arguments or copy them out of registers.  On
  // entry to a function on PPC, the arguments start after the linkage area,
  // although the first ones are often in registers.

  unsigned ArgOffset = LinkageSize;
  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
  unsigned &QFPR_idx = FPR_idx;
  SmallVector<SDValue, 8> MemOps;
  Function::const_arg_iterator FuncArg = MF.getFunction().arg_begin();
  unsigned CurArgIdx = 0;
  for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
    SDValue ArgVal;
    bool needsLoad = false;
    EVT ObjectVT = Ins[ArgNo].VT;
    EVT OrigVT = Ins[ArgNo].ArgVT;
    unsigned ObjSize = ObjectVT.getStoreSize();
    unsigned ArgSize = ObjSize;
    ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
    if (Ins[ArgNo].isOrigArg()) {
      std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
      CurArgIdx = Ins[ArgNo].getOrigArgIndex();
    }
    // We re-align the argument offset for each argument, except when using the
    // fast calling convention, when we need to make sure we do that only when
    // we'll actually use a stack slot.
    unsigned CurArgOffset, Align;
    auto ComputeArgOffset = [&]() {
      /* Respect alignment of argument on the stack.  */
      Align = CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize);
      ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
      CurArgOffset = ArgOffset;
    };

    if (CallConv != CallingConv::Fast) {
      ComputeArgOffset();

      /* Compute GPR index associated with argument offset.  */
      GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
      GPR_idx = std::min(GPR_idx, Num_GPR_Regs);
    }

    // FIXME the codegen can be much improved in some cases.
    // We do not have to keep everything in memory.
    if (Flags.isByVal()) {
      assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");

      if (CallConv == CallingConv::Fast)
        ComputeArgOffset();

      // ObjSize is the true size; ArgSize is rounded up to a multiple of
      // registers.
      ObjSize = Flags.getByValSize();
      ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
      // Empty aggregate parameters do not take up registers.  Examples:
      //   struct { } a;
      //   union  { } b;
      //   int c[0];
      // etc.  However, we have to provide a place-holder in InVals, so
      // pretend we have an 8-byte item at the current address for that
      // purpose.
      if (!ObjSize) {
        int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true);
        SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
        InVals.push_back(FIN);
        continue;
      }

      // Create a stack object covering all stack doublewords occupied
      // by the argument.  If the argument is (fully or partially) on
      // the stack, or if the argument is fully in registers but the
      // caller has allocated the parameter save anyway, we can refer
      // directly to the caller's stack frame.  Otherwise, create a
      // local copy in our own frame.
      int FI;
      if (HasParameterArea ||
          ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize)
        FI = MFI.CreateFixedObject(ArgSize, ArgOffset, false, true);
      else
        FI = MFI.CreateStackObject(ArgSize, Align, false);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);

      // Handle aggregates smaller than 8 bytes.
      if (ObjSize < PtrByteSize) {
        // The value of the object is its address, which differs from the
        // address of the enclosing doubleword on big-endian systems.
        SDValue Arg = FIN;
        if (!isLittleEndian) {
          SDValue ArgOff = DAG.getConstant(PtrByteSize - ObjSize, dl, PtrVT);
          Arg = DAG.getNode(ISD::ADD, dl, ArgOff.getValueType(), Arg, ArgOff);
        }
        InVals.push_back(Arg);

        if (GPR_idx != Num_GPR_Regs) {
          unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
          FuncInfo->addLiveInAttr(VReg, Flags);
          SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
          SDValue Store;

          if (ObjSize==1 || ObjSize==2 || ObjSize==4) {
            EVT ObjType = (ObjSize == 1 ? MVT::i8 :
                           (ObjSize == 2 ? MVT::i16 : MVT::i32));
            Store = DAG.getTruncStore(Val.getValue(1), dl, Val, Arg,
                                      MachinePointerInfo(&*FuncArg), ObjType);
          } else {
            // For sizes that don't fit a truncating store (3, 5, 6, 7),
            // store the whole register as-is to the parameter save area
            // slot.
            Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
                                 MachinePointerInfo(&*FuncArg));
          }

          MemOps.push_back(Store);
        }
        // Whether we copied from a register or not, advance the offset
        // into the parameter save area by a full doubleword.
        ArgOffset += PtrByteSize;
        continue;
      }

      // The value of the object is its address, which is the address of
      // its first stack doubleword.
      InVals.push_back(FIN);

      // Store whatever pieces of the object are in registers to memory.
      for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
        if (GPR_idx == Num_GPR_Regs)
          break;

        unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
        FuncInfo->addLiveInAttr(VReg, Flags);
        SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
        SDValue Addr = FIN;
        if (j) {
          SDValue Off = DAG.getConstant(j, dl, PtrVT);
          Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off);
        }
        SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, Addr,
                                     MachinePointerInfo(&*FuncArg, j));
        MemOps.push_back(Store);
        ++GPR_idx;
      }
      ArgOffset += ArgSize;
      continue;
    }

    switch (ObjectVT.getSimpleVT().SimpleTy) {
    default: llvm_unreachable("Unhandled argument type!");
    case MVT::i1:
    case MVT::i32:
    case MVT::i64:
      if (Flags.isNest()) {
        // The 'nest' parameter, if any, is passed in R11.
        unsigned VReg = MF.addLiveIn(PPC::X11, &PPC::G8RCRegClass);
        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);

        if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
          ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);

        break;
      }

      // These can be scalar arguments or elements of an integer array type
      // passed directly.  Clang may use those instead of "byval" aggregate
      // types to avoid forcing arguments to memory unnecessarily.
      if (GPR_idx != Num_GPR_Regs) {
        unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
        FuncInfo->addLiveInAttr(VReg, Flags);
        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);

        if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
          // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
          // value to MVT::i64 and then truncate to the correct register size.
          ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
      } else {
        if (CallConv == CallingConv::Fast)
          ComputeArgOffset();

        needsLoad = true;
        ArgSize = PtrByteSize;
      }
      if (CallConv != CallingConv::Fast || needsLoad)
        ArgOffset += 8;
      break;

    case MVT::f32:
    case MVT::f64:
      // These can be scalar arguments or elements of a float array type
      // passed directly.  The latter are used to implement ELFv2 homogeneous
      // float aggregates.
      if (FPR_idx != Num_FPR_Regs) {
        unsigned VReg;

        if (ObjectVT == MVT::f32)
          VReg = MF.addLiveIn(FPR[FPR_idx],
                              Subtarget.hasP8Vector()
                                  ? &PPC::VSSRCRegClass
                                  : &PPC::F4RCRegClass);
        else
          VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX()
                                                ? &PPC::VSFRCRegClass
                                                : &PPC::F8RCRegClass);

        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
        ++FPR_idx;
      } else if (GPR_idx != Num_GPR_Regs && CallConv != CallingConv::Fast) {
        // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
        // once we support fp <-> gpr moves.

        // This can only ever happen in the presence of f32 array types,
        // since otherwise we never run out of FPRs before running out
        // of GPRs.
        unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
        FuncInfo->addLiveInAttr(VReg, Flags);
        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);

        if (ObjectVT == MVT::f32) {
          if ((ArgOffset % PtrByteSize) == (isLittleEndian ? 4 : 0))
            ArgVal = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgVal,
                                 DAG.getConstant(32, dl, MVT::i32));
          ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
        }

        ArgVal = DAG.getNode(ISD::BITCAST, dl, ObjectVT, ArgVal);
      } else {
        if (CallConv == CallingConv::Fast)
          ComputeArgOffset();

        needsLoad = true;
      }

      // When passing an array of floats, the array occupies consecutive
      // space in the argument area; only round up to the next doubleword
      // at the end of the array.  Otherwise, each float takes 8 bytes.
      if (CallConv != CallingConv::Fast || needsLoad) {
        ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize;
        ArgOffset += ArgSize;
        if (Flags.isInConsecutiveRegsLast())
          ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
      }
      break;
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
    case MVT::v2f64:
    case MVT::v2i64:
    case MVT::v1i128:
    case MVT::f128:
      if (!Subtarget.hasQPX()) {
        // These can be scalar arguments or elements of a vector array type
        // passed directly.  The latter are used to implement ELFv2 homogeneous
        // vector aggregates.
        if (VR_idx != Num_VR_Regs) {
          unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
          ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
          ++VR_idx;
        } else {
          if (CallConv == CallingConv::Fast)
            ComputeArgOffset();
          needsLoad = true;
        }
        if (CallConv != CallingConv::Fast || needsLoad)
          ArgOffset += 16;
        break;
      } // not QPX

      assert(ObjectVT.getSimpleVT().SimpleTy == MVT::v4f32 &&
             "Invalid QPX parameter type");
      LLVM_FALLTHROUGH;

    case MVT::v4f64:
    case MVT::v4i1:
      // QPX vectors are treated like their scalar floating-point subregisters
      // (except that they're larger).
      unsigned Sz = ObjectVT.getSimpleVT().SimpleTy == MVT::v4f32 ? 16 : 32;
      if (QFPR_idx != Num_QFPR_Regs) {
        const TargetRegisterClass *RC;
        switch (ObjectVT.getSimpleVT().SimpleTy) {
        case MVT::v4f64: RC = &PPC::QFRCRegClass; break;
        case MVT::v4f32: RC = &PPC::QSRCRegClass; break;
        default:         RC = &PPC::QBRCRegClass; break;
        }

        unsigned VReg = MF.addLiveIn(QFPR[QFPR_idx], RC);
        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
        ++QFPR_idx;
      } else {
        if (CallConv == CallingConv::Fast)
          ComputeArgOffset();
        needsLoad = true;
      }
      if (CallConv != CallingConv::Fast || needsLoad)
        ArgOffset += Sz;
      break;
    }

    // We need to load the argument to a virtual register if we determined
    // above that we ran out of physical registers of the appropriate type.
    if (needsLoad) {
      if (ObjSize < ArgSize && !isLittleEndian)
        CurArgOffset += ArgSize - ObjSize;
      int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, isImmutable);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());
    }

    InVals.push_back(ArgVal);
  }

  // Area that is at least reserved in the caller of this function.
  unsigned MinReservedArea;
  if (HasParameterArea)
    MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize);
  else
    MinReservedArea = LinkageSize;

  // Set the size that is at least reserved in caller of this function.  Tail
  // call optimized functions' reserved stack space needs to be aligned so that
  // taking the difference between two stack areas will result in an aligned
  // stack.
  MinReservedArea =
      EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
  FuncInfo->setMinReservedArea(MinReservedArea);

  // If the function takes a variable number of arguments, make a frame index
  // for the start of the first vararg value... for expansion of llvm.va_start.
  if (isVarArg) {
    int Depth = ArgOffset;

    FuncInfo->setVarArgsFrameIndex(
      MFI.CreateFixedObject(PtrByteSize, Depth, true));
    SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);

    // If this function is vararg, store any remaining integer argument regs
    // to their spots on the stack so that they may be loaded by dereferencing
    // the result of va_next.
    for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
         GPR_idx < Num_GPR_Regs; ++GPR_idx) {
      unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
      SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
      SDValue Store =
          DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
      MemOps.push_back(Store);
      // Increment the address by eight for the next argument to store.
      SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
      FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
    }
  }

  if (!MemOps.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);

  return Chain;
}
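
// --- Illustrative sketch (editorial addition, not part of this file) ---
// ComputeArgOffset above rounds the running offset up with
// ((ArgOffset + Align - 1) / Align) * Align, the classic align-up idiom
// (for a power-of-two Align this equals (ArgOffset + Align - 1) &
// ~(Align - 1)). A hypothetical standalone helper plus a worked case:

#include <cassert>

static unsigned alignUp(unsigned Offset, unsigned Align) {
  return ((Offset + Align - 1) / Align) * Align;
}

static void alignUpExample() {
  // A 16-byte-aligned vector argument after a 52-byte-deep parameter area
  // starts at offset 64.
  assert(alignUp(52, 16) == 64);
  assert(alignUp(64, 16) == 64); // already aligned: unchanged
}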

SDValue PPCTargetLowering::LowerFormalArguments_Darwin(
    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  // TODO: add description of PPC stack frame format, or at least some docs.
  //
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();

  EVT PtrVT = getPointerTy(MF.getDataLayout());
  bool isPPC64 = PtrVT == MVT::i64;
  // Potential tail calls could cause overwriting of argument stack slots.
  bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
                       (CallConv == CallingConv::Fast));
  unsigned PtrByteSize = isPPC64 ? 8 : 4;
  unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
  unsigned ArgOffset = LinkageSize;
  // Area that is at least reserved in caller of this function.
  unsigned MinReservedArea = ArgOffset;

  static const MCPhysReg GPR_32[] = {           // 32-bit registers.
    PPC::R3, PPC::R4, PPC::R5, PPC::R6,
    PPC::R7, PPC::R8, PPC::R9, PPC::R10,
  };
  static const MCPhysReg GPR_64[] = {           // 64-bit registers.
    PPC::X3, PPC::X4, PPC::X5, PPC::X6,
    PPC::X7, PPC::X8, PPC::X9, PPC::X10,
  };
  static const MCPhysReg VR[] = {
    PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
    PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
  };

  const unsigned Num_GPR_Regs = array_lengthof(GPR_32);
  const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
  const unsigned Num_VR_Regs  = array_lengthof(VR);

  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;

  const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32;

  // In 32-bit non-varargs functions, the stack space for vectors is after the
  // stack space for non-vectors.  We do not use this space unless we have
  // too many vectors to fit in registers, something that only occurs in
  // constructed examples:), but we have to walk the arglist to figure
  // that out...for the pathological case, compute VecArgOffset as the
  // start of the vector parameter area.  Computing VecArgOffset is the
  // entire point of the following loop.
  unsigned VecArgOffset = ArgOffset;
  if (!isVarArg && !isPPC64) {
    for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e;
         ++ArgNo) {
      EVT ObjectVT = Ins[ArgNo].VT;
      ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;

      if (Flags.isByVal()) {
        // ObjSize is the true size; ArgSize is rounded up to a multiple of
        // regs.
        unsigned ObjSize = Flags.getByValSize();
        unsigned ArgSize =
                ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
        VecArgOffset += ArgSize;
        continue;
      }

      switch(ObjectVT.getSimpleVT().SimpleTy) {
      default: llvm_unreachable("Unhandled argument type!");
      case MVT::i1:
      case MVT::i32:
      case MVT::f32:
        VecArgOffset += 4;
        break;
      case MVT::i64:  // PPC64
      case MVT::f64:
        // FIXME: We are guaranteed to be !isPPC64 at this point.
        // Does MVT::i64 apply?
        VecArgOffset += 8;
        break;
      case MVT::v4f32:
      case MVT::v4i32:
      case MVT::v8i16:
      case MVT::v16i8:
        // Nothing to do, we're only looking at Nonvector args here.
        break;
      }
    }
  }
  // We've found where the vector parameter area in memory is.  Skip the
  // first 12 parameters; these don't use that memory.
  VecArgOffset = ((VecArgOffset+15)/16)*16;
  VecArgOffset += 12*16;

  // Add DAG nodes to load the arguments or copy them out of registers.  On
  // entry to a function on PPC, the arguments start after the linkage area,
  // although the first ones are often in registers.

  SmallVector<SDValue, 8> MemOps;
  unsigned nAltivecParamsAtEnd = 0;
  Function::const_arg_iterator FuncArg = MF.getFunction().arg_begin();
  unsigned CurArgIdx = 0;
  for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
    SDValue ArgVal;
    bool needsLoad = false;
    EVT ObjectVT = Ins[ArgNo].VT;
    unsigned ObjSize = ObjectVT.getSizeInBits()/8;
    unsigned ArgSize = ObjSize;
    ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
    if (Ins[ArgNo].isOrigArg()) {
      std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
      CurArgIdx = Ins[ArgNo].getOrigArgIndex();
    }
    unsigned CurArgOffset = ArgOffset;

    // Varargs or 64 bit Altivec parameters are padded to a 16 byte boundary.
    if (ObjectVT==MVT::v4f32 || ObjectVT==MVT::v4i32 ||
        ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8) {
      if (isVarArg || isPPC64) {
        MinReservedArea = ((MinReservedArea+15)/16)*16;
        MinReservedArea += CalculateStackSlotSize(ObjectVT,
                                                  Flags,
                                                  PtrByteSize);
      } else  nAltivecParamsAtEnd++;
    } else
      // Calculate min reserved area.
      MinReservedArea += CalculateStackSlotSize(Ins[ArgNo].VT,
                                                Flags,
                                                PtrByteSize);

    // FIXME the codegen can be much improved in some cases.
    // We do not have to keep everything in memory.
    if (Flags.isByVal()) {
      assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");

      // ObjSize is the true size; ArgSize is rounded up to a multiple of
      // registers.
      ObjSize = Flags.getByValSize();
      ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
      // Objects of size 1 and 2 are right justified, everything else is
      // left justified.  This means the memory address is adjusted forwards.
      if (ObjSize==1 || ObjSize==2) {
        CurArgOffset = CurArgOffset + (4 - ObjSize);
      }
      // The value of the object is its address.
      int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, false, true);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      InVals.push_back(FIN);
      if (ObjSize==1 || ObjSize==2) {
        if (GPR_idx != Num_GPR_Regs) {
          unsigned VReg;
          if (isPPC64)
            VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
          else
            VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
          SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
          EVT ObjType = ObjSize == 1 ? MVT::i8 : MVT::i16;
          SDValue Store =
              DAG.getTruncStore(Val.getValue(1), dl, Val, FIN,
                                MachinePointerInfo(&*FuncArg), ObjType);
          MemOps.push_back(Store);
          ++GPR_idx;
        }

        ArgOffset += PtrByteSize;

        continue;
      }
      for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
        // Store whatever pieces of the object are in registers
        // to memory.  ArgOffset will be the address of the beginning
        // of the object.
        if (GPR_idx != Num_GPR_Regs) {
          unsigned VReg;
          if (isPPC64)
            VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
          else
            VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
          int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true);
          SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
          SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
          SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
                                       MachinePointerInfo(&*FuncArg, j));
          MemOps.push_back(Store);
          ++GPR_idx;
          ArgOffset += PtrByteSize;
        } else {
          ArgOffset += ArgSize - (ArgOffset-CurArgOffset);
          break;
        }
      }
      continue;
    }

    switch (ObjectVT.getSimpleVT().SimpleTy) {
    default: llvm_unreachable("Unhandled argument type!");
    case MVT::i1:
    case MVT::i32:
      if (!isPPC64) {
        if (GPR_idx != Num_GPR_Regs) {
          unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
          ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);

          if (ObjectVT == MVT::i1)
            ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgVal);

          ++GPR_idx;
        } else {
          needsLoad = true;
          ArgSize = PtrByteSize;
        }
        // All int arguments reserve stack space in the Darwin ABI.
        ArgOffset += PtrByteSize;
        break;
      }
      LLVM_FALLTHROUGH;
    case MVT::i64:  // PPC64
      if (GPR_idx != Num_GPR_Regs) {
        unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);

        if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
          // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
          // value to MVT::i64 and then truncate to the correct register size.
          ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);

        ++GPR_idx;
      } else {
        needsLoad = true;
        ArgSize = PtrByteSize;
      }
      // All int arguments reserve stack space in the Darwin ABI.
      ArgOffset += 8;
      break;

    case MVT::f32:
    case MVT::f64:
      // Every 4 bytes of argument space consumes one of the GPRs available for
      // argument passing.
      if (GPR_idx != Num_GPR_Regs) {
        ++GPR_idx;
        if (ObjSize == 8 && GPR_idx != Num_GPR_Regs && !isPPC64)
          ++GPR_idx;
      }
      if (FPR_idx != Num_FPR_Regs) {
        unsigned VReg;

        if (ObjectVT == MVT::f32)
          VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass);
        else
          VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass);

        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
        ++FPR_idx;
      } else {
        needsLoad = true;
      }

      // All FP arguments reserve stack space in the Darwin ABI.
      ArgOffset += isPPC64 ? 8 : ObjSize;
      break;
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
      // Note that vector arguments in registers don't reserve stack space,
      // except in varargs functions.
      if (VR_idx != Num_VR_Regs) {
        unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
        if (isVarArg) {
          while ((ArgOffset % 16) != 0) {
            ArgOffset += PtrByteSize;
            if (GPR_idx != Num_GPR_Regs)
              GPR_idx++;
          }
          ArgOffset += 16;
          GPR_idx = std::min(GPR_idx+4, Num_GPR_Regs); // FIXME correct for ppc64?
        }
        ++VR_idx;
      } else {
        if (!isVarArg && !isPPC64) {
          // Vectors go after all the nonvectors.
          CurArgOffset = VecArgOffset;
          VecArgOffset += 16;
        } else {
          // Vectors are aligned.
          ArgOffset = ((ArgOffset+15)/16)*16;
          CurArgOffset = ArgOffset;
          ArgOffset += 16;
        }
        needsLoad = true;
      }
      break;
    }

    // We need to load the argument to a virtual register if we determined above
    // that we ran out of physical registers of the appropriate type.
    if (needsLoad) {
      int FI = MFI.CreateFixedObject(ObjSize,
                                     CurArgOffset + (ArgSize - ObjSize),
                                     isImmutable);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());
    }

    InVals.push_back(ArgVal);
  }

  // Allow for Altivec parameters at the end, if needed.
  if (nAltivecParamsAtEnd) {
    MinReservedArea = ((MinReservedArea+15)/16)*16;
    MinReservedArea += 16*nAltivecParamsAtEnd;
  }

  // Area that is at least reserved in the caller of this function.
  MinReservedArea = std::max(MinReservedArea, LinkageSize + 8 * PtrByteSize);

  // Set the size that is at least reserved in caller of this function.  Tail
  // call optimized functions' reserved stack space needs to be aligned so that
  // taking the difference between two stack areas will result in an aligned
  // stack.
  MinReservedArea =
      EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
  FuncInfo->setMinReservedArea(MinReservedArea);

  // If the function takes a variable number of arguments, make a frame index
  // for the start of the first vararg value... for expansion of llvm.va_start.
  if (isVarArg) {
    int Depth = ArgOffset;

    FuncInfo->setVarArgsFrameIndex(
      MFI.CreateFixedObject(PtrVT.getSizeInBits()/8,
                            Depth, true));
    SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);

    // If this function is vararg, store any remaining integer argument regs
    // to their spots on the stack so that they may be loaded by dereferencing
    // the result of va_next.
    for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) {
      unsigned VReg;

      if (isPPC64)
        VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
      else
        VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);

      SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
      SDValue Store =
          DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
      MemOps.push_back(Store);
      // Increment the address by the pointer size for the next argument to
      // store.
      SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
      FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
    }
  }

  if (!MemOps.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);

  return Chain;
}
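
// --- Illustrative sketch (editorial addition, not part of this file) ---
// On 32-bit Darwin, FP arguments still consume GPRs: every 4 bytes of
// argument space burns one, so an f64 takes two GPR slots even though the
// value itself lands in an FPR. A hypothetical counter mirroring the
// bookkeeping in the f32/f64 case above:

static unsigned gprsConsumedByFP(unsigned ObjSize,  // 4 for f32, 8 for f64
                                 unsigned GPRsLeft, // registers not yet used
                                 bool IsPPC64) {
  unsigned Used = 0;
  if (GPRsLeft) {
    ++Used; // first 4 bytes of argument space
    if (ObjSize == 8 && GPRsLeft > 1 && !IsPPC64)
      ++Used; // second 4 bytes of an f64 on 32-bit
  }
  return Used; // e.g. gprsConsumedByFP(8, 8, false) == 2
}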

/// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
/// adjusted to accommodate the arguments for the tailcall.
static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
                                   unsigned ParamSize) {

  if (!isTailCall) return 0;

  PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
  unsigned CallerMinReservedArea = FI->getMinReservedArea();
  int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
  // Remember only if the new adjustment is bigger.
  if (SPDiff < FI->getTailCallSPDelta())
    FI->setTailCallSPDelta(SPDiff);

  return SPDiff;
}
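
// --- Illustrative sketch (editorial addition, not part of this file) ---
// The SPDiff computed above is (caller's reserved area) - (callee's
// parameter size); a negative value means the tail call needs more argument
// space than the caller reserved, so the stack must be grown before the
// jump. A worked case with made-up sizes:

#include <cassert>

static void spDiffExample() {
  unsigned CallerMinReservedArea = 64; // what our own frame already reserves
  unsigned CalleeParamSize = 96;       // what the tail-callee's args need
  int SPDiff = (int)CallerMinReservedArea - (int)CalleeParamSize;
  assert(SPDiff == -32); // grow the stack by 32 bytes for the tail call
}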

static bool isFunctionGlobalAddress(SDValue Callee);

static bool
callsShareTOCBase(const Function *Caller, SDValue Callee,
                  const TargetMachine &TM) {
  // Callee is either a GlobalAddress or an ExternalSymbol. ExternalSymbols
  // don't have enough information to determine if the caller and callee share
  // the same TOC base, so we have to pessimistically assume they don't for
  // correctness.
  GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
  if (!G)
    return false;

  const GlobalValue *GV = G->getGlobal();
  // The medium and large code models are expected to provide a sufficiently
  // large TOC to provide all data addressing needs of a module with a
  // single TOC. Since each module will be addressed with a single TOC, we
  // only need to check that caller and callee don't cross dso boundaries.
  if (CodeModel::Medium == TM.getCodeModel() ||
      CodeModel::Large == TM.getCodeModel())
    return TM.shouldAssumeDSOLocal(*Caller->getParent(), GV);

  // Otherwise we need to ensure callee and caller are in the same section,
  // since the linker may allocate multiple TOCs, and we don't know which
  // sections will belong to the same TOC base.

  if (!GV->isStrongDefinitionForLinker())
    return false;

  // Any explicitly-specified sections and section prefixes must also match.
  // Also, if we're using -ffunction-sections, then each function is always in
  // a different section (the same is true for COMDAT functions).
  if (TM.getFunctionSections() || GV->hasComdat() || Caller->hasComdat() ||
      GV->getSection() != Caller->getSection())
    return false;
  if (const auto *F = dyn_cast<Function>(GV)) {
    if (F->getSectionPrefix() != Caller->getSectionPrefix())
      return false;
  }

  // If the callee might be interposed, then we can't assume the ultimate call
  // target will be in the same section. Even in cases where we can assume that
  // interposition won't happen, in any case where the linker might insert a
  // stub to allow for interposition, we must generate code as though
  // interposition might occur. To understand why this matters, consider a
  // situation where: a -> b -> c where the arrows indicate calls. b and c are
  // in the same section, but a is in a different module (i.e. has a different
  // TOC base pointer). If the linker allows for interposition between b and c,
  // then it will generate a stub for the call edge between b and c which will
  // save the TOC pointer into the designated stack slot allocated by b. If we
  // return true here, and therefore allow a tail call between b and c, that
  // stack slot won't exist and the b -> c stub will end up saving b's TOC base
  // pointer into the stack slot allocated by a (where the a -> b stub saved
  // a's TOC base pointer). If we're not considering a tail call, but rather,
  // whether a nop is needed after the call instruction in b, because the linker
  // will insert a stub, it might complain about a missing nop if we omit it
  // (although many don't complain in this case).
  if (!TM.shouldAssumeDSOLocal(*Caller->getParent(), GV))
    return false;

  return true;
}

static bool
needStackSlotPassParameters(const PPCSubtarget &Subtarget,
                            const SmallVectorImpl<ISD::OutputArg> &Outs) {
  assert(Subtarget.isSVR4ABI() && Subtarget.isPPC64());

  const unsigned PtrByteSize = 8;
  const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();

  static const MCPhysReg GPR[] = {
    PPC::X3, PPC::X4, PPC::X5, PPC::X6,
    PPC::X7, PPC::X8, PPC::X9, PPC::X10,
  };
  static const MCPhysReg VR[] = {
    PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
    PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
  };

  const unsigned NumGPRs = array_lengthof(GPR);
  const unsigned NumFPRs = 13;
  const unsigned NumVRs = array_lengthof(VR);
  const unsigned ParamAreaSize = NumGPRs * PtrByteSize;

  unsigned NumBytes = LinkageSize;
  unsigned AvailableFPRs = NumFPRs;
  unsigned AvailableVRs = NumVRs;

  for (const ISD::OutputArg& Param : Outs) {
    if (Param.Flags.isNest()) continue;

    if (CalculateStackSlotUsed(Param.VT, Param.ArgVT, Param.Flags,
                               PtrByteSize, LinkageSize, ParamAreaSize,
                               NumBytes, AvailableFPRs, AvailableVRs,
                               Subtarget.hasQPX()))
      return true;
  }
  return false;
}
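
// --- Illustrative sketch (editorial addition, not part of this file) ---
// The walk above asks, per outgoing argument, whether the 64-bit SVR4
// convention has run out of its 8 GPRs / 13 FPRs / 13 VRs; the first
// argument that spills to the parameter save area makes the answer "true".
// A hypothetical, deliberately simplified counter for the integer-only case:

static bool anyIntArgNeedsStackSlot(unsigned NumI64Args) {
  const unsigned NumGPRs = 8;  // X3..X10
  return NumI64Args > NumGPRs; // a 9th i64 argument lands on the stack
}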

static bool
hasSameArgumentList(const Function *CallerFn, ImmutableCallSite CS) {
  if (CS.arg_size() != CallerFn->arg_size())
    return false;

  ImmutableCallSite::arg_iterator CalleeArgIter = CS.arg_begin();
  ImmutableCallSite::arg_iterator CalleeArgEnd = CS.arg_end();
  Function::const_arg_iterator CallerArgIter = CallerFn->arg_begin();

  for (; CalleeArgIter != CalleeArgEnd; ++CalleeArgIter, ++CallerArgIter) {
    const Value* CalleeArg = *CalleeArgIter;
    const Value* CallerArg = &(*CallerArgIter);
    if (CalleeArg == CallerArg)
      continue;

    // e.g. @caller([4 x i64] %a, [4 x i64] %b) {
    //        tail call @callee([4 x i64] undef, [4 x i64] %b)
    //      }
    // The first argument of the callee is undef and has the same type as the
    // caller's corresponding argument, so it is still considered a match.
    if (CalleeArg->getType() == CallerArg->getType() &&
        isa<UndefValue>(CalleeArg))
      continue;

    return false;
  }

  return true;
}

// Returns true if TCO is possible between the caller's and callee's
// calling conventions.
static bool
areCallingConvEligibleForTCO_64SVR4(CallingConv::ID CallerCC,
                                    CallingConv::ID CalleeCC) {
  // Tail calls are possible with fastcc and ccc.
  auto isTailCallableCC = [] (CallingConv::ID CC) {
      return CC == CallingConv::C || CC == CallingConv::Fast;
  };
  if (!isTailCallableCC(CallerCC) || !isTailCallableCC(CalleeCC))
    return false;

  // We can safely tail call both fastcc and ccc callees from a c calling
  // convention caller. If the caller is fastcc, we may have less stack space
  // than a non-fastcc caller with the same signature, so disable tail-calls
  // in that case.
  return CallerCC == CallingConv::C || CallerCC == CalleeCC;
}
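
// --- Illustrative sketch (editorial addition, not part of this file) ---
// A standalone mirror of the rule above for quick reference: ccc callers may
// tail-call either convention, while fastcc callers may only tail-call other
// fastcc callees (a fastcc caller may have reserved less stack than a ccc
// caller of the same signature). The enum and helper are hypothetical.

#include <cassert>

enum class CC { C, Fast };

static bool tcoEligible(CC Caller, CC Callee) {
  return Caller == CC::C || Caller == Callee;
}

static void tcoEligibleExamples() {
  assert(tcoEligible(CC::C, CC::Fast));    // ccc -> fastcc: ok
  assert(tcoEligible(CC::Fast, CC::Fast)); // fastcc -> fastcc: ok
  assert(!tcoEligible(CC::Fast, CC::C));   // fastcc -> ccc: rejected
}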
4601
4602
bool
4603
PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
4604
                                    SDValue Callee,
4605
                                    CallingConv::ID CalleeCC,
4606
                                    ImmutableCallSite CS,
4607
                                    bool isVarArg,
4608
                                    const SmallVectorImpl<ISD::OutputArg> &Outs,
4609
                                    const SmallVectorImpl<ISD::InputArg> &Ins,
4610
209
                                    SelectionDAG& DAG) const {
4611
209
  bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;
4612
209
4613
209
  if (DisableSCO && 
!TailCallOpt0
)
return false0
;
4614
209
4615
209
  // Variadic argument functions are not supported.
4616
209
  if (isVarArg) 
return false7
;
4617
202
4618
202
  auto &Caller = DAG.getMachineFunction().getFunction();
4619
202
  // Check that the calling conventions are compatible for tco.
4620
202
  if (!areCallingConvEligibleForTCO_64SVR4(Caller.getCallingConv(), CalleeCC))
4621
0
    return false;
4622
202
4623
202
  // Caller contains any byval parameter is not supported.
4624
202
  if (any_of(Ins, [](const ISD::InputArg &IA) 
{ return IA.Flags.isByVal(); }74
))
4625
0
    return false;
4626
202
4627
202
  // Callee contains any byval parameter is not supported, too.
4628
202
  // Note: This is a quick work around, because in some cases, e.g.
4629
202
  // caller's stack size > callee's stack size, we are still able to apply
4630
202
  // sibling call optimization. For example, gcc is able to do SCO for caller1
4631
202
  // in the following example, but not for caller2.
4632
202
  //   struct test {
4633
202
  //     long int a;
4634
202
  //     char ary[56];
4635
202
  //   } gTest;
4636
202
  //   __attribute__((noinline)) int callee(struct test v, struct test *b) {
4637
202
  //     b->a = v.a;
4638
202
  //     return 0;
4639
202
  //   }
4640
202
  //   void caller1(struct test a, struct test c, struct test *b) {
4641
202
  //     callee(gTest, b); }
4642
202
  //   void caller2(struct test *b) { callee(gTest, b); }
4643
2.07k
  if (any_of(Outs, [](const ISD::OutputArg &OA) { return OA.Flags.isByVal(); }))
4644
6
    return false;
4645
196
4646
196
  // If callee and caller use different calling conventions, we cannot pass
4647
196
  // parameters on stack since offsets for the parameter area may be different.
4648
196
  if (Caller.getCallingConv() != CalleeCC &&
4649
196
      needStackSlotPassParameters(Subtarget, Outs))
4650
26
    return false;
4651
170
4652
170
  // No TCO/SCO on indirect calls because the caller has to restore its TOC.
4653
170
  if (!isFunctionGlobalAddress(Callee) &&
4654
170
      !isa<ExternalSymbolSDNode>(Callee))
4655
13
    return false;
4656
157
4657
157
  // If the caller and callee potentially have different TOC bases then we
4658
157
  // cannot tail call since we need to restore the TOC pointer after the call.
4659
157
  // ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977
4660
157
  if (!callsShareTOCBase(&Caller, Callee, getTargetMachine()))
4661
90
    return false;
4662
67
4663
67
  // TCO allows altering callee ABI, so we don't have to check further.
4664
67
  if (CalleeCC == CallingConv::Fast && TailCallOpt)
4665
1
    return true;
4666
66
4667
66
  if (DisableSCO) return false;
4668
66
4669
66
  // If the callee uses the same argument list as the caller, we can apply SCO
4670
66
  // in this case. If not, we need to check whether the callee needs stack
4671
66
  // slots for passing arguments.
4672
66
  if (!hasSameArgumentList(&Caller, CS) &&
4673
66
      needStackSlotPassParameters(Subtarget, Outs)) {
4674
9
    return false;
4675
9
  }
4676
57
4677
57
  return true;
4678
57
}
4679
4680
/// IsEligibleForTailCallOptimization - Check whether the call is eligible
4681
/// for tail call optimization. Targets which want to do tail call
4682
/// optimization should implement this function.
4683
bool
4684
PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
4685
                                                     CallingConv::ID CalleeCC,
4686
                                                     bool isVarArg,
4687
                                      const SmallVectorImpl<ISD::InputArg> &Ins,
4688
15
                                                     SelectionDAG& DAG) const {
4689
15
  if (!getTargetMachine().Options.GuaranteedTailCallOpt)
4690
13
    return false;
4691
2
4692
2
  // Variable argument functions are not supported.
4693
2
  if (isVarArg)
4694
0
    return false;
4695
2
4696
2
  MachineFunction &MF = DAG.getMachineFunction();
4697
2
  CallingConv::ID CallerCC = MF.getFunction().getCallingConv();
4698
2
  if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
4699
2
    // Functions containing by val parameters are not supported.
4700
4
    for (unsigned i = 0; i != Ins.size(); i++) {
4701
2
       ISD::ArgFlagsTy Flags = Ins[i].Flags;
4702
2
       if (Flags.isByVal()) return false;
4703
2
    }
4704
2
4705
2
    // Non-PIC/GOT tail calls are supported.
4706
2
    if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
4707
1
      return true;
4708
1
4709
1
    // At the moment we can only do local tail calls (in same module, hidden
4710
1
    // or protected) if we are generating PIC.
4711
1
    if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
4712
1
      return G->getGlobal()->hasHiddenVisibility()
4713
1
          || G->getGlobal()->hasProtectedVisibility();
4714
0
  }
4715
0
4716
0
  return false;
4717
0
}
4718
4719
/// isBLACompatibleAddress - Return the immediate to use if the specified
4720
/// 32-bit value is representable in the immediate field of a BxA instruction.
4721
410
static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) {
4722
410
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
4723
410
  if (!C) return nullptr;
4724
5
4725
5
  int Addr = C->getZExtValue();
4726
5
  if ((Addr & 3) != 0 ||  // Low 2 bits are implicitly zero.
4727
5
      SignExtend32<26>(Addr) != Addr)
4728
0
    return nullptr;  // Top 6 bits have to be sext of immediate.
4729
5
4730
5
  return DAG
4731
5
      .getConstant(
4732
5
          (int)C->getZExtValue() >> 2, SDLoc(Op),
4733
5
          DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()))
4734
5
      .getNode();
4735
5
}
4736
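// Illustrative sketch (not part of this file): the encodability test above
// as a self-contained helper; fitsBLAImmediate is a hypothetical name. The
// BLA LI field holds a sign-extended 26-bit byte address whose low 2 bits
// are implicitly zero, so only 24 bits are actually stored.
#include <cstdint>

static bool fitsBLAImmediate(int64_t Addr) {
  if (Addr & 3)
    return false; // low 2 bits are implicitly zero
  // Sign-extend the low 26 bits (bit 25 is the sign bit) and require the
  // result to round-trip, mirroring SignExtend32<26>(Addr) == Addr.
  int64_t SExt = (Addr & 0x3FFFFFF) - ((Addr & 0x2000000) << 1);
  return SExt == Addr;
}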
4737
namespace {
4738
4739
struct TailCallArgumentInfo {
4740
  SDValue Arg;
4741
  SDValue FrameIdxOp;
4742
  int FrameIdx = 0;
4743
4744
64
  TailCallArgumentInfo() = default;
4745
};
4746
4747
} // end anonymous namespace
4748
4749
/// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
4750
static void StoreTailCallArgumentsToStackSlot(
4751
    SelectionDAG &DAG, SDValue Chain,
4752
    const SmallVectorImpl<TailCallArgumentInfo> &TailCallArgs,
4753
3
    SmallVectorImpl<SDValue> &MemOpChains, const SDLoc &dl) {
4754
3
  for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
4755
0
    SDValue Arg = TailCallArgs[i].Arg;
4756
0
    SDValue FIN = TailCallArgs[i].FrameIdxOp;
4757
0
    int FI = TailCallArgs[i].FrameIdx;
4758
0
    // Store relative to framepointer.
4759
0
    MemOpChains.push_back(DAG.getStore(
4760
0
        Chain, dl, Arg, FIN,
4761
0
        MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)));
4762
0
  }
4763
3
}
4764
4765
/// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to
4766
/// the appropriate stack slot for the tail call optimized function call.
4767
static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain,
4768
                                             SDValue OldRetAddr, SDValue OldFP,
4769
3
                                             int SPDiff, const SDLoc &dl) {
4770
3
  if (SPDiff) {
4771
3
    // Calculate the new stack slot for the return address.
4772
3
    MachineFunction &MF = DAG.getMachineFunction();
4773
3
    const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
4774
3
    const PPCFrameLowering *FL = Subtarget.getFrameLowering();
4775
3
    bool isPPC64 = Subtarget.isPPC64();
4776
3
    int SlotSize = isPPC64 ? 8 : 4;
4777
3
    int NewRetAddrLoc = SPDiff + FL->getReturnSaveOffset();
4778
3
    int NewRetAddr = MF.getFrameInfo().CreateFixedObject(SlotSize,
4779
3
                                                         NewRetAddrLoc, true);
4780
3
    EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
4781
3
    SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT);
4782
3
    Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,
4783
3
                         MachinePointerInfo::getFixedStack(MF, NewRetAddr));
4784
3
4785
3
    // When using the 32/64-bit SVR4 ABI there is no need to move the FP stack
4786
3
    // slot as the FP is never overwritten.
4787
3
    if (Subtarget.isDarwinABI()) {
4788
0
      int NewFPLoc = SPDiff + FL->getFramePointerSaveOffset();
4789
0
      int NewFPIdx = MF.getFrameInfo().CreateFixedObject(SlotSize, NewFPLoc,
4790
0
                                                         true);
4791
0
      SDValue NewFramePtrIdx = DAG.getFrameIndex(NewFPIdx, VT);
4792
0
      Chain = DAG.getStore(Chain, dl, OldFP, NewFramePtrIdx,
4793
0
                           MachinePointerInfo::getFixedStack(
4794
0
                               DAG.getMachineFunction(), NewFPIdx));
4795
0
    }
4796
3
  }
4797
3
  return Chain;
4798
3
}
4799
4800
/// CalculateTailCallArgDest - Remember the argument for later processing. Calculate
4801
/// the position of the argument.
4802
static void
4803
CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64,
4804
                         SDValue Arg, int SPDiff, unsigned ArgOffset,
4805
64
                     SmallVectorImpl<TailCallArgumentInfo>& TailCallArguments) {
4806
64
  int Offset = ArgOffset + SPDiff;
4807
64
  uint32_t OpSize = (Arg.getValueSizeInBits() + 7) / 8;
4808
64
  int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
4809
64
  EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
4810
64
  SDValue FIN = DAG.getFrameIndex(FI, VT);
4811
64
  TailCallArgumentInfo Info;
4812
64
  Info.Arg = Arg;
4813
64
  Info.FrameIdxOp = FIN;
4814
64
  Info.FrameIdx = FI;
4815
64
  TailCallArguments.push_back(Info);
4816
64
}
4817
4818
/// EmitTailCallLoadFPAndRetAddr - Emit load from frame pointer and return address
4819
/// stack slot. Returns the chain as result and the loaded frame pointers in
4820
/// LROpOut/FPOpout. Used when tail calling.
4821
SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(
4822
    SelectionDAG &DAG, int SPDiff, SDValue Chain, SDValue &LROpOut,
4823
2.19k
    SDValue &FPOpOut, const SDLoc &dl) const {
4824
2.19k
  if (SPDiff) {
4825
3
    // Load the LR and FP stack slot for later adjusting.
4826
3
    EVT VT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
4827
3
    LROpOut = getReturnAddrFrameIndex(DAG);
4828
3
    LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo());
4829
3
    Chain = SDValue(LROpOut.getNode(), 1);
4830
3
4831
3
    // When using the 32/64-bit SVR4 ABI there is no need to load the FP stack
4832
3
    // slot as the FP is never overwritten.
4833
3
    if (Subtarget.isDarwinABI()) {
4834
0
      FPOpOut = getFramePointerFrameIndex(DAG);
4835
0
      FPOpOut = DAG.getLoad(VT, dl, Chain, FPOpOut, MachinePointerInfo());
4836
0
      Chain = SDValue(FPOpOut.getNode(), 1);
4837
0
    }
4838
3
  }
4839
2.19k
  return Chain;
4840
2.19k
}
4841
4842
/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
4843
/// by "Src" to address "Dst" of size "Size".  Alignment information is
4844
/// specified by the specific parameter attribute. The copy will be passed as
4845
/// a byval function parameter.
4846
/// Sometimes what we are copying is the end of a larger object, the part that
4847
/// does not fit in registers.
4848
static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
4849
                                         SDValue Chain, ISD::ArgFlagsTy Flags,
4850
54
                                         SelectionDAG &DAG, const SDLoc &dl) {
4851
54
  SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
4852
54
  return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
4853
54
                       false, false, false, MachinePointerInfo(),
4854
54
                       MachinePointerInfo());
4855
54
}
4856
4857
/// LowerMemOpCallTo - Store the argument to the stack or remember it in case of
4858
/// tail calls.
4859
static void LowerMemOpCallTo(
4860
    SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg,
4861
    SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64,
4862
    bool isTailCall, bool isVector, SmallVectorImpl<SDValue> &MemOpChains,
4863
512
    SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments, const SDLoc &dl) {
4864
512
  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
4865
512
  if (!isTailCall) {
4866
448
    if (isVector) {
4867
88
      SDValue StackPtr;
4868
88
      if (isPPC64)
4869
88
        StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
4870
0
      else
4871
0
        StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
4872
88
      PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
4873
88
                           DAG.getConstant(ArgOffset, dl, PtrVT));
4874
88
    }
4875
448
    MemOpChains.push_back(
4876
448
        DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
4877
448
    // Calculate and remember argument location.
4878
448
  } else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset,
4879
64
                                  TailCallArguments);
4880
512
}
4881
4882
static void
4883
PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain,
4884
                const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp,
4885
                SDValue FPOp,
4886
3
                SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
4887
3
  // Emit a sequence of copyto/copyfrom virtual registers for arguments that
4888
3
  // might overwrite each other in case of tail call optimization.
4889
3
  SmallVector<SDValue, 8> MemOpChains2;
4890
3
  // Do not flag preceding copytoreg stuff together with the following stuff.
4891
3
  InFlag = SDValue();
4892
3
  StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,
4893
3
                                    MemOpChains2, dl);
4894
3
  if (!MemOpChains2.empty())
4895
0
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
4896
3
4897
3
  // Store the return address to the appropriate stack slot.
4898
3
  Chain = EmitTailCallStoreFPAndRetAddr(DAG, Chain, LROp, FPOp, SPDiff, dl);
4899
3
4900
3
  // Emit callseq_end just before tailcall node.
4901
3
  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
4902
3
                             DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
4903
3
  InFlag = Chain.getValue(1);
4904
3
}
4905
4906
// Is this global address that of a function that can be called by name? (as
4907
// opposed to something that must hold a descriptor for an indirect call).
4908
4.14k
static bool isFunctionGlobalAddress(SDValue Callee) {
4909
4.14k
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
4910
2.78k
    if (Callee.getOpcode() == ISD::GlobalTLSAddress ||
4911
2.78k
        Callee.getOpcode() == ISD::TargetGlobalTLSAddress)
4912
3
      return false;
4913
2.78k
4914
2.78k
    return G->getGlobal()->getValueType()->isFunctionTy();
4915
2.78k
  }
4916
1.35k
4917
1.35k
  return false;
4918
1.35k
}
4919
4920
static unsigned
4921
PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, SDValue &Chain,
4922
            SDValue CallSeqStart, const SDLoc &dl, int SPDiff, bool isTailCall,
4923
            bool isPatchPoint, bool hasNest,
4924
            SmallVectorImpl<std::pair<unsigned, SDValue>> &RegsToPass,
4925
            SmallVectorImpl<SDValue> &Ops, std::vector<EVT> &NodeTys,
4926
2.21k
            ImmutableCallSite CS, const PPCSubtarget &Subtarget) {
4927
2.21k
  bool isPPC64 = Subtarget.isPPC64();
4928
2.21k
  bool isSVR4ABI = Subtarget.isSVR4ABI();
4929
2.21k
  bool isELFv2ABI = Subtarget.isELFv2ABI();
4930
2.21k
  bool isAIXABI = Subtarget.isAIXABI();
4931
2.21k
4932
2.21k
  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
4933
2.21k
  NodeTys.push_back(MVT::Other);   // Returns a chain
4934
2.21k
  NodeTys.push_back(MVT::Glue);    // Returns a flag for retval copy to use.
4935
2.21k
4936
2.21k
  unsigned CallOpc = PPCISD::CALL;
4937
2.21k
4938
2.21k
  bool needIndirectCall = true;
4939
2.21k
  if (!isSVR4ABI || !isPPC64)
4940
410
    if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG)) {
4941
5
      // If this is an absolute destination address, use the munged value.
4942
5
      Callee = SDValue(Dest, 0);
4943
5
      needIndirectCall = false;
4944
5
    }
4945
2.21k
4946
2.21k
  // PC-relative references to external symbols should go through $stub, unless
4947
2.21k
  // we're building with the leopard linker or later, which automatically
4948
2.21k
  // synthesizes these stubs.
4949
2.21k
  const TargetMachine &TM = DAG.getTarget();
4950
2.21k
  const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
4951
2.21k
  const GlobalValue *GV = nullptr;
4952
2.21k
  if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee))
4953
1.43k
    GV = G->getGlobal();
4954
2.21k
  bool Local = TM.shouldAssumeDSOLocal(*Mod, GV);
4955
2.21k
  bool UsePlt = !Local && Subtarget.isTargetELF() && !isPPC64;
4956
2.21k
4957
2.21k
  // If the callee is a GlobalAddress/ExternalSymbol node (quite common,
4958
2.21k
  // every direct call is) turn it into a TargetGlobalAddress /
4959
2.21k
  // TargetExternalSymbol node so that legalize doesn't hack it.
4960
2.21k
  if (isFunctionGlobalAddress(Callee)) {
4961
1.43k
    GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Callee);
4962
1.43k
4963
1.43k
    // A call to a TLS address is actually an indirect call to a
4964
1.43k
    // thread-specific pointer.
4965
1.43k
    unsigned OpFlags = 0;
4966
1.43k
    if (UsePlt)
4967
195
      OpFlags = PPCII::MO_PLT;
4968
1.43k
4969
1.43k
    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl,
4970
1.43k
                                        Callee.getValueType(), 0, OpFlags);
4971
1.43k
    needIndirectCall = false;
4972
1.43k
  }
4973
2.21k
4974
2.21k
  if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
4975
682
    unsigned char OpFlags = 0;
4976
682
4977
682
    if (UsePlt)
4978
172
      OpFlags = PPCII::MO_PLT;
4979
682
4980
682
    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), Callee.getValueType(),
4981
682
                                         OpFlags);
4982
682
    needIndirectCall = false;
4983
682
  }
4984
2.21k
4985
2.21k
  if (isPatchPoint) {
4986
28
    // We'll form an invalid direct call when lowering a patchpoint; the full
4987
28
    // sequence for an indirect call is complicated, and many of the
4988
28
    // instructions introduced might have side effects (and, thus, can't be
4989
28
    // removed later). The call itself will be removed as soon as the
4990
28
    // argument/return lowering is complete, so the fact that it has the wrong
4991
28
    // kind of operands should not really matter.
4992
28
    needIndirectCall = false;
4993
28
  }
4994
2.21k
4995
2.21k
  if (needIndirectCall) {
4996
66
    // Otherwise, this is an indirect call.  We have to use a MTCTR/BCTRL pair
4997
66
    // to do the call, we can't use PPCISD::CALL.
4998
66
    SDValue MTCTROps[] = {Chain, Callee, InFlag};
4999
66
5000
66
    if (isSVR4ABI && isPPC64 && !isELFv2ABI) {
5001
30
      // Function pointers in the 64-bit SVR4 ABI do not point to the function
5002
30
      // entry point, but to the function descriptor (the function entry point
5003
30
      // address is part of the function descriptor though).
5004
30
      // The function descriptor is a three doubleword structure with the
5005
30
      // following fields: function entry point, TOC base address and
5006
30
      // environment pointer.
5007
30
      // Thus for a call through a function pointer, the following actions need
5008
30
      // to be performed:
5009
30
      //   1. Save the TOC of the caller in the TOC save area of its stack
5010
30
      //      frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()).
5011
30
      //   2. Load the address of the function entry point from the function
5012
30
      //      descriptor.
5013
30
      //   3. Load the TOC of the callee from the function descriptor into r2.
5014
30
      //   4. Load the environment pointer from the function descriptor into
5015
30
      //      r11.
5016
30
      //   5. Branch to the function entry point address.
5017
30
      //   6. On return of the callee, the TOC of the caller needs to be
5018
30
      //      restored (this is done in FinishCall()).
5019
30
      //
5020
30
      // The loads are scheduled at the beginning of the call sequence, and the
5021
30
      // register copies are flagged together to ensure that no other
5022
30
      // operations can be scheduled in between. E.g. without flagging the
5023
30
      // copies together, a TOC access in the caller could be scheduled between
5024
30
      // the assignment of the callee TOC and the branch to the callee, which
5025
30
      // results in the TOC access going through the TOC of the callee instead
5026
30
      // of going through the TOC of the caller, which leads to incorrect code.
5027
30
5028
30
      // Load the address of the function entry point from the function
5029
30
      // descriptor.
5030
30
      SDValue LDChain = CallSeqStart.getValue(CallSeqStart->getNumValues()-1);
5031
30
      if (LDChain.getValueType() == MVT::Glue)
5032
30
        LDChain = CallSeqStart.getValue(CallSeqStart->getNumValues()-2);
5033
30
5034
30
      auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors()
5035
30
                          ? (MachineMemOperand::MODereferenceable |
5036
29
                             MachineMemOperand::MOInvariant)
5037
30
                          : MachineMemOperand::MONone;
5038
30
5039
30
      MachinePointerInfo MPI(CS ? CS.getCalledValue() : nullptr);
5040
30
      SDValue LoadFuncPtr = DAG.getLoad(MVT::i64, dl, LDChain, Callee, MPI,
5041
30
                                        /* Alignment = */ 8, MMOFlags);
5042
30
5043
30
      // Load environment pointer into r11.
5044
30
      SDValue PtrOff = DAG.getIntPtrConstant(16, dl);
5045
30
      SDValue AddPtr = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, PtrOff);
5046
30
      SDValue LoadEnvPtr =
5047
30
          DAG.getLoad(MVT::i64, dl, LDChain, AddPtr, MPI.getWithOffset(16),
5048
30
                      /* Alignment = */ 8, MMOFlags);
5049
30
5050
30
      SDValue TOCOff = DAG.getIntPtrConstant(8, dl);
5051
30
      SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, TOCOff);
5052
30
      SDValue TOCPtr =
5053
30
          DAG.getLoad(MVT::i64, dl, LDChain, AddTOC, MPI.getWithOffset(8),
5054
30
                      /* Alignment = */ 8, MMOFlags);
5055
30
5056
30
      setUsesTOCBasePtr(DAG);
5057
30
      SDValue TOCVal = DAG.getCopyToReg(Chain, dl, PPC::X2, TOCPtr,
5058
30
                                        InFlag);
5059
30
      Chain = TOCVal.getValue(0);
5060
30
      InFlag = TOCVal.getValue(1);
5061
30
5062
30
      // If the function call has an explicit 'nest' parameter, it takes the
5063
30
      // place of the environment pointer.
5064
30
      if (!hasNest) {
5065
29
        SDValue EnvVal = DAG.getCopyToReg(Chain, dl, PPC::X11, LoadEnvPtr,
5066
29
                                          InFlag);
5067
29
5068
29
        Chain = EnvVal.getValue(0);
5069
29
        InFlag = EnvVal.getValue(1);
5070
29
      }
5071
30
5072
30
      MTCTROps[0] = Chain;
5073
30
      MTCTROps[1] = LoadFuncPtr;
5074
30
      MTCTROps[2] = InFlag;
5075
30
    }
5076
66
5077
66
    Chain = DAG.getNode(PPCISD::MTCTR, dl, NodeTys,
5078
66
                        makeArrayRef(MTCTROps, InFlag.getNode() ? 3 : 2));
5079
66
    InFlag = Chain.getValue(1);
5080
66
5081
66
    NodeTys.clear();
5082
66
    NodeTys.push_back(MVT::Other);
5083
66
    NodeTys.push_back(MVT::Glue);
5084
66
    Ops.push_back(Chain);
5085
66
    CallOpc = PPCISD::BCTRL;
5086
66
    Callee.setNode(nullptr);
5087
66
    // Add use of X11 (holding environment pointer)
5088
66
    if (isSVR4ABI && isPPC64 && !isELFv2ABI && !hasNest)
5089
29
      Ops.push_back(DAG.getRegister(PPC::X11, PtrVT));
5090
66
    // Add CTR register as callee so a bctr can be emitted later.
5091
66
    if (isTailCall)
5092
0
      Ops.push_back(DAG.getRegister(isPPC64 ? PPC::CTR8 : PPC::CTR, PtrVT));
5093
66
  }
5094
2.21k
5095
2.21k
  // If this is a direct call, pass the chain and the callee.
5096
2.21k
  if (Callee.getNode()) {
5097
2.14k
    Ops.push_back(Chain);
5098
2.14k
    Ops.push_back(Callee);
5099
2.14k
  }
5100
2.21k
  // If this is a tail call add stack pointer delta.
5101
2.21k
  if (isTailCall)
5102
60
    Ops.push_back(DAG.getConstant(SPDiff, dl, MVT::i32));
5103
2.21k
5104
2.21k
  // Add argument registers to the end of the list so that they are known live
5105
2.21k
  // into the call.
5106
7.11k
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
5107
4.90k
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
5108
4.90k
                                  RegsToPass[i].second.getValueType()));
5109
2.21k
5110
2.21k
  // All calls, in the AIX ABI and 64-bit ELF ABIs, need the TOC register
5111
2.21k
  // live into the call.
5112
2.21k
  // We do need to reserve R2/X2 to appease the verifier for the PATCHPOINT.
5113
2.21k
  if ((isSVR4ABI && isPPC64) || isAIXABI) {
5114
1.82k
    setUsesTOCBasePtr(DAG);
5115
1.82k
5116
1.82k
    // We cannot add R2/X2 as an operand here for PATCHPOINT, because there is
5117
1.82k
    // no way to mark dependencies as implicit here.
5118
1.82k
    // We will add the R2/X2 dependency in EmitInstrWithCustomInserter.
5119
1.82k
    if (!isPatchPoint)
5120
1.79k
      Ops.push_back(DAG.getRegister(isPPC64 ? PPC::X2
5121
1.79k
                                            : PPC::R2, PtrVT));
5122
1.82k
  }
5123
2.21k
5124
2.21k
  return CallOpc;
5125
2.21k
}
5126
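// Illustrative sketch (not part of this file): the ELFv1 function-descriptor
// layout described in PrepareCall above, with editor-chosen field names. The
// three loads in the indirect-call sequence read these fields at offsets 0,
// 8 and 16 from the callee pointer.
#include <cstdint>

struct ELFv1FunctionDescriptor {
  uint64_t EntryPoint; // +0:  moved to CTR and branched to via BCTRL
  uint64_t TOCBase;    // +8:  loaded into r2 before the call
  uint64_t EnvPointer; // +16: loaded into r11, unless a 'nest' parameter
                       //      already occupies that register
};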
5127
SDValue PPCTargetLowering::LowerCallResult(
5128
    SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
5129
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5130
2.15k
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
5131
2.15k
  SmallVector<CCValAssign, 16> RVLocs;
5132
2.15k
  CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
5133
2.15k
                    *DAG.getContext());
5134
2.15k
5135
2.15k
  CCRetInfo.AnalyzeCallResult(
5136
2.15k
      Ins, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
5137
2.15k
               ? RetCC_PPC_Cold
5138
2.15k
               : RetCC_PPC);
5139
2.15k
5140
2.15k
  // Copy all of the result registers out of their specified physreg.
5141
3.62k
  for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
5142
1.47k
    CCValAssign &VA = RVLocs[i];
5143
1.47k
    assert(VA.isRegLoc() && "Can only return in registers!");
5144
1.47k
5145
1.47k
    SDValue Val;
5146
1.47k
5147
1.47k
    if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
5148
2
      SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
5149
2
                                      InFlag);
5150
2
      Chain = Lo.getValue(1);
5151
2
      InFlag = Lo.getValue(2);
5152
2
      VA = RVLocs[++i]; // skip ahead to next loc
5153
2
      SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
5154
2
                                      InFlag);
5155
2
      Chain = Hi.getValue(1);
5156
2
      InFlag = Hi.getValue(2);
5157
2
      if (!Subtarget.isLittleEndian())
5158
2
        std::swap (Lo, Hi);
5159
2
      Val = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, Lo, Hi);
5160
1.47k
    } else {
5161
1.47k
      Val = DAG.getCopyFromReg(Chain, dl,
5162
1.47k
                               VA.getLocReg(), VA.getLocVT(), InFlag);
5163
1.47k
      Chain = Val.getValue(1);
5164
1.47k
      InFlag = Val.getValue(2);
5165
1.47k
    }
5166
1.47k
5167
1.47k
    switch (VA.getLocInfo()) {
5168
1.47k
    default: llvm_unreachable("Unknown loc info!");
5169
1.47k
    case CCValAssign::Full: break;
5170
1.47k
    case CCValAssign::AExt:
5171
60
      Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5172
60
      break;
5173
1.47k
    case CCValAssign::ZExt:
5174
29
      Val = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), Val,
5175
29
                        DAG.getValueType(VA.getValVT()));
5176
29
      Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5177
29
      break;
5178
1.47k
    case CCValAssign::SExt:
5179
154
      Val = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), Val,
5180
154
                        DAG.getValueType(VA.getValVT()));
5181
154
      Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5182
154
      break;
5183
1.47k
    }
5184
1.47k
5185
1.47k
    InVals.push_back(Val);
5186
1.47k
  }
5187
2.15k
5188
2.15k
  return Chain;
5189
2.15k
}
5190
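// Illustrative sketch (not part of this file): the SPE f64 reassembly done
// by BUILD_SPE64 above, modeled as a hypothetical host-side helper. An f64
// comes back in two 32-bit GPRs, and the lowering swaps Lo/Hi on big-endian
// targets before gluing the halves together.
#include <cstdint>
#include <cstring>

static double buildSPE64(uint32_t Lo, uint32_t Hi) {
  uint64_t Bits = (uint64_t(Hi) << 32) | Lo; // Hi carries the high word
  double D;
  std::memcpy(&D, &Bits, sizeof(D)); // bit-for-bit reinterpretation
  return D;
}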
5191
SDValue PPCTargetLowering::FinishCall(
5192
    CallingConv::ID CallConv, const SDLoc &dl, bool isTailCall, bool isVarArg,
5193
    bool isPatchPoint, bool hasNest, SelectionDAG &DAG,
5194
    SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, SDValue InFlag,
5195
    SDValue Chain, SDValue CallSeqStart, SDValue &Callee, int SPDiff,
5196
    unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins,
5197
2.21k
    SmallVectorImpl<SDValue> &InVals, ImmutableCallSite CS) const {
5198
2.21k
  std::vector<EVT> NodeTys;
5199
2.21k
  SmallVector<SDValue, 8> Ops;
5200
2.21k
  unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, CallSeqStart, dl,
5201
2.21k
                                 SPDiff, isTailCall, isPatchPoint, hasNest,
5202
2.21k
                                 RegsToPass, Ops, NodeTys, CS, Subtarget);
5203
2.21k
5204
2.21k
  // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
5205
2.21k
  if (isVarArg && Subtarget.isSVR4ABI() && !Subtarget.isPPC64())
5206
89
    Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32));
5207
2.21k
5208
2.21k
  // When performing tail call optimization the callee pops its arguments off
5209
2.21k
  // the stack. Account for this here so these bytes can be pushed back on in
5210
2.21k
  // PPCFrameLowering::eliminateCallFramePseudoInstr.
5211
2.21k
  int BytesCalleePops =
5212
2.21k
    (CallConv == CallingConv::Fast &&
5213
2.21k
     getTargetMachine().Options.GuaranteedTailCallOpt) ? NumBytes : 0;
5214
2.21k
5215
2.21k
  // Add a register mask operand representing the call-preserved registers.
5216
2.21k
  const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
5217
2.21k
  const uint32_t *Mask =
5218
2.21k
      TRI->getCallPreservedMask(DAG.getMachineFunction(), CallConv);
5219
2.21k
  assert(Mask && "Missing call preserved mask for calling convention");
5220
2.21k
  Ops.push_back(DAG.getRegisterMask(Mask));
5221
2.21k
5222
2.21k
  if (InFlag.getNode())
5223
1.64k
    Ops.push_back(InFlag);
5224
2.21k
5225
2.21k
  // Emit tail call.
5226
2.21k
  if (isTailCall) {
5227
60
    assert(((Callee.getOpcode() == ISD::Register &&
5228
60
             cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
5229
60
            Callee.getOpcode() == ISD::TargetExternalSymbol ||
5230
60
            Callee.getOpcode() == ISD::TargetGlobalAddress ||
5231
60
            isa<ConstantSDNode>(Callee)) &&
5232
60
    "Expecting an global address, external symbol, absolute value or register");
5233
60
5234
60
    DAG.getMachineFunction().getFrameInfo().setHasTailCall();
5235
60
    return DAG.getNode(PPCISD::TC_RETURN, dl, MVT::Other, Ops);
5236
60
  }
5237
2.15k
5238
2.15k
  // Add a NOP immediately after the branch instruction when using the 64-bit
5239
2.15k
  // SVR4 or the AIX ABI.
5240
2.15k
  // At link time, if caller and callee are in a different module and
5241
2.15k
  // thus have a different TOC, the call will be replaced with a call to a stub
5242
2.15k
  // function which saves the current TOC, loads the TOC of the callee and
5243
2.15k
  // branches to the callee. The NOP will be replaced with a load instruction
5244
2.15k
  // which restores the TOC of the caller from the TOC save slot of the current
5245
2.15k
  // stack frame. If caller and callee belong to the same module (and have the
5246
2.15k
  // same TOC), the NOP will remain unchanged, or become some other NOP.
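  // For example, a cross-module call site
  //     bl callee
  //     nop
  // may be rewritten by the linker into
  //     bl callee_stub
  //     ld 2, 24(1)
  // (24(r1) is the ELFv2 TOC save slot; under ELFv1 it is 40(r1)).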
5247
2.15k
5248
2.15k
  MachineFunction &MF = DAG.getMachineFunction();
5249
2.15k
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
5250
2.15k
  if (!isTailCall && !isPatchPoint &&
5251
2.15k
      ((Subtarget.isSVR4ABI() && Subtarget.isPPC64()) ||
5252
2.12k
       Subtarget.isAIXABI())) {
5253
1.73k
    if (CallOpc == PPCISD::BCTRL) {
5254
61
      if (Subtarget.isAIXABI())
5255
0
        report_fatal_error("Indirect call on AIX is not implemented.");
5256
61
5257
61
      // This is a call through a function pointer.
5258
61
      // Restore the caller TOC from the save area into R2.
5259
61
      // See PrepareCall() for more information about calls through function
5260
61
      // pointers in the 64-bit SVR4 ABI.
5261
61
      // We are using a target-specific load with r2 hard coded, because the
5262
61
      // result of a target-independent load would never go directly into r2,
5263
61
      // since r2 is a reserved register (which prevents the register allocator
5264
61
      // from allocating it), resulting in an additional register being
5265
61
      // allocated and an unnecessary move instruction being generated.
5266
61
      CallOpc = PPCISD::BCTRL_LOAD_TOC;
5267
61
5268
61
      SDValue StackPtr = DAG.getRegister(PPC::X1, PtrVT);
5269
61
      unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
5270
61
      SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
5271
61
      SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, StackPtr, TOCOff);
5272
61
5273
61
      // The address needs to go after the chain input but before the flag (or
5274
61
      // any other variadic arguments).
5275
61
      Ops.insert(std::next(Ops.begin()), AddTOC);
5276
1.67k
    } else if (CallOpc == PPCISD::CALL &&
5277
1.67k
      !callsShareTOCBase(&MF.getFunction(), Callee, DAG.getTarget())) {
5278
1.59k
      // Otherwise insert NOP for non-local calls.
5279
1.59k
      CallOpc = PPCISD::CALL_NOP;
5280
1.59k
    }
5281
1.73k
  }
5282
2.15k
5283
2.15k
  if (Subtarget.isAIXABI() && isFunctionGlobalAddress(Callee)) {
5284
22
    // On AIX, direct function calls reference the symbol for the function's
5285
22
    // entry point, which is named by inserting a "." before the function's
5286
22
    // C-linkage name.
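    // For example, a direct call to a function foo references the symbol
    // ".foo" rather than "foo".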
5287
22
    GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Callee);
5288
22
    auto &Context = DAG.getMachineFunction().getMMI().getContext();
5289
22
    MCSymbol *S = Context.getOrCreateSymbol(Twine(".") +
5290
22
                                            Twine(G->getGlobal()->getName()));
5291
22
    Callee = DAG.getMCSymbol(S, PtrVT);
5292
22
    // Replace the GlobalAddressSDNode Callee with the MCSymbolSDNode.
5293
22
    Ops[1] = Callee;
5294
22
  }
5295
2.15k
5296
2.15k
  Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
5297
2.15k
  InFlag = Chain.getValue(1);
5298
2.15k
5299
2.15k
  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
5300
2.15k
                             DAG.getIntPtrConstant(BytesCalleePops, dl, true),
5301
2.15k
                             InFlag, dl);
5302
2.15k
  if (!Ins.empty())
5303
1.27k
    InFlag = Chain.getValue(1);
5304
2.15k
5305
2.15k
  return LowerCallResult(Chain, InFlag, CallConv, isVarArg,
5306
2.15k
                         Ins, dl, DAG, InVals);
5307
2.15k
}
5308
5309
SDValue
5310
PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
5311
2.21k
                             SmallVectorImpl<SDValue> &InVals) const {
5312
2.21k
  SelectionDAG &DAG                     = CLI.DAG;
5313
2.21k
  SDLoc &dl                             = CLI.DL;
5314
2.21k
  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
5315
2.21k
  SmallVectorImpl<SDValue> &OutVals     = CLI.OutVals;
5316
2.21k
  SmallVectorImpl<ISD::InputArg> &Ins   = CLI.Ins;
5317
2.21k
  SDValue Chain                         = CLI.Chain;
5318
2.21k
  SDValue Callee                        = CLI.Callee;
5319
2.21k
  bool &isTailCall                      = CLI.IsTailCall;
5320
2.21k
  CallingConv::ID CallConv              = CLI.CallConv;
5321
2.21k
  bool isVarArg                         = CLI.IsVarArg;
5322
2.21k
  bool isPatchPoint                     = CLI.IsPatchPoint;
5323
2.21k
  ImmutableCallSite CS                  = CLI.CS;
5324
2.21k
5325
2.21k
  if (isTailCall) {
5326
225
    if (Subtarget.useLongCalls() && !(CS && CS.isMustTailCall()))
5327
1
      isTailCall = false;
5328
224
    else if (Subtarget.isSVR4ABI() && Subtarget.isPPC64())
5329
209
      isTailCall =
5330
209
        IsEligibleForTailCallOptimization_64SVR4(Callee, CallConv, CS,
5331
209
                                                 isVarArg, Outs, Ins, DAG);
5332
15
    else
5333
15
      isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg,
5334
15
                                                     Ins, DAG);
5335
225
    if (isTailCall) {
5336
60
      ++NumTailCalls;
5337
60
      if (!getTargetMachine().Options.GuaranteedTailCallOpt)
5338
57
        ++NumSiblingCalls;
5339
60
5340
60
      assert(isa<GlobalAddressSDNode>(Callee) &&
5341
60
             "Callee should be an llvm::Function object.");
5342
60
      LLVM_DEBUG(
5343
60
          const GlobalValue *GV =
5344
60
              cast<GlobalAddressSDNode>(Callee)->getGlobal();
5345
60
          const unsigned Width =
5346
60
              80 - strlen("TCO caller: ") - strlen(", callee linkage: 0, 0");
5347
60
          dbgs() << "TCO caller: "
5348
60
                 << left_justify(DAG.getMachineFunction().getName(), Width)
5349
60
                 << ", callee linkage: " << GV->getVisibility() << ", "
5350
60
                 << GV->getLinkage() << "\n");
5351
60
    }
5352
225
  }
5353
2.21k
5354
2.21k
  if (!isTailCall && CS && CS.isMustTailCall())
5355
0
    report_fatal_error("failed to perform tail call elimination on a call "
5356
0
                       "site marked musttail");
5357
2.21k
5358
2.21k
  // When long calls (i.e. indirect calls) are always used, calls are always
5359
2.21k
  // made via function pointer. If we have a function name, first translate it
5360
2.21k
  // into a pointer.
5361
2.21k
  if (Subtarget.useLongCalls() && isa<GlobalAddressSDNode>(Callee) &&
5362
2.21k
      !isTailCall)
5363
1
    Callee = LowerGlobalAddress(Callee, DAG);
5364
2.21k
5365
2.21k
  if (Subtarget.isSVR4ABI() && Subtarget.isPPC64())
5366
1.80k
    return LowerCall_64SVR4(Chain, Callee, CallConv, isVarArg,
5367
1.80k
                            isTailCall, isPatchPoint, Outs, OutVals, Ins,
5368
1.80k
                            dl, DAG, InVals, CS);
5369
410
5370
410
  if (Subtarget.isSVR4ABI())
5371
388
    return LowerCall_32SVR4(Chain, Callee, CallConv, isVarArg,
5372
388
                            isTailCall, isPatchPoint, Outs, OutVals, Ins,
5373
388
                            dl, DAG, InVals, CS);
5374
22
5375
22
  if (Subtarget.isAIXABI())
5376
22
    return LowerCall_AIX(Chain, Callee, CallConv, isVarArg,
5377
22
                         isTailCall, isPatchPoint, Outs, OutVals, Ins,
5378
22
                         dl, DAG, InVals, CS);
5379
0
5380
0
  return LowerCall_Darwin(Chain, Callee, CallConv, isVarArg,
5381
0
                          isTailCall, isPatchPoint, Outs, OutVals, Ins,
5382
0
                          dl, DAG, InVals, CS);
5383
0
}
5384
5385
SDValue PPCTargetLowering::LowerCall_32SVR4(
5386
    SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg,
5387
    bool isTailCall, bool isPatchPoint,
5388
    const SmallVectorImpl<ISD::OutputArg> &Outs,
5389
    const SmallVectorImpl<SDValue> &OutVals,
5390
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5391
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
5392
388
    ImmutableCallSite CS) const {
5393
388
  // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description
5394
388
  // of the 32-bit SVR4 ABI stack frame layout.
5395
388
5396
388
  assert((CallConv == CallingConv::C ||
5397
388
          CallConv == CallingConv::Cold ||
5398
388
          CallConv == CallingConv::Fast) && "Unknown calling convention!");
5399
388
5400
388
  unsigned PtrByteSize = 4;
5401
388
5402
388
  MachineFunction &MF = DAG.getMachineFunction();
5403
388
5404
388
  // Mark this function as potentially containing a tail call. As a
5405
388
  // consequence, the frame pointer will be used for dynamic allocas and for
5406
388
  // restoring the caller's stack pointer in this function's epilogue. This is
5407
388
  // done because the tail-called function might overwrite the value
5408
388
  // in this function's (MF) stack pointer stack slot 0(SP).
5409
388
  if (getTargetMachine().Options.GuaranteedTailCallOpt &&
5410
388
      CallConv == CallingConv::Fast)
5411
2
    MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
5412
388
5413
388
  // Count how many bytes are to be pushed on the stack, including the linkage
5414
388
  // area, parameter list area and the part of the local variable space which
5415
388
  // contains copies of aggregates which are passed by value.
5416
388
5417
388
  // Assign locations to all of the outgoing arguments.
5418
388
  SmallVector<CCValAssign, 16> ArgLocs;
5419
388
  PPCCCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
5420
388
5421
388
  // Reserve space for the linkage area on the stack.
5422
388
  CCInfo.AllocateStack(Subtarget.getFrameLowering()->getLinkageSize(),
5423
388
                       PtrByteSize);
5424
388
  if (useSoftFloat())
5425
27
    CCInfo.PreAnalyzeCallOperands(Outs);
5426
388
5427
388
  if (isVarArg) {
5428
89
    // Handle fixed and variable vector arguments differently.
5429
89
    // Fixed vector arguments go into registers as long as registers are
5430
89
    // available. Variable vector arguments always go into memory.
5431
89
    unsigned NumArgs = Outs.size();
5432
89
5433
318
    for (unsigned i = 0; i != NumArgs; ++i) {
5434
229
      MVT ArgVT = Outs[i].VT;
5435
229
      ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
5436
229
      bool Result;
5437
229
5438
229
      if (Outs[i].IsFixed) {
5439
30
        Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags,
5440
30
                               CCInfo);
5441
199
      } else {
5442
199
        Result = CC_PPC32_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full,
5443
199
                                      ArgFlags, CCInfo);
5444
199
      }
5445
229
5446
229
      if (Result) {
5447
#ifndef NDEBUG
5448
        errs() << "Call operand #" << i << " has unhandled type "
5449
             << EVT(ArgVT).getEVTString() << "\n";
5450
#endif
5451
0
        llvm_unreachable(nullptr);
5452
0
      }
5453
229
    }
5454
299
  } else {
5455
299
    // All arguments are treated the same.
5456
299
    CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4);
5457
299
  }
5458
388
  CCInfo.clearWasPPCF128();
5459
388
5460
388
  // Assign locations to all of the outgoing aggregate by value arguments.
5461
388
  SmallVector<CCValAssign, 16> ByValArgLocs;
5462
388
  CCState CCByValInfo(CallConv, isVarArg, MF, ByValArgLocs, *DAG.getContext());
5463
388
5464
388
  // Reserve stack space for the allocations in CCInfo.
5465
388
  CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);
5466
388
5467
388
  CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal);
5468
388
5469
388
  // Size of the linkage area, parameter list area and the part of the local
5470
388
  // space variable where copies of aggregates which are passed by value are
5471
388
  // stored.
5472
388
  unsigned NumBytes = CCByValInfo.getNextStackOffset();
5473
388
5474
388
  // Calculate by how many bytes the stack has to be adjusted in case of tail
5475
388
  // call optimization.
5476
388
  int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
5477
388
5478
388
  // Adjust the stack pointer for the new arguments...
5479
388
  // These operations are automatically eliminated by the prolog/epilog pass
5480
388
  Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
5481
388
  SDValue CallSeqStart = Chain;
5482
388
5483
388
  // Load the return address and frame pointer so it can be moved somewhere else
5484
388
  // later.
5485
388
  SDValue LROp, FPOp;
5486
388
  Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
5487
388
5488
388
  // Set up a copy of the stack pointer for use loading and storing any
5489
388
  // arguments that may not fit in the registers available for argument
5490
388
  // passing.
5491
388
  SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
5492
388
5493
388
  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
5494
388
  SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
5495
388
  SmallVector<SDValue, 8> MemOpChains;
5496
388
5497
388
  bool seenFloatArg = false;
5498
388
  // Walk the register/memloc assignments, inserting copies/loads.
5499
388
  // i - Tracks the index into the list of registers allocated for the call
5500
388
  // RealArgIdx - Tracks the index into the list of actual function arguments
5501
388
  // j - Tracks the index into the list of byval arguments
5502
388
  for (unsigned i = 0, RealArgIdx = 0, j = 0, e = ArgLocs.size();
5503
1.41k
       i != e;
5504
1.02k
       ++i, ++RealArgIdx) {
5505
1.02k
    CCValAssign &VA = ArgLocs[i];
5506
1.02k
    SDValue Arg = OutVals[RealArgIdx];
5507
1.02k
    ISD::ArgFlagsTy Flags = Outs[RealArgIdx].Flags;
5508
1.02k
5509
1.02k
    if (Flags.isByVal()) {
5510
2
      // Argument is an aggregate which is passed by value, thus we need to
5511
2
      // create a copy of it in the local variable space of the current stack
5512
2
      // frame (which is the stack frame of the caller) and pass the address of
5513
2
      // this copy to the callee.
5514
2
      assert((j < ByValArgLocs.size()) && "Index out of bounds!");
5515
2
      CCValAssign &ByValVA = ByValArgLocs[j++];
5516
2
      assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!");
5517
2
5518
2
      // Memory reserved in the local variable space of the callers stack frame.
5519
2
      unsigned LocMemOffset = ByValVA.getLocMemOffset();
5520
2
5521
2
      SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
5522
2
      PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
5523
2
                           StackPtr, PtrOff);
5524
2
5525
2
      // Create a copy of the argument in the local area of the current
5526
2
      // stack frame.
5527
2
      SDValue MemcpyCall =
5528
2
        CreateCopyOfByValArgument(Arg, PtrOff,
5529
2
                                  CallSeqStart.getNode()->getOperand(0),
5530
2
                                  Flags, DAG, dl);
5531
2
5532
2
      // This must go outside the CALLSEQ_START..END.
5533
2
      SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, NumBytes, 0,
5534
2
                                                     SDLoc(MemcpyCall));
5535
2
      DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
5536
2
                             NewCallSeqStart.getNode());
5537
2
      Chain = CallSeqStart = NewCallSeqStart;
5538
2
5539
2
      // Pass the address of the aggregate copy on the stack either in a
5540
2
      // physical register or in the parameter list area of the current stack
5541
2
      // frame to the callee.
5542
2
      Arg = PtrOff;
5543
2
    }
5544
1.02k
5545
1.02k
    // When useCRBits() is true, there can be i1 arguments.
5546
1.02k
    // It is because getRegisterType(MVT::i1) => MVT::i1,
5547
1.02k
    // and for other integer types getRegisterType() => MVT::i32.
5548
1.02k
    // Extend i1 and ensure callee will get i32.
5549
1.02k
    if (Arg.getValueType() == MVT::i1)
5550
4
      Arg = DAG.getNode(Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
5551
4
                        dl, MVT::i32, Arg);
5552
1.02k
5553
1.02k
    if (VA.isRegLoc()) {
5554
1.01k
      seenFloatArg |= VA.getLocVT().isFloatingPoint();
5555
1.01k
      // Put argument in a physical register.
5556
1.01k
      if (Subtarget.hasSPE() && Arg.getValueType() == MVT::f64) {
5557
2
        bool IsLE = Subtarget.isLittleEndian();
5558
2
        SDValue SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
5559
2
                        DAG.getIntPtrConstant(IsLE ? 0 : 1, dl));
5560
2
        RegsToPass.push_back(std::make_pair(VA.getLocReg(), SVal.getValue(0)));
5561
2
        SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
5562
2
                           DAG.getIntPtrConstant(IsLE ? 1 : 0, dl));
5563
2
        RegsToPass.push_back(std::make_pair(ArgLocs[++i].getLocReg(),
5564
2
                             SVal.getValue(0)));
5565
2
      } else
5566
1.01k
        RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
5567
1.01k
    } else {
5568
15
      // Put argument in the parameter list area of the current stack frame.
5569
15
      assert(VA.isMemLoc());
5570
15
      unsigned LocMemOffset = VA.getLocMemOffset();
5571
15
5572
15
      if (!isTailCall) {
5573
15
        SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
5574
15
        PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
5575
15
                             StackPtr, PtrOff);
5576
15
5577
15
        MemOpChains.push_back(
5578
15
            DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
5579
15
      } else {
5580
0
        // Calculate and remember argument location.
5581
0
        CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset,
5582
0
                                 TailCallArguments);
5583
0
      }
5584
15
    }
5585
1.02k
  }
5586
388
5587
388
  if (!MemOpChains.empty())
5588
12
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
5589
388
5590
388
  // Build a sequence of copy-to-reg nodes chained together with token chain
5591
388
  // and flag operands which copy the outgoing args into the appropriate regs.
5592
388
  SDValue InFlag;
5593
1.40k
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
5594
1.01k
    Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
5595
1.01k
                             RegsToPass[i].second, InFlag);
5596
1.01k
    InFlag = Chain.getValue(1);
5597
1.01k
  }
5598
388
5599
388
  // Set CR bit 6 to true if this is a vararg call with floating args passed in
5600
388
  // registers.
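  // (In the 32-bit SVR4 ABI, CR bit 6 tells a varargs callee whether any
  // floating-point arguments were passed in FPRs, so it knows whether its
  // prologue needs to save the FPR argument registers for va_arg.)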
5601
388
  if (isVarArg) {
5602
89
    SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
5603
89
    SDValue Ops[] = { Chain, InFlag };
5604
89
5605
89
    Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET,
5606
89
                        dl, VTs, makeArrayRef(Ops, InFlag.getNode() ? 2 : 1));
5607
89
5608
89
    InFlag = Chain.getValue(1);
5609
89
  }
5610
388
5611
388
  if (isTailCall)
5612
2
    PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
5613
2
                    TailCallArguments);
5614
388
5615
388
  return FinishCall(CallConv, dl, isTailCall, isVarArg, isPatchPoint,
5616
388
                    /* unused except on PPC64 ELFv1 */ false, DAG,
5617
388
                    RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff,
5618
388
                    NumBytes, Ins, InVals, CS);
5619
388
}
5620
5621
// Copy an argument into memory, being careful to do this outside the
5622
// call sequence for the call to which the argument belongs.
5623
SDValue PPCTargetLowering::createMemcpyOutsideCallSeq(
5624
    SDValue Arg, SDValue PtrOff, SDValue CallSeqStart, ISD::ArgFlagsTy Flags,
5625
52
    SelectionDAG &DAG, const SDLoc &dl) const {
5626
52
  SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
5627
52
                        CallSeqStart.getNode()->getOperand(0),
5628
52
                        Flags, DAG, dl);
5629
52
  // The MEMCPY must go outside the CALLSEQ_START..END.
5630
52
  int64_t FrameSize = CallSeqStart.getConstantOperandVal(1);
5631
52
  SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, FrameSize, 0,
5632
52
                                                 SDLoc(MemcpyCall));
5633
52
  DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
5634
52
                         NewCallSeqStart.getNode());
5635
52
  return NewCallSeqStart;
5636
52
}
5637
5638
SDValue PPCTargetLowering::LowerCall_64SVR4(
5639
    SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg,
5640
    bool isTailCall, bool isPatchPoint,
5641
    const SmallVectorImpl<ISD::OutputArg> &Outs,
5642
    const SmallVectorImpl<SDValue> &OutVals,
5643
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5644
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
5645
1.80k
    ImmutableCallSite CS) const {
5646
1.80k
  bool isELFv2ABI = Subtarget.isELFv2ABI();
5647
1.80k
  bool isLittleEndian = Subtarget.isLittleEndian();
5648
1.80k
  unsigned NumOps = Outs.size();
5649
1.80k
  bool hasNest = false;
5650
1.80k
  bool IsSibCall = false;
5651
1.80k
5652
1.80k
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
5653
1.80k
  unsigned PtrByteSize = 8;
5654
1.80k
5655
1.80k
  MachineFunction &MF = DAG.getMachineFunction();
5656
1.80k
5657
1.80k
  if (isTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt)
5658
57
    IsSibCall = true;
5659
1.80k
5660
1.80k
  // Mark this function as potentially containing a tail call. As a
5661
1.80k
  // consequence, the frame pointer will be used for dynamic allocas and for
5662
1.80k
  // restoring the caller's stack pointer in this function's epilogue. This is
5663
1.80k
  // done because the tail-called function might overwrite the value
5664
1.80k
  // in this function's (MF) stack pointer stack slot 0(SP).
5665
1.80k
  if (getTargetMachine().Options.GuaranteedTailCallOpt &&
5666
1.80k
      CallConv == CallingConv::Fast)
5667
1
    MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
5668
1.80k
5669
1.80k
  assert(!(CallConv == CallingConv::Fast && isVarArg) &&
5670
1.80k
         "fastcc not supported on varargs functions");
5671
1.80k
5672
1.80k
  // Count how many bytes are to be pushed on the stack, including the linkage
5673
1.80k
  // area, and parameter passing area.  On ELFv1, the linkage area is 48 bytes
5674
1.80k
  // reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the linkage
5675
1.80k
  // area is 32 bytes reserved space for [SP][CR][LR][TOC].
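  // As an illustration, the byte offsets from the stack pointer are:
  //   ELFv1: 0 back chain, 8 CR save, 16 LR save, 24/32 reserved, 40 TOC save
  //   ELFv2: 0 back chain, 8 CR save, 16 LR save, 24 TOC save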
5676
1.80k
  unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
5677
1.80k
  unsigned NumBytes = LinkageSize;
5678
1.80k
  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
5679
1.80k
  unsigned &QFPR_idx = FPR_idx;
5680
1.80k
5681
1.80k
  static const MCPhysReg GPR[] = {
5682
1.80k
    PPC::X3, PPC::X4, PPC::X5, PPC::X6,
5683
1.80k
    PPC::X7, PPC::X8, PPC::X9, PPC::X10,
5684
1.80k
  };
5685
1.80k
  static const MCPhysReg VR[] = {
5686
1.80k
    PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
5687
1.80k
    PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
5688
1.80k
  };
5689
1.80k
5690
1.80k
  const unsigned NumGPRs = array_lengthof(GPR);
5691
1.80k
  const unsigned NumFPRs = useSoftFloat() ? 0 : 13;
5692
1.80k
  const unsigned NumVRs  = array_lengthof(VR);
5693
1.80k
  const unsigned NumQFPRs = NumFPRs;
5694
1.80k
5695
1.80k
  // On ELFv2, we can avoid allocating the parameter area if all the arguments
5696
1.80k
  // can be passed to the callee in registers.
5697
1.80k
  // For the fast calling convention, there is another check below.
5698
1.80k
  // Note: We should keep consistent with LowerFormalArguments_64SVR4()
5699
1.80k
  bool HasParameterArea = !isELFv2ABI || isVarArg || CallConv == CallingConv::Fast;
5700
1.80k
  if (!HasParameterArea) {
5701
1.09k
    unsigned ParamAreaSize = NumGPRs * PtrByteSize;
5702
1.09k
    unsigned AvailableFPRs = NumFPRs;
5703
1.09k
    unsigned AvailableVRs = NumVRs;
5704
1.09k
    unsigned NumBytesTmp = NumBytes;
5705
2.86k
    for (unsigned i = 0; i != NumOps; ++i) {
5706
1.77k
      if (Outs[i].Flags.isNest()) continue;
5707
1.77k
      if (CalculateStackSlotUsed(Outs[i].VT, Outs[i].ArgVT, Outs[i].Flags,
5708
1.77k
                                PtrByteSize, LinkageSize, ParamAreaSize,
5709
1.77k
                                NumBytesTmp, AvailableFPRs, AvailableVRs,
5710
1.77k
                                Subtarget.hasQPX()))
5711
91
        HasParameterArea = true;
5712
1.77k
    }
5713
1.09k
  }
5714
1.80k
5715
1.80k
  // When using the fast calling convention, we don't provide backing for
5716
1.80k
  // arguments that will be in registers.
5717
1.80k
  unsigned NumGPRsUsed = 0, NumFPRsUsed = 0, NumVRsUsed = 0;
5718
1.80k
5719
1.80k
  // Avoid allocating parameter area for fastcc functions if all the arguments
5720
1.80k
  // can be passed in the registers.
5721
1.80k
  if (CallConv == CallingConv::Fast)
5722
38
    HasParameterArea = false;
5723
1.80k
5724
1.80k
  // Add up all the space actually used.
5725
6.01k
  for (unsigned i = 0; i != NumOps; ++i) {
5726
4.21k
    ISD::ArgFlagsTy Flags = Outs[i].Flags;
5727
4.21k
    EVT ArgVT = Outs[i].VT;
5728
4.21k
    EVT OrigVT = Outs[i].ArgVT;
5729
4.21k
5730
4.21k
    if (Flags.isNest())
5731
2
      continue;
5732
4.21k
5733
4.21k
    if (CallConv == CallingConv::Fast) {
5734
1.17k
      if (Flags.isByVal()) {
5735
3
        NumGPRsUsed += (Flags.getByValSize()+7)/8;
5736
3
        if (NumGPRsUsed > NumGPRs)
5737
1
          HasParameterArea = true;
5738
1.17k
      } else {
5739
1.17k
        switch (ArgVT.getSimpleVT().SimpleTy) {
5740
1.17k
        
default: 0
llvm_unreachable0
("Unexpected ValueType for argument!");
5741
1.17k
        case MVT::i1:
5742
454
        case MVT::i32:
5743
454
        case MVT::i64:
5744
454
          if (++NumGPRsUsed <= NumGPRs)
5745
245
            continue;
5746
209
          break;
5747
352
        case MVT::v4i32:
5748
352
        case MVT::v8i16:
5749
352
        case MVT::v16i8:
5750
352
        case MVT::v2f64:
5751
352
        case MVT::v2i64:
5752
352
        case MVT::v1i128:
5753
352
        case MVT::f128:
5754
352
          if (++NumVRsUsed <= NumVRs)
5755
264
            continue;
5756
88
          break;
5757
88
        case MVT::v4f32:
5758
0
          // When using QPX, this is handled like a FP register, otherwise, it
5759
0
          // is an Altivec register.
5760
0
          if (Subtarget.hasQPX()) {
5761
0
            if (++NumFPRsUsed <= NumFPRs)
5762
0
              continue;
5763
0
          } else {
5764
0
            if (++NumVRsUsed <= NumVRs)
5765
0
              continue;
5766
0
          }
5767
0
          break;
5768
368
        case MVT::f32:
5769
368
        case MVT::f64:
5770
368
        case MVT::v4f64: // QPX
5771
368
        case MVT::v4i1:  // QPX
5772
368
          if (++NumFPRsUsed <= NumFPRs)
5773
301
            continue;
5774
67
          break;
5775
364
        }
5776
364
        HasParameterArea = true;
5777
364
      }
5778
1.17k
    }
5779
4.21k
5780
4.21k
    /* Respect alignment of argument on the stack.  */
5781
4.21k
    unsigned Align =
5782
3.40k
      CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
5783
3.40k
    NumBytes = ((NumBytes + Align - 1) / Align) * Align;
5784
3.40k
5785
3.40k
    NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
5786
3.40k
    if (Flags.isInConsecutiveRegsLast())
5787
94
      NumBytes = ((NumBytes + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
5788
3.40k
  }
5789
1.80k
5790
1.80k
  unsigned NumBytesActuallyUsed = NumBytes;
5791
1.80k
5792
1.80k
  // In the old ELFv1 ABI,
5793
1.80k
  // the prolog code of the callee may store up to 8 GPR argument registers to
5794
1.80k
  // the stack, allowing va_start to index over them in memory if its varargs.
5795
1.80k
  // Because we cannot tell if this is needed on the caller side, we have to
5796
1.80k
  // conservatively assume that it is needed.  As such, make sure we have at
5797
1.80k
  // least enough stack space for the caller to store the 8 GPRs.
5798
1.80k
  // In the ELFv2 ABI, we allocate the parameter area iff a callee
5799
1.80k
  // really requires memory operands, e.g. a vararg function.
5800
1.80k
  if (HasParameterArea)
5801
736
    NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
5802
1.06k
  else
5803
1.06k
    NumBytes = LinkageSize;
5804
1.80k
5805
1.80k
  // Tail call needs the stack to be aligned.
5806
1.80k
  if (getTargetMachine().Options.GuaranteedTailCallOpt &&
5807
1.80k
      
CallConv == CallingConv::Fast1
)
5808
1
    NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);
5809
1.80k
5810
1.80k
  int SPDiff = 0;
5811
1.80k
5812
1.80k
  // Calculate by how many bytes the stack has to be adjusted in case of tail
5813
1.80k
  // call optimization.
5814
1.80k
  if (!IsSibCall)
5815
1.74k
    SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
5816
1.80k
5817
1.80k
  // To protect arguments on the stack from being clobbered in a tail call,
5818
1.80k
  // force all the loads to happen before doing any other lowering.
5819
1.80k
  if (isTailCall)
5820
58
    Chain = DAG.getStackArgumentTokenFactor(Chain);
5821
1.80k
5822
1.80k
  // Adjust the stack pointer for the new arguments...
5823
1.80k
  // These operations are automatically eliminated by the prolog/epilog pass
5824
1.80k
  if (!IsSibCall)
5825
1.74k
    Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
5826
1.80k
  SDValue CallSeqStart = Chain;
5827
1.80k
5828
1.80k
  // Load the return address and frame pointer so it can be move somewhere else
5829
1.80k
  // later.
5830
1.80k
  SDValue LROp, FPOp;
5831
1.80k
  Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
5832
1.80k
5833
1.80k
  // Set up a copy of the stack pointer for use loading and storing any
5834
1.80k
  // arguments that may not fit in the registers available for argument
5835
1.80k
  // passing.
5836
1.80k
  SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
5837
1.80k
5838
1.80k
  // Figure out which arguments are going to go in registers, and which in
5839
1.80k
  // memory.  Also, if this is a vararg function, floating point operations
5840
1.80k
  // must be stored to our stack, and loaded into integer regs as well, if
5841
1.80k
  // any integer regs are available for argument passing.
5842
1.80k
  unsigned ArgOffset = LinkageSize;
5843
1.80k
5844
1.80k
  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
5845
1.80k
  SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
5846
1.80k
5847
1.80k
  SmallVector<SDValue, 8> MemOpChains;
5848
6.01k
  for (unsigned i = 0; i != NumOps; 
++i4.21k
) {
5849
4.21k
    SDValue Arg = OutVals[i];
5850
4.21k
    ISD::ArgFlagsTy Flags = Outs[i].Flags;
5851
4.21k
    EVT ArgVT = Outs[i].VT;
5852
4.21k
    EVT OrigVT = Outs[i].ArgVT;
5853
4.21k
5854
4.21k
    // PtrOff will be used to store the current argument to the stack if a
5855
4.21k
    // register cannot be found for it.
5856
4.21k
    SDValue PtrOff;
5857
4.21k
5858
4.21k
    // We re-align the argument offset for each argument, except when using the
5859
4.21k
    // fast calling convention, when we need to make sure we do that only when
5860
4.21k
    // we'll actually use a stack slot.
5861
4.21k
    auto ComputePtrOff = [&]() {
5862
3.40k
      /* Respect alignment of argument on the stack.  */
5863
3.40k
      unsigned Align =
5864
3.40k
        CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
5865
3.40k
      ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
5866
3.40k
5867
3.40k
      PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());
5868
3.40k
5869
3.40k
      PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
5870
3.40k
    };
5871
4.21k
5872
4.21k
    if (CallConv != CallingConv::Fast) {
5873
3.03k
      ComputePtrOff();
5874
3.03k
5875
3.03k
      /* Compute GPR index associated with argument offset.  */
5876
3.03k
      GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
5877
3.03k
      GPR_idx = std::min(GPR_idx, NumGPRs);
5878
3.03k
    }
5879
4.21k
5880
4.21k
    // Promote integers to 64-bit values.
5881
4.21k
    if (Arg.getValueType() == MVT::i32 || 
Arg.getValueType() == MVT::i13.82k
) {
5882
392
      // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
5883
392
      unsigned ExtOp = Flags.isSExt() ? 
ISD::SIGN_EXTEND242
:
ISD::ZERO_EXTEND150
;
5884
392
      Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
5885
392
    }
5886
4.21k
5887
4.21k
    // FIXME memcpy is used way more than necessary.  Correctness first.
5888
4.21k
    // Note: "by value" is code for passing a structure by value, not
5889
4.21k
    // basic types.
5890
4.21k
    if (Flags.isByVal()) {
5891
61
      // Note: Size includes alignment padding, so
5892
61
      //   struct x { short a; char b; }
5893
61
      // will have Size = 4.  With #pragma pack(1), it will have Size = 3.
5894
61
      // These are the proper values we need for right-justifying the
5895
61
      // aggregate in a parameter register.
5896
61
      unsigned Size = Flags.getByValSize();
5897
61
5898
61
      // An empty aggregate parameter takes up no storage and no
5899
61
      // registers.
5900
61
      if (Size == 0)
5901
2
        continue;
5902
59
5903
59
      if (CallConv == CallingConv::Fast)
5904
3
        ComputePtrOff();
5905
59
5906
59
      // All aggregates smaller than 8 bytes must be passed right-justified.
5907
59
      if (Size==1 || 
Size==255
||
Size==451
) {
5908
16
        EVT VT = (Size==1) ? 
MVT::i84
:
((Size==2) 12
?
MVT::i164
:
MVT::i328
);
5909
16
        if (GPR_idx != NumGPRs) {
5910
7
          SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
5911
7
                                        MachinePointerInfo(), VT);
5912
7
          MemOpChains.push_back(Load.getValue(1));
5913
7
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
5914
7
5915
7
          ArgOffset += PtrByteSize;
5916
7
          continue;
5917
7
        }
5918
52
      }
5919
52
5920
52
      if (GPR_idx == NumGPRs && 
Size < 823
) {
5921
13
        SDValue AddPtr = PtrOff;
5922
13
        if (!isLittleEndian) {
5923
12
          SDValue Const = DAG.getConstant(PtrByteSize - Size, dl,
5924
12
                                          PtrOff.getValueType());
5925
12
          AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
5926
12
        }
5927
13
        Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
5928
13
                                                          CallSeqStart,
5929
13
                                                          Flags, DAG, dl);
5930
13
        ArgOffset += PtrByteSize;
5931
13
        continue;
5932
13
      }
5933
39
      // Copy entire object into memory.  There are cases where gcc-generated
5934
39
      // code assumes it is there, even if it could be put entirely into
5935
39
      // registers.  (This is not what the doc says.)
5936
39
5937
39
      // FIXME: The above statement is likely due to a misunderstanding of the
5938
39
      // documents.  All arguments must be copied into the parameter area BY
5939
39
      // THE CALLEE in the event that the callee takes the address of any
5940
39
      // formal argument.  That has not yet been implemented.  However, it is
5941
39
      // reasonable to use the stack area as a staging area for the register
5942
39
      // load.
5943
39
5944
39
      // Skip this for small aggregates, as we will use the same slot for a
5945
39
      // right-justified copy, below.
5946
39
      if (Size >= 8)
5947
31
        Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
5948
31
                                                          CallSeqStart,
5949
31
                                                          Flags, DAG, dl);
5950
39
5951
39
      // When a register is available, pass a small aggregate right-justified.
5952
39
      if (Size < 8 && 
GPR_idx != NumGPRs8
) {
5953
8
        // The easiest way to get this right-justified in a register
5954
8
        // is to copy the structure into the rightmost portion of a
5955
8
        // local variable slot, then load the whole slot into the
5956
8
        // register.
5957
8
        // FIXME: The memcpy seems to produce pretty awful code for
5958
8
        // small aggregates, particularly for packed ones.
5959
8
        // FIXME: It would be preferable to use the slot in the
5960
8
        // parameter save area instead of a new local variable.
5961
8
        SDValue AddPtr = PtrOff;
5962
8
        if (!isLittleEndian) {
5963
8
          SDValue Const = DAG.getConstant(8 - Size, dl, PtrOff.getValueType());
5964
8
          AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
5965
8
        }
5966
8
        Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
5967
8
                                                          CallSeqStart,
5968
8
                                                          Flags, DAG, dl);
5969
8
5970
8
        // Load the slot into the register.
5971
8
        SDValue Load =
5972
8
            DAG.getLoad(PtrVT, dl, Chain, PtrOff, MachinePointerInfo());
5973
8
        MemOpChains.push_back(Load.getValue(1));
5974
8
        RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
5975
8
5976
8
        // Done with this argument.
5977
8
        ArgOffset += PtrByteSize;
5978
8
        continue;
5979
8
      }
5980
31
5981
31
      // For aggregates larger than PtrByteSize, copy the pieces of the
5982
31
      // object that fit into registers from the parameter save area.
5983
118
      
for (unsigned j=0; 31
j<Size;
j+=PtrByteSize87
) {
5984
106
        SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());
5985
106
        SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
5986
106
        if (GPR_idx != NumGPRs) {
5987
87
          SDValue Load =
5988
87
              DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo());
5989
87
          MemOpChains.push_back(Load.getValue(1));
5990
87
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
5991
87
          ArgOffset += PtrByteSize;
5992
87
        } else {
5993
19
          ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
5994
19
          break;
5995
19
        }
5996
106
      }
5997
31
      continue;
5998
31
    }
5999
4.15k
6000
4.15k
    switch (Arg.getSimpleValueType().SimpleTy) {
6001
4.15k
    
default: 0
llvm_unreachable0
("Unexpected ValueType for argument!");
6002
4.15k
    case MVT::i1:
6003
2.23k
    case MVT::i32:
6004
2.23k
    case MVT::i64:
6005
2.23k
      if (Flags.isNest()) {
6006
2
        // The 'nest' parameter, if any, is passed in R11.
6007
2
        RegsToPass.push_back(std::make_pair(PPC::X11, Arg));
6008
2
        hasNest = true;
6009
2
        break;
6010
2
      }
6011
2.22k
6012
2.22k
      // These can be scalar arguments or elements of an integer array type
6013
2.22k
      // passed directly.  Clang may use those instead of "byval" aggregate
6014
2.22k
      // types to avoid forcing arguments to memory unnecessarily.
6015
2.22k
      if (GPR_idx != NumGPRs) {
6016
1.88k
        RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
6017
1.88k
      } else {
6018
348
        if (CallConv == CallingConv::Fast)
6019
209
          ComputePtrOff();
6020
348
6021
348
        assert(HasParameterArea &&
6022
348
               "Parameter area must exist to pass an argument in memory.");
6023
348
        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6024
348
                         true, isTailCall, false, MemOpChains,
6025
348
                         TailCallArguments, dl);
6026
348
        if (CallConv == CallingConv::Fast)
6027
209
          ArgOffset += PtrByteSize;
6028
348
      }
6029
2.22k
      if (CallConv != CallingConv::Fast)
6030
1.77k
        ArgOffset += PtrByteSize;
6031
2.22k
      break;
6032
2.22k
    case MVT::f32:
6033
1.44k
    case MVT::f64: {
6034
1.44k
      // These can be scalar arguments or elements of a float array type
6035
1.44k
      // passed directly.  The latter are used to implement ELFv2 homogenous
6036
1.44k
      // float aggregates.
6037
1.44k
6038
1.44k
      // Named arguments go into FPRs first, and once they overflow, the
6039
1.44k
      // remaining arguments go into GPRs and then the parameter save area.
6040
1.44k
      // Unnamed arguments for vararg functions always go to GPRs and
6041
1.44k
      // then the parameter save area.  For now, put all arguments to vararg
6042
1.44k
      // routines always in both locations (FPR *and* GPR or stack slot).
6043
1.44k
      bool NeedGPROrStack = isVarArg || 
FPR_idx == NumFPRs1.43k
;
6044
1.44k
      bool NeededLoad = false;
6045
1.44k
6046
1.44k
      // First load the argument into the next available FPR.
6047
1.44k
      if (FPR_idx != NumFPRs)
6048
1.34k
        RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
6049
1.44k
6050
1.44k
      // Next, load the argument into GPR or stack slot if needed.
6051
1.44k
      if (!NeedGPROrStack)
6052
1.33k
        ;
6053
109
      else if (GPR_idx != NumGPRs && 
CallConv != CallingConv::Fast34
) {
6054
33
        // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
6055
33
        // once we support fp <-> gpr moves.
6056
33
6057
33
        // In the non-vararg case, this can only ever happen in the
6058
33
        // presence of f32 array types, since otherwise we never run
6059
33
        // out of FPRs before running out of GPRs.
6060
33
        SDValue ArgVal;
6061
33
6062
33
        // Double values are always passed in a single GPR.
6063
33
        if (Arg.getValueType() != MVT::f32) {
6064
8
          ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);
6065
8
6066
8
        // Non-array float values are extended and passed in a GPR.
6067
25
        } else if (!Flags.isInConsecutiveRegs()) {
6068
4
          ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6069
4
          ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
6070
4
6071
4
        // If we have an array of floats, we collect every odd element
6072
4
        // together with its predecessor into one GPR.
6073
21
        } else if (ArgOffset % PtrByteSize != 0) {
6074
9
          SDValue Lo, Hi;
6075
9
          Lo = DAG.getNode(ISD::BITCAST, dl, MVT::i32, OutVals[i - 1]);
6076
9
          Hi = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6077
9
          if (!isLittleEndian)
6078
0
            std::swap(Lo, Hi);
6079
9
          ArgVal = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
6080
9
6081
9
        // The final element, if even, goes into the first half of a GPR.
6082
12
        } else if (Flags.isInConsecutiveRegsLast()) {
6083
6
          ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6084
6
          ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
6085
6
          if (!isLittleEndian)
6086
0
            ArgVal = DAG.getNode(ISD::SHL, dl, MVT::i64, ArgVal,
6087
0
                                 DAG.getConstant(32, dl, MVT::i32));
6088
6
6089
6
        // Non-final even elements are skipped; they will be handled
6090
6
        // together the with subsequent argument on the next go-around.
6091
6
        } else
6092
6
          ArgVal = SDValue();
6093
33
6094
33
        if (ArgVal.getNode())
6095
27
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], ArgVal));
6096
76
      } else {
6097
76
        if (CallConv == CallingConv::Fast)
6098
67
          ComputePtrOff();
6099
76
6100
76
        // Single-precision floating-point values are mapped to the
6101
76
        // second (rightmost) word of the stack doubleword.
6102
76
        if (Arg.getValueType() == MVT::f32 &&
6103
76
            
!isLittleEndian4
&&
!Flags.isInConsecutiveRegs()2
) {
6104
2
          SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());
6105
2
          PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
6106
2
        }
6107
76
6108
76
        assert(HasParameterArea &&
6109
76
               "Parameter area must exist to pass an argument in memory.");
6110
76
        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6111
76
                         true, isTailCall, false, MemOpChains,
6112
76
                         TailCallArguments, dl);
6113
76
6114
76
        NeededLoad = true;
6115
76
      }
6116
1.44k
      // When passing an array of floats, the array occupies consecutive
6117
1.44k
      // space in the argument area; only round up to the next doubleword
6118
1.44k
      // at the end of the array.  Otherwise, each float takes 8 bytes.
6119
1.44k
      if (CallConv != CallingConv::Fast || 
NeededLoad368
) {
6120
1.14k
        ArgOffset += (Arg.getValueType() == MVT::f32 &&
6121
1.14k
                      
Flags.isInConsecutiveRegs()552
) ?
4255
:
8885
;
6122
1.14k
        if (Flags.isInConsecutiveRegsLast())
6123
45
          ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
6124
1.14k
      }
6125
1.44k
      break;
6126
1.44k
    }
6127
1.44k
    case MVT::v4f32:
6128
481
    case MVT::v4i32:
6129
481
    case MVT::v8i16:
6130
481
    case MVT::v16i8:
6131
481
    case MVT::v2f64:
6132
481
    case MVT::v2i64:
6133
481
    case MVT::v1i128:
6134
481
    case MVT::f128:
6135
481
      if (!Subtarget.hasQPX()) {
6136
481
      // These can be scalar arguments or elements of a vector array type
6137
481
      // passed directly.  The latter are used to implement ELFv2 homogenous
6138
481
      // vector aggregates.
6139
481
6140
481
      // For a varargs call, named arguments go into VRs or on the stack as
6141
481
      // usual; unnamed arguments always go to the stack or the corresponding
6142
481
      // GPRs when within range.  For now, we always put the value in both
6143
481
      // locations (or even all three).
6144
481
      if (isVarArg) {
6145
25
        assert(HasParameterArea &&
6146
25
               "Parameter area must exist if we have a varargs call.");
6147
25
        // We could elide this store in the case where the object fits
6148
25
        // entirely in R registers.  Maybe later.
6149
25
        SDValue Store =
6150
25
            DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
6151
25
        MemOpChains.push_back(Store);
6152
25
        if (VR_idx != NumVRs) {
6153
25
          SDValue Load =
6154
25
              DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo());
6155
25
          MemOpChains.push_back(Load.getValue(1));
6156
25
          RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
6157
25
        }
6158
25
        ArgOffset += 16;
6159
75
        for (unsigned i=0; i<16; 
i+=PtrByteSize50
) {
6160
50
          if (GPR_idx == NumGPRs)
6161
0
            break;
6162
50
          SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
6163
50
                                   DAG.getConstant(i, dl, PtrVT));
6164
50
          SDValue Load =
6165
50
              DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
6166
50
          MemOpChains.push_back(Load.getValue(1));
6167
50
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6168
50
        }
6169
25
        break;
6170
25
      }
6171
456
6172
456
      // Non-varargs Altivec params go into VRs or on the stack.
6173
456
      if (VR_idx != NumVRs) {
6174
368
        RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
6175
368
      } else {
6176
88
        if (CallConv == CallingConv::Fast)
6177
88
          ComputePtrOff();
6178
88
6179
88
        assert(HasParameterArea &&
6180
88
               "Parameter area must exist to pass an argument in memory.");
6181
88
        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6182
88
                         true, isTailCall, true, MemOpChains,
6183
88
                         TailCallArguments, dl);
6184
88
        if (CallConv == CallingConv::Fast)
6185
88
          ArgOffset += 16;
6186
88
      }
6187
456
6188
456
      if (CallConv != CallingConv::Fast)
6189
104
        ArgOffset += 16;
6190
456
      break;
6191
456
      } // not QPX
6192
0
6193
0
      assert(Arg.getValueType().getSimpleVT().SimpleTy == MVT::v4f32 &&
6194
0
             "Invalid QPX parameter type");
6195
0
6196
0
      LLVM_FALLTHROUGH;
6197
2
    case MVT::v4f64:
6198
2
    case MVT::v4i1: {
6199
2
      bool IsF32 = Arg.getValueType().getSimpleVT().SimpleTy == MVT::v4f32;
6200
2
      if (isVarArg) {
6201
0
        assert(HasParameterArea &&
6202
0
               "Parameter area must exist if we have a varargs call.");
6203
0
        // We could elide this store in the case where the object fits
6204
0
        // entirely in R registers.  Maybe later.
6205
0
        SDValue Store =
6206
0
            DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
6207
0
        MemOpChains.push_back(Store);
6208
0
        if (QFPR_idx != NumQFPRs) {
6209
0
          SDValue Load = DAG.getLoad(IsF32 ? MVT::v4f32 : MVT::v4f64, dl, Store,
6210
0
                                     PtrOff, MachinePointerInfo());
6211
0
          MemOpChains.push_back(Load.getValue(1));
6212
0
          RegsToPass.push_back(std::make_pair(QFPR[QFPR_idx++], Load));
6213
0
        }
6214
0
        ArgOffset += (IsF32 ? 16 : 32);
6215
0
        for (unsigned i = 0; i < (IsF32 ? 16U : 32U); i += PtrByteSize) {
6216
0
          if (GPR_idx == NumGPRs)
6217
0
            break;
6218
0
          SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
6219
0
                                   DAG.getConstant(i, dl, PtrVT));
6220
0
          SDValue Load =
6221
0
              DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
6222
0
          MemOpChains.push_back(Load.getValue(1));
6223
0
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6224
0
        }
6225
0
        break;
6226
0
      }
6227
2
6228
2
      // Non-varargs QPX params go into registers or on the stack.
6229
2
      if (QFPR_idx != NumQFPRs) {
6230
2
        RegsToPass.push_back(std::make_pair(QFPR[QFPR_idx++], Arg));
6231
2
      } else {
6232
0
        if (CallConv == CallingConv::Fast)
6233
0
          ComputePtrOff();
6234
0
6235
0
        assert(HasParameterArea &&
6236
0
               "Parameter area must exist to pass an argument in memory.");
6237
0
        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6238
0
                         true, isTailCall, true, MemOpChains,
6239
0
                         TailCallArguments, dl);
6240
0
        if (CallConv == CallingConv::Fast)
6241
0
          ArgOffset += (IsF32 ? 16 : 32);
6242
0
      }
6243
2
6244
2
      if (CallConv != CallingConv::Fast)
6245
2
        ArgOffset += (IsF32 ? 
160
: 32);
6246
2
      break;
6247
2
      }
6248
4.15k
    }
6249
4.15k
  }
6250
1.80k
6251
1.80k
  assert((!HasParameterArea || NumBytesActuallyUsed == ArgOffset) &&
6252
1.80k
         "mismatch in size of parameter area");
6253
1.80k
  (void)NumBytesActuallyUsed;
6254
1.80k
6255
1.80k
  if (!MemOpChains.empty())
6256
111
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
6257
1.80k
6258
1.80k
  // Check if this is an indirect call (MTCTR/BCTRL).
6259
1.80k
  // See PrepareCall() for more information about calls through function
6260
1.80k
  // pointers in the 64-bit SVR4 ABI.
6261
1.80k
  if (!isTailCall && 
!isPatchPoint1.74k
&&
6262
1.80k
      
!isFunctionGlobalAddress(Callee)1.71k
&&
6263
1.80k
      
!isa<ExternalSymbolSDNode>(Callee)571
) {
6264
61
    // Load r2 into a virtual register and store it to the TOC save area.
6265
61
    setUsesTOCBasePtr(DAG);
6266
61
    SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
6267
61
    // TOC save area offset.
6268
61
    unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
6269
61
    SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
6270
61
    SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6271
61
    Chain = DAG.getStore(
6272
61
        Val.getValue(1), dl, Val, AddPtr,
6273
61
        MachinePointerInfo::getStack(DAG.getMachineFunction(), TOCSaveOffset));
6274
61
    // In the ELFv2 ABI, R12 must contain the address of an indirect callee.
6275
61
    // This does not mean the MTCTR instruction must use R12; it's easier
6276
61
    // to model this as an extra parameter, so do that.
6277
61
    if (isELFv2ABI && 
!isPatchPoint31
)
6278
31
      RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee));
6279
61
  }
6280
1.80k
6281
1.80k
  // Build a sequence of copy-to-reg nodes chained together with token chain
6282
1.80k
  // and flag operands which copy the outgoing args into the appropriate regs.
6283
1.80k
  SDValue InFlag;
6284
5.63k
  for (unsigned i = 0, e = RegsToPass.size(); i != e; 
++i3.82k
) {
6285
3.82k
    Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
6286
3.82k
                             RegsToPass[i].second, InFlag);
6287
3.82k
    InFlag = Chain.getValue(1);
6288
3.82k
  }
6289
1.80k
6290
1.80k
  if (isTailCall && 
!IsSibCall58
)
6291
1
    PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
6292
1
                    TailCallArguments);
6293
1.80k
6294
1.80k
  return FinishCall(CallConv, dl, isTailCall, isVarArg, isPatchPoint, hasNest,
6295
1.80k
                    DAG, RegsToPass, InFlag, Chain, CallSeqStart, Callee,
6296
1.80k
                    SPDiff, NumBytes, Ins, InVals, CS);
6297
1.80k
}
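
// A minimal standalone sketch of the round-up-to-alignment arithmetic that
// the argument-area bookkeeping above repeats inline; `alignUp` is a
// hypothetical helper name used only for illustration, not part of this file.
static constexpr unsigned alignUp(unsigned Bytes, unsigned Align) {
  return ((Bytes + Align - 1) / Align) * Align;
}
// For example, after the 48-byte ELFv1 linkage area a 16-byte-aligned vector
// argument starts at alignUp(48, 16) == 48 and advances the running total to
// 64, while an argument landing at offset 50 would first be pushed up to 64.
static_assert(alignUp(48, 16) == 48, "aligned offsets are unchanged");
static_assert(alignUp(50, 16) == 64, "unaligned offsets round up");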
6298
6299
SDValue PPCTargetLowering::LowerCall_Darwin(
6300
    SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg,
6301
    bool isTailCall, bool isPatchPoint,
6302
    const SmallVectorImpl<ISD::OutputArg> &Outs,
6303
    const SmallVectorImpl<SDValue> &OutVals,
6304
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
6305
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
6306
0
    ImmutableCallSite CS) const {
6307
0
  unsigned NumOps = Outs.size();
6308
0
6309
0
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
6310
0
  bool isPPC64 = PtrVT == MVT::i64;
6311
0
  unsigned PtrByteSize = isPPC64 ? 8 : 4;
6312
0
6313
0
  MachineFunction &MF = DAG.getMachineFunction();
6314
0
6315
0
  // Mark this function as potentially containing a function that contains a
6316
0
  // tail call. As a consequence the frame pointer will be used for dynamicalloc
6317
0
  // and restoring the callers stack pointer in this functions epilog. This is
6318
0
  // done because by tail calling the called function might overwrite the value
6319
0
  // in this function's (MF) stack pointer stack slot 0(SP).
6320
0
  if (getTargetMachine().Options.GuaranteedTailCallOpt &&
6321
0
      CallConv == CallingConv::Fast)
6322
0
    MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
6323
0
6324
0
  // Count how many bytes are to be pushed on the stack, including the linkage
6325
0
  // area, and parameter passing area.  We start with 24/48 bytes, which is
6326
0
  // prereserved space for [SP][CR][LR][3 x unused].
6327
0
  unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
6328
0
  unsigned NumBytes = LinkageSize;
6329
0
6330
0
  // Add up all the space actually used.
6331
0
  // In 32-bit non-varargs calls, Altivec parameters all go at the end; usually
6332
0
  // they all go in registers, but we must reserve stack space for them for
6333
0
  // possible use by the caller.  In varargs or 64-bit calls, parameters are
6334
0
  // assigned stack space in order, with padding so Altivec parameters are
6335
0
  // 16-byte aligned.
6336
0
  unsigned nAltivecParamsAtEnd = 0;
6337
0
  for (unsigned i = 0; i != NumOps; ++i) {
6338
0
    ISD::ArgFlagsTy Flags = Outs[i].Flags;
6339
0
    EVT ArgVT = Outs[i].VT;
6340
0
    // Varargs Altivec parameters are padded to a 16 byte boundary.
6341
0
    if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
6342
0
        ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
6343
0
        ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64) {
6344
0
      if (!isVarArg && !isPPC64) {
6345
0
        // Non-varargs Altivec parameters go after all the non-Altivec
6346
0
        // parameters; handle those later so we know how much padding we need.
6347
0
        nAltivecParamsAtEnd++;
6348
0
        continue;
6349
0
      }
6350
0
      // Varargs and 64-bit Altivec parameters are padded to 16 byte boundary.
6351
0
      NumBytes = ((NumBytes+15)/16)*16;
6352
0
    }
6353
0
    NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
6354
0
  }
6355
0
6356
0
  // Allow for Altivec parameters at the end, if needed.
6357
0
  if (nAltivecParamsAtEnd) {
6358
0
    NumBytes = ((NumBytes+15)/16)*16;
6359
0
    NumBytes += 16*nAltivecParamsAtEnd;
6360
0
  }
6361
0
6362
0
  // The prolog code of the callee may store up to 8 GPR argument registers to
6363
0
  // the stack, allowing va_start to index over them in memory if its varargs.
6364
0
  // Because we cannot tell if this is needed on the caller side, we have to
6365
0
  // conservatively assume that it is needed.  As such, make sure we have at
6366
0
  // least enough stack space for the caller to store the 8 GPRs.
6367
0
  NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
6368
0
6369
0
  // Tail call needs the stack to be aligned.
6370
0
  if (getTargetMachine().Options.GuaranteedTailCallOpt &&
6371
0
      CallConv == CallingConv::Fast)
6372
0
    NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);
6373
0
6374
0
  // Calculate by how many bytes the stack has to be adjusted in case of tail
6375
0
  // call optimization.
6376
0
  int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
6377
0
6378
0
  // To protect arguments on the stack from being clobbered in a tail call,
6379
0
  // force all the loads to happen before doing any other lowering.
6380
0
  if (isTailCall)
6381
0
    Chain = DAG.getStackArgumentTokenFactor(Chain);
6382
0
6383
0
  // Adjust the stack pointer for the new arguments...
6384
0
  // These operations are automatically eliminated by the prolog/epilog pass
6385
0
  Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
6386
0
  SDValue CallSeqStart = Chain;
6387
0
6388
0
  // Load the return address and frame pointer so it can be move somewhere else
6389
0
  // later.
6390
0
  SDValue LROp, FPOp;
6391
0
  Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
6392
0
6393
0
  // Set up a copy of the stack pointer for use loading and storing any
6394
0
  // arguments that may not fit in the registers available for argument
6395
0
  // passing.
6396
0
  SDValue StackPtr;
6397
0
  if (isPPC64)
6398
0
    StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
6399
0
  else
6400
0
    StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
6401
0
6402
0
  // Figure out which arguments are going to go in registers, and which in
6403
0
  // memory.  Also, if this is a vararg function, floating point operations
6404
0
  // must be stored to our stack, and loaded into integer regs as well, if
6405
0
  // any integer regs are available for argument passing.
6406
0
  unsigned ArgOffset = LinkageSize;
6407
0
  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
6408
0
6409
0
  static const MCPhysReg GPR_32[] = {           // 32-bit registers.
6410
0
    PPC::R3, PPC::R4, PPC::R5, PPC::R6,
6411
0
    PPC::R7, PPC::R8, PPC::R9, PPC::R10,
6412
0
  };
6413
0
  static const MCPhysReg GPR_64[] = {           // 64-bit registers.
6414
0
    PPC::X3, PPC::X4, PPC::X5, PPC::X6,
6415
0
    PPC::X7, PPC::X8, PPC::X9, PPC::X10,
6416
0
  };
6417
0
  static const MCPhysReg VR[] = {
6418
0
    PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
6419
0
    PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
6420
0
  };
6421
0
  const unsigned NumGPRs = array_lengthof(GPR_32);
6422
0
  const unsigned NumFPRs = 13;
6423
0
  const unsigned NumVRs  = array_lengthof(VR);
6424
0
6425
0
  const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32;
6426
0
6427
0
  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
6428
0
  SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
6429
0
6430
0
  SmallVector<SDValue, 8> MemOpChains;
6431
0
  for (unsigned i = 0; i != NumOps; ++i) {
6432
0
    SDValue Arg = OutVals[i];
6433
0
    ISD::ArgFlagsTy Flags = Outs[i].Flags;
6434
0
6435
0
    // PtrOff will be used to store the current argument to the stack if a
6436
0
    // register cannot be found for it.
6437
0
    SDValue PtrOff;
6438
0
6439
0
    PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());
6440
0
6441
0
    PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6442
0
6443
0
    // On PPC64, promote integers to 64-bit values.
6444
0
    if (isPPC64 && Arg.getValueType() == MVT::i32) {
6445
0
      // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
6446
0
      unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
6447
0
      Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
6448
0
    }
6449
0
6450
0
    // FIXME memcpy is used way more than necessary.  Correctness first.
6451
0
    // Note: "by value" is code for passing a structure by value, not
6452
0
    // basic types.
6453
0
    if (Flags.isByVal()) {
6454
0
      unsigned Size = Flags.getByValSize();
6455
0
      // Very small objects are passed right-justified.  Everything else is
6456
0
      // passed left-justified.
6457
0
      if (Size==1 || Size==2) {
6458
0
        EVT VT = (Size==1) ? MVT::i8 : MVT::i16;
6459
0
        if (GPR_idx != NumGPRs) {
6460
0
          SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
6461
0
                                        MachinePointerInfo(), VT);
6462
0
          MemOpChains.push_back(Load.getValue(1));
6463
0
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6464
0
6465
0
          ArgOffset += PtrByteSize;
6466
0
        } else {
6467
0
          SDValue Const = DAG.getConstant(PtrByteSize - Size, dl,
6468
0
                                          PtrOff.getValueType());
6469
0
          SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
6470
0
          Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
6471
0
                                                            CallSeqStart,
6472
0
                                                            Flags, DAG, dl);
6473
0
          ArgOffset += PtrByteSize;
6474
0
        }
6475
0
        continue;
6476
0
      }
6477
0
      // Copy entire object into memory.  There are cases where gcc-generated
6478
0
      // code assumes it is there, even if it could be put entirely into
6479
0
      // registers.  (This is not what the doc says.)
6480
0
      Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
6481
0
                                                        CallSeqStart,
6482
0
                                                        Flags, DAG, dl);
6483
0
6484
0
      // For small aggregates (Darwin only) and aggregates >= PtrByteSize,
6485
0
      // copy the pieces of the object that fit into registers from the
6486
0
      // parameter save area.
6487
0
      for (unsigned j=0; j<Size; j+=PtrByteSize) {
6488
0
        SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());
6489
0
        SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
6490
0
        if (GPR_idx != NumGPRs) {
6491
0
          SDValue Load =
6492
0
              DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo());
6493
0
          MemOpChains.push_back(Load.getValue(1));
6494
0
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6495
0
          ArgOffset += PtrByteSize;
6496
0
        } else {
6497
0
          ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
6498
0
          break;
6499
0
        }
6500
0
      }
6501
0
      continue;
6502
0
    }
6503
0
6504
0
    switch (Arg.getSimpleValueType().SimpleTy) {
6505
0
    default: llvm_unreachable("Unexpected ValueType for argument!");
6506
0
    case MVT::i1:
6507
0
    case MVT::i32:
6508
0
    case MVT::i64:
6509
0
      if (GPR_idx != NumGPRs) {
6510
0
        if (Arg.getValueType() == MVT::i1)
6511
0
          Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, PtrVT, Arg);
6512
0
6513
0
        RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
6514
0
      } else {
6515
0
        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6516
0
                         isPPC64, isTailCall, false, MemOpChains,
6517
0
                         TailCallArguments, dl);
6518
0
      }
6519
0
      ArgOffset += PtrByteSize;
6520
0
      break;
6521
0
    case MVT::f32:
6522
0
    case MVT::f64:
6523
0
      if (FPR_idx != NumFPRs) {
6524
0
        RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
6525
0
6526
0
        if (isVarArg) {
6527
0
          SDValue Store =
6528
0
              DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
6529
0
          MemOpChains.push_back(Store);
6530
0
6531
0
          // Float varargs are always shadowed in available integer registers
6532
0
          if (GPR_idx != NumGPRs) {
6533
0
            SDValue Load =
6534
0
                DAG.getLoad(PtrVT, dl, Store, PtrOff, MachinePointerInfo());
6535
0
            MemOpChains.push_back(Load.getValue(1));
6536
0
            RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6537
0
          }
6538
0
          if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && !isPPC64){
6539
0
            SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());
6540
0
            PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
6541
0
            SDValue Load =
6542
0
                DAG.getLoad(PtrVT, dl, Store, PtrOff, MachinePointerInfo());
6543
0
            MemOpChains.push_back(Load.getValue(1));
6544
0
            RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6545
0
          }
6546
0
        } else {
6547
0
          // If we have any FPRs remaining, we may also have GPRs remaining.
6548
0
          // Args passed in FPRs consume either 1 (f32) or 2 (f64) available
6549
0
          // GPRs.
6550
0
          if (GPR_idx != NumGPRs)
6551
0
            ++GPR_idx;
6552
0
          if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 &&
6553
0
              !isPPC64)  // PPC64 has 64-bit GPR's obviously :)
6554
0
            ++GPR_idx;
6555
0
        }
6556
0
      } else
6557
0
        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6558
0
                         isPPC64, isTailCall, false, MemOpChains,
6559
0
                         TailCallArguments, dl);
6560
0
      if (isPPC64)
6561
0
        ArgOffset += 8;
6562
0
      else
6563
0
        ArgOffset += Arg.getValueType() == MVT::f32 ? 4 : 8;
6564
0
      break;
6565
0
    case MVT::v4f32:
6566
0
    case MVT::v4i32:
6567
0
    case MVT::v8i16:
6568
0
    case MVT::v16i8:
6569
0
      if (isVarArg) {
6570
0
        // These go aligned on the stack, or in the corresponding R registers
6571
0
        // when within range.  The Darwin PPC ABI doc claims they also go in
6572
0
        // V registers; in fact gcc does this only for arguments that are
6573
0
        // prototyped, not for those that match the ...  We do it for all
6574
0
        // arguments, seems to work.
6575
0
        while (ArgOffset % 16 !=0) {
6576
0
          ArgOffset += PtrByteSize;
6577
0
          if (GPR_idx != NumGPRs)
6578
0
            GPR_idx++;
6579
0
        }
6580
0
        // We could elide this store in the case where the object fits
6581
0
        // entirely in R registers.  Maybe later.
6582
0
        PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
6583
0
                             DAG.getConstant(ArgOffset, dl, PtrVT));
6584
0
        SDValue Store =
6585
0
            DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
6586
0
        MemOpChains.push_back(Store);
6587
0
        if (VR_idx != NumVRs) {
6588
0
          SDValue Load =
6589
0
              DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo());
6590
0
          MemOpChains.push_back(Load.getValue(1));
6591
0
          RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
6592
0
        }
6593
0
        ArgOffset += 16;
6594
0
        for (unsigned i=0; i<16; i+=PtrByteSize) {
6595
0
          if (GPR_idx == NumGPRs)
6596
0
            break;
6597
0
          SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
6598
0
                                   DAG.getConstant(i, dl, PtrVT));
6599
0
          SDValue Load =
6600
0
              DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
6601
0
          MemOpChains.push_back(Load.getValue(1));
6602
0
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6603
0
        }
6604
0
        break;
6605
0
      }
6606
0
6607
0
      // Non-varargs Altivec params generally go in registers, but have
6608
0
      // stack space allocated at the end.
6609
0
      if (VR_idx != NumVRs) {
6610
0
        // Doesn't have GPR space allocated.
6611
0
        RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
6612
0
      } else if (nAltivecParamsAtEnd==0) {
6613
0
        // We are emitting Altivec params in order.
6614
0
        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6615
0
                         isPPC64, isTailCall, true, MemOpChains,
6616
0
                         TailCallArguments, dl);
6617
0
        ArgOffset += 16;
6618
0
      }
6619
0
      break;
6620
0
    }
6621
0
  }
6622
0
  // If all Altivec parameters fit in registers, as they usually do,
6623
0
  // they get stack space following the non-Altivec parameters.  We
6624
0
  // don't track this here because nobody below needs it.
6625
0
  // If there are more Altivec parameters than fit in registers emit
6626
0
  // the stores here.
6627
0
  if (!isVarArg && nAltivecParamsAtEnd > NumVRs) {
6628
0
    unsigned j = 0;
6629
0
    // Offset is aligned; skip 1st 12 params which go in V registers.
6630
0
    ArgOffset = ((ArgOffset+15)/16)*16;
6631
0
    ArgOffset += 12*16;
6632
0
    for (unsigned i = 0; i != NumOps; ++i) {
6633
0
      SDValue Arg = OutVals[i];
6634
0
      EVT ArgType = Outs[i].VT;
6635
0
      if (ArgType==MVT::v4f32 || ArgType==MVT::v4i32 ||
6636
0
          ArgType==MVT::v8i16 || ArgType==MVT::v16i8) {
6637
0
        if (++j > NumVRs) {
6638
0
          SDValue PtrOff;
6639
0
          // We are emitting Altivec params in order.
6640
0
          LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6641
0
                           isPPC64, isTailCall, true, MemOpChains,
6642
0
                           TailCallArguments, dl);
6643
0
          ArgOffset += 16;
6644
0
        }
6645
0
      }
6646
0
    }
6647
0
  }
6648
0
6649
0
  if (!MemOpChains.empty())
6650
0
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
6651
0
6652
0
  // On Darwin, R12 must contain the address of an indirect callee.  This does
6653
0
  // not mean the MTCTR instruction must use R12; it's easier to model this as
6654
0
  // an extra parameter, so do that.
6655
0
  if (!isTailCall &&
6656
0
      !isFunctionGlobalAddress(Callee) &&
6657
0
      !isa<ExternalSymbolSDNode>(Callee) &&
6658
0
      !isBLACompatibleAddress(Callee, DAG))
6659
0
    RegsToPass.push_back(std::make_pair((unsigned)(isPPC64 ? PPC::X12 :
6660
0
                                                   PPC::R12), Callee));
6661
0
6662
0
  // Build a sequence of copy-to-reg nodes chained together with token chain
6663
0
  // and flag operands which copy the outgoing args into the appropriate regs.
6664
0
  SDValue InFlag;
6665
0
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
6666
0
    Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
6667
0
                             RegsToPass[i].second, InFlag);
6668
0
    InFlag = Chain.getValue(1);
6669
0
  }
6670
0
6671
0
  if (isTailCall)
6672
0
    PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
6673
0
                    TailCallArguments);
6674
0
6675
0
  return FinishCall(CallConv, dl, isTailCall, isVarArg, isPatchPoint,
6676
0
                    /* unused except on PPC64 ELFv1 */ false, DAG,
6677
0
                    RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff,
6678
0
                    NumBytes, Ins, InVals, CS);
6679
0
}
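
// Both lowering paths above right-justify small big-endian byval aggregates
// inside their pointer-sized slot by starting the copy at
// slot + (slot size - aggregate size). A sketch of that offset computation,
// with a hypothetical helper name used only for illustration:
static constexpr unsigned byValRightJustifyOffset(unsigned PtrByteSize,
                                                  unsigned Size) {
  return PtrByteSize - Size;
}
// A 3-byte struct in an 8-byte doubleword occupies bytes 5..7 of the slot.
static_assert(byValRightJustifyOffset(8, 3) == 5, "copy starts at byte 5");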

SDValue PPCTargetLowering::LowerCall_AIX(
    SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg,
    bool isTailCall, bool isPatchPoint,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    const SmallVectorImpl<SDValue> &OutVals,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
    ImmutableCallSite CS) const {

  assert((CallConv == CallingConv::C || CallConv == CallingConv::Fast) &&
         "Unimplemented calling convention!");
  if (isVarArg || isPatchPoint)
    report_fatal_error("This call type is unimplemented on AIX.");

  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  bool isPPC64 = PtrVT == MVT::i64;
  unsigned PtrByteSize = isPPC64 ? 8 : 4;
  unsigned NumOps = Outs.size();

  // Count how many bytes are to be pushed on the stack, including the linkage
  // area and the parameter list area.
  // On XCOFF, we start with 24/48 bytes, which is reserved space for
  // [SP][CR][LR][2 x reserved][TOC].
  unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();

  // The prolog code of the callee may store up to 8 GPR argument registers to
  // the stack, allowing va_start to index over them in memory if the callee
  // is variadic.
  // Because we cannot tell if this is needed on the caller side, we have to
  // conservatively assume that it is needed.  As such, make sure we have at
  // least enough stack space for the caller to store the 8 GPRs.
  unsigned NumBytes = LinkageSize + 8 * PtrByteSize;

  // Adjust the stack pointer for the new arguments...
  // These operations are automatically eliminated by the prolog/epilog
  // inserter pass.
  Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
  SDValue CallSeqStart = Chain;

  static const MCPhysReg GPR_32[] = {           // 32-bit registers.
    PPC::R3, PPC::R4, PPC::R5, PPC::R6,
    PPC::R7, PPC::R8, PPC::R9, PPC::R10
  };
  static const MCPhysReg GPR_64[] = {           // 64-bit registers.
    PPC::X3, PPC::X4, PPC::X5, PPC::X6,
    PPC::X7, PPC::X8, PPC::X9, PPC::X10
  };

  const unsigned NumGPRs = isPPC64 ? array_lengthof(GPR_64)
                                   : array_lengthof(GPR_32);
  const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32;
  unsigned GPR_idx = 0;

  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;

  if (isTailCall)
    report_fatal_error("Handling of tail call is unimplemented!");
  int SPDiff = 0;

  for (unsigned i = 0; i != NumOps; ++i) {
    SDValue Arg = OutVals[i];
    ISD::ArgFlagsTy Flags = Outs[i].Flags;

    // Promote integers if needed.
    if (Arg.getValueType() == MVT::i1 ||
        (isPPC64 && Arg.getValueType() == MVT::i32)) {
      unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
      Arg = DAG.getNode(ExtOp, dl, PtrVT, Arg);
    }

    // Note: "by value" is code for passing a structure by value, not
    // basic types.
    if (Flags.isByVal())
      report_fatal_error("Passing structure by value is unimplemented!");

    switch (Arg.getSimpleValueType().SimpleTy) {
    default: llvm_unreachable("Unexpected ValueType for argument!");
    case MVT::i1:
    case MVT::i32:
    case MVT::i64:
      if (GPR_idx != NumGPRs)
        RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
      else
        report_fatal_error("Handling of placing parameters on the stack is "
                           "unimplemented!");
      break;
    case MVT::f32:
    case MVT::f64:
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
    case MVT::v2f64:
    case MVT::v2i64:
    case MVT::v1i128:
    case MVT::f128:
    case MVT::v4f64:
    case MVT::v4i1:
      report_fatal_error("Handling of this parameter type is unimplemented!");
    }
  }

  if (!isFunctionGlobalAddress(Callee) &&
      !isa<ExternalSymbolSDNode>(Callee))
    report_fatal_error("Handling of indirect call is unimplemented!");

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDValue InFlag;
  for (auto Reg : RegsToPass) {
    Chain = DAG.getCopyToReg(Chain, dl, Reg.first, Reg.second, InFlag);
    InFlag = Chain.getValue(1);
  }

  return FinishCall(CallConv, dl, isTailCall, isVarArg, isPatchPoint,
                    /* unused except on PPC64 ELFv1 */ false, DAG,
                    RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff,
                    NumBytes, Ins, InVals, CS);
}
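
// LowerCall_AIX above always reserves the linkage area plus save space for
// all 8 GPR argument registers, since the caller cannot tell whether the
// callee's prolog will spill them. A sketch of that minimum, assuming the
// 24/48-byte XCOFF linkage sizes quoted in the comments above (the helper
// name is hypothetical, for illustration only):
static constexpr unsigned aixMinParamBytes(bool Is64Bit) {
  return (Is64Bit ? 48u : 24u) + 8 * (Is64Bit ? 8u : 4u);
}
static_assert(aixMinParamBytes(false) == 56, "32-bit: 24 + 8 * 4");
static_assert(aixMinParamBytes(true) == 112, "64-bit: 48 + 8 * 8");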

bool
PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
                                  MachineFunction &MF, bool isVarArg,
                                  const SmallVectorImpl<ISD::OutputArg> &Outs,
                                  LLVMContext &Context) const {
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
  return CCInfo.CheckReturn(
      Outs, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
                ? RetCC_PPC_Cold
                : RetCC_PPC);
}
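
// The same return-convention choice appears in CanLowerReturn above and in
// LowerReturn below; a compact restatement as a sketch (RetCC_PPC and
// RetCC_PPC_Cold are the generated assignment functions already referenced
// above; the helper name itself is hypothetical):
static CCAssignFn *selectRetCC(bool IsSVR4ABI, CallingConv::ID CC) {
  // Only the SVR4 ABIs give the cold calling convention its own return CC.
  return (IsSVR4ABI && CC == CallingConv::Cold) ? RetCC_PPC_Cold : RetCC_PPC;
}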

SDValue
PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                               bool isVarArg,
                               const SmallVectorImpl<ISD::OutputArg> &Outs,
                               const SmallVectorImpl<SDValue> &OutVals,
                               const SDLoc &dl, SelectionDAG &DAG) const {
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
                 *DAG.getContext());
  CCInfo.AnalyzeReturn(Outs,
                       (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
                           ? RetCC_PPC_Cold
                           : RetCC_PPC);

  SDValue Flag;
  SmallVector<SDValue, 4> RetOps(1, Chain);

  // Copy the result values into the output registers.
  for (unsigned i = 0, RealResIdx = 0; i != RVLocs.size(); ++i, ++RealResIdx) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");

    SDValue Arg = OutVals[RealResIdx];

    switch (VA.getLocInfo()) {
    default: llvm_unreachable("Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::AExt:
      Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    }
    if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
      bool isLittleEndian = Subtarget.isLittleEndian();
      // Legalize ret f64 -> ret 2 x i32.
      SDValue SVal =
          DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
                      DAG.getIntPtrConstant(isLittleEndian ? 0 : 1, dl));
      Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Flag);
      RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
      SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
                         DAG.getIntPtrConstant(isLittleEndian ? 1 : 0, dl));
      Flag = Chain.getValue(1);
      VA = RVLocs[++i]; // skip ahead to next loc
      Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Flag);
    } else
      Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
    Flag = Chain.getValue(1);
    RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
  }

  const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
  const MCPhysReg *I =
    TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
  if (I) {
    for (; *I; ++I) {

      if (PPC::G8RCRegClass.contains(*I))
        RetOps.push_back(DAG.getRegister(*I, MVT::i64));
      else if (PPC::F8RCRegClass.contains(*I))
        RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64)));
      else if (PPC::CRRCRegClass.contains(*I))
        RetOps.push_back(DAG.getRegister(*I, MVT::i1));
      else if (PPC::VRRCRegClass.contains(*I))
        RetOps.push_back(DAG.getRegister(*I, MVT::Other));
      else
        llvm_unreachable("Unexpected register class in CSRsViaCopy!");
    }
  }

  RetOps[0] = Chain;  // Update chain.

  // Add the flag if we have it.
  if (Flag.getNode())
    RetOps.push_back(Flag);

  return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, RetOps);
}
6899
6900
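// Illustrative sketch, not part of this file: the EXTRACT_SPE legalization
// above returns an f64 in two i32 halves, picking element (LE ? 0 : 1) first
// and (LE ? 1 : 0) second. In plain C++ the corresponding split of the
// payload could look like this (spe_split_f64 is a hypothetical helper, and
// which half element 0 names is a property of the SPE register model, so this
// only sketches the endian-dependent selection):
//
//   #include <cstdint>
//   #include <cstring>
//   static void spe_split_f64(double D, bool IsLittleEndian,
//                             uint32_t &First, uint32_t &Second) {
//     uint64_t Bits;
//     std::memcpy(&Bits, &D, sizeof(Bits)); // view the f64 as raw bits
//     uint32_t Lo = uint32_t(Bits), Hi = uint32_t(Bits >> 32);
//     First  = IsLittleEndian ? Lo : Hi;    // element (LE ? 0 : 1)
//     Second = IsLittleEndian ? Hi : Lo;    // element (LE ? 1 : 0)
//   }
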
SDValue
PPCTargetLowering::LowerGET_DYNAMIC_AREA_OFFSET(SDValue Op,
                                                SelectionDAG &DAG) const {
  SDLoc dl(Op);

  // Get the correct type for integers.
  EVT IntVT = Op.getValueType();

  // Get the inputs.
  SDValue Chain = Op.getOperand(0);
  SDValue FPSIdx = getFramePointerFrameIndex(DAG);
  // Build a DYNAREAOFFSET node.
  SDValue Ops[2] = {Chain, FPSIdx};
  SDVTList VTs = DAG.getVTList(IntVT);
  return DAG.getNode(PPCISD::DYNAREAOFFSET, dl, VTs, Ops);
}

SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op,
                                             SelectionDAG &DAG) const {
  // When we pop the dynamic allocation we need to restore the SP link.
  SDLoc dl(Op);

  // Get the correct type for pointers.
  EVT PtrVT = getPointerTy(DAG.getDataLayout());

  // Construct the stack pointer operand.
  bool isPPC64 = Subtarget.isPPC64();
  unsigned SP = isPPC64 ? PPC::X1 : PPC::R1;
  SDValue StackPtr = DAG.getRegister(SP, PtrVT);

  // Get the operands for the STACKRESTORE.
  SDValue Chain = Op.getOperand(0);
  SDValue SaveSP = Op.getOperand(1);

  // Load the old link SP.
  SDValue LoadLinkSP =
      DAG.getLoad(PtrVT, dl, Chain, StackPtr, MachinePointerInfo());

  // Restore the stack pointer.
  Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP);

  // Store the old link SP.
  return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, MachinePointerInfo());
}

SDValue PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  bool isPPC64 = Subtarget.isPPC64();
  EVT PtrVT = getPointerTy(MF.getDataLayout());

  // Get the current return address save index.
  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
  int RASI = FI->getReturnAddrSaveIndex();

  // If the return address save index hasn't been defined yet.
  if (!RASI) {
    // Find out the fixed offset of the link register save area.
    int LROffset = Subtarget.getFrameLowering()->getReturnSaveOffset();
    // Allocate the frame index for the return address save area.
    RASI = MF.getFrameInfo().CreateFixedObject(isPPC64 ? 8 : 4, LROffset, false);
    // Save the result.
    FI->setReturnAddrSaveIndex(RASI);
  }
  return DAG.getFrameIndex(RASI, PtrVT);
}

SDValue
PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  bool isPPC64 = Subtarget.isPPC64();
  EVT PtrVT = getPointerTy(MF.getDataLayout());

  // Get current frame pointer save index.  The users of this index will be
  // primarily DYNALLOC instructions.
  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
  int FPSI = FI->getFramePointerSaveIndex();

  // If the frame pointer save index hasn't been defined yet.
  if (!FPSI) {
    // Find out the fixed offset of the frame pointer save area.
    int FPOffset = Subtarget.getFrameLowering()->getFramePointerSaveOffset();
    // Allocate the frame index for the frame pointer save area.
    FPSI = MF.getFrameInfo().CreateFixedObject(isPPC64 ? 8 : 4, FPOffset, true);
    // Save the result.
    FI->setFramePointerSaveIndex(FPSI);
  }
  return DAG.getFrameIndex(FPSI, PtrVT);
}

SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
                                                   SelectionDAG &DAG) const {
  // Get the inputs.
  SDValue Chain = Op.getOperand(0);
  SDValue Size  = Op.getOperand(1);
  SDLoc dl(Op);

  // Get the correct type for pointers.
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  // Negate the size.
  SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT,
                                DAG.getConstant(0, dl, PtrVT), Size);
  // Construct a node for the frame pointer save index.
  SDValue FPSIdx = getFramePointerFrameIndex(DAG);
  // Build a DYNALLOC node.
  SDValue Ops[3] = { Chain, NegSize, FPSIdx };
  SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other);
  return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops);
}

SDValue PPCTargetLowering::LowerEH_DWARF_CFA(SDValue Op,
                                             SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();

  bool isPPC64 = Subtarget.isPPC64();
  EVT PtrVT = getPointerTy(DAG.getDataLayout());

  int FI = MF.getFrameInfo().CreateFixedObject(isPPC64 ? 8 : 4, 0, false);
  return DAG.getFrameIndex(FI, PtrVT);
}

SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
                                               SelectionDAG &DAG) const {
  SDLoc DL(Op);
  return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL,
                     DAG.getVTList(MVT::i32, MVT::Other),
                     Op.getOperand(0), Op.getOperand(1));
}

SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
                                                SelectionDAG &DAG) const {
  SDLoc DL(Op);
  return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other,
                     Op.getOperand(0), Op.getOperand(1));
}

SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
  if (Op.getValueType().isVector())
    return LowerVectorLoad(Op, DAG);

  assert(Op.getValueType() == MVT::i1 &&
         "Custom lowering only for i1 loads");

  // First, load 8 bits into 32 bits, then truncate to 1 bit.

  SDLoc dl(Op);
  LoadSDNode *LD = cast<LoadSDNode>(Op);

  SDValue Chain = LD->getChain();
  SDValue BasePtr = LD->getBasePtr();
  MachineMemOperand *MMO = LD->getMemOperand();

  SDValue NewLD =
      DAG.getExtLoad(ISD::EXTLOAD, dl, getPointerTy(DAG.getDataLayout()), Chain,
                     BasePtr, MVT::i8, MMO);
  SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewLD);

  SDValue Ops[] = { Result, SDValue(NewLD.getNode(), 1) };
  return DAG.getMergeValues(Ops, dl);
}

SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
  if (Op.getOperand(1).getValueType().isVector())
    return LowerVectorStore(Op, DAG);

  assert(Op.getOperand(1).getValueType() == MVT::i1 &&
         "Custom lowering only for i1 stores");

  // First, zero extend to 32 bits, then use a truncating store to 8 bits.

  SDLoc dl(Op);
  StoreSDNode *ST = cast<StoreSDNode>(Op);

  SDValue Chain = ST->getChain();
  SDValue BasePtr = ST->getBasePtr();
  SDValue Value = ST->getValue();
  MachineMemOperand *MMO = ST->getMemOperand();

  Value = DAG.getNode(ISD::ZERO_EXTEND, dl, getPointerTy(DAG.getDataLayout()),
                      Value);
  return DAG.getTruncStore(Chain, dl, Value, BasePtr, MVT::i8, MMO);
}

// FIXME: Remove this once the ANDI glue bug is fixed:
SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
  assert(Op.getValueType() == MVT::i1 &&
         "Custom lowering only for i1 results");

  SDLoc DL(Op);
  return DAG.getNode(PPCISD::ANDIo_1_GT_BIT, DL, MVT::i1,
                     Op.getOperand(0));
}

SDValue PPCTargetLowering::LowerTRUNCATEVector(SDValue Op,
                                               SelectionDAG &DAG) const {

  // Implements a vector truncate that fits in a vector register as a shuffle.
  // We want to legalize vector truncates down to where the source fits in
  // a vector register (and target is therefore smaller than vector register
  // size).  At that point legalization will try to custom lower the sub-legal
  // result and get here - where we can contain the truncate as a single target
  // operation.

  // For example a trunc <2 x i16> to <2 x i8> could be visualized as follows:
  //   <MSB1|LSB1, MSB2|LSB2> to <LSB1, LSB2>
  //
  // We will implement it for big-endian ordering as this (where x denotes
  // undefined):
  //   < MSB1|LSB1, MSB2|LSB2, uu, uu, uu, uu, uu, uu> to
  //   < LSB1, LSB2, u, u, u, u, u, u, u, u, u, u, u, u, u, u>
  //
  // The same operation in little-endian ordering will be:
  //   <uu, uu, uu, uu, uu, uu, LSB2|MSB2, LSB1|MSB1> to
  //   <u, u, u, u, u, u, u, u, u, u, u, u, u, u, LSB2, LSB1>

  assert(Op.getValueType().isVector() && "Vector type expected.");

  SDLoc DL(Op);
  SDValue N1 = Op.getOperand(0);
  unsigned SrcSize = N1.getValueType().getSizeInBits();
  assert(SrcSize <= 128 && "Source must fit in an Altivec/VSX vector");
  SDValue WideSrc = SrcSize == 128 ? N1 : widenVec(DAG, N1, DL);

  EVT TrgVT = Op.getValueType();
  unsigned TrgNumElts = TrgVT.getVectorNumElements();
  EVT EltVT = TrgVT.getVectorElementType();
  unsigned WideNumElts = 128 / EltVT.getSizeInBits();
  EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);

  // First list the elements we want to keep.
  unsigned SizeMult = SrcSize / TrgVT.getSizeInBits();
  SmallVector<int, 16> ShuffV;
  if (Subtarget.isLittleEndian())
    for (unsigned i = 0; i < TrgNumElts; ++i)
      ShuffV.push_back(i * SizeMult);
  else
    for (unsigned i = 1; i <= TrgNumElts; ++i)
      ShuffV.push_back(i * SizeMult - 1);

  // Populate the remaining elements with undefs.
  for (unsigned i = TrgNumElts; i < WideNumElts; ++i)
    // ShuffV.push_back(i + WideNumElts);
    ShuffV.push_back(WideNumElts + 1);

  SDValue Conv = DAG.getNode(ISD::BITCAST, DL, WideVT, WideSrc);
  return DAG.getVectorShuffle(WideVT, DL, Conv, DAG.getUNDEF(WideVT), ShuffV);
}

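// Illustrative sketch, not part of this file: recomputing the mask above for
// a v4i16 -> v4i8 truncate with a hypothetical standalone helper. SrcSize is
// 64, so SizeMult = 64 / 32 = 2 and WideNumElts = 16; the expected masks are
// { 0, 2, 4, 6 } + twelve 17s on little-endian and { 1, 3, 5, 7 } + twelve
// 17s on big-endian, where 17 (WideNumElts + 1) reads the undef operand.
#include <vector>
static std::vector<int> truncateMaskV4I16ToV4I8(bool IsLittleEndian) {
  const unsigned SrcSize = 64, TrgBits = 32, TrgNumElts = 4, WideNumElts = 16;
  const unsigned SizeMult = SrcSize / TrgBits; // == 2
  std::vector<int> ShuffV;
  if (IsLittleEndian)
    for (unsigned i = 0; i < TrgNumElts; ++i)
      ShuffV.push_back(i * SizeMult);          // keep bytes 0, 2, 4, 6
  else
    for (unsigned i = 1; i <= TrgNumElts; ++i)
      ShuffV.push_back(i * SizeMult - 1);      // keep bytes 1, 3, 5, 7
  for (unsigned i = TrgNumElts; i < WideNumElts; ++i)
    ShuffV.push_back(WideNumElts + 1);         // don't-care lanes
  return ShuffV;
}
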
/// LowerSELECT_CC - Lower floating point select_cc's into the fsel
/// instruction when possible.
SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  // Not FP? Not a fsel.
  if (!Op.getOperand(0).getValueType().isFloatingPoint() ||
      !Op.getOperand(2).getValueType().isFloatingPoint())
    return Op;

  // We might be able to do better than this under some circumstances, but in
  // general, fsel-based lowering of select is a finite-math-only optimization.
  // For more information, see section F.3 of the 2.06 ISA specification.
  if (!DAG.getTarget().Options.NoInfsFPMath ||
      !DAG.getTarget().Options.NoNaNsFPMath)
    return Op;
  // TODO: Propagate flags from the select rather than global settings.
  SDNodeFlags Flags;
  Flags.setNoInfs(true);
  Flags.setNoNaNs(true);

  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();

  EVT ResVT = Op.getValueType();
  EVT CmpVT = Op.getOperand(0).getValueType();
  SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
  SDValue TV  = Op.getOperand(2), FV  = Op.getOperand(3);
  SDLoc dl(Op);

  // If the RHS of the comparison is a 0.0, we don't need to do the
  // subtraction at all.
  SDValue Sel1;
  if (isFloatingPointZero(RHS))
    switch (CC) {
    default: break; // SETUO etc aren't handled by fsel.
    case ISD::SETNE:
      std::swap(TV, FV);
      LLVM_FALLTHROUGH;
    case ISD::SETEQ:
      if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
        LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
      Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
      if (Sel1.getValueType() == MVT::f32)   // Comparison is always 64-bits
        Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
      return DAG.getNode(PPCISD::FSEL, dl, ResVT,
                         DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), Sel1, FV);
    case ISD::SETULT:
    case ISD::SETLT:
      std::swap(TV, FV);  // fsel is natively setge, swap operands for setlt
      LLVM_FALLTHROUGH;
    case ISD::SETOGE:
    case ISD::SETGE:
      if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
        LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
      return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
    case ISD::SETUGT:
    case ISD::SETGT:
      std::swap(TV, FV);  // fsel is natively setge, swap operands for setlt
      LLVM_FALLTHROUGH;
    case ISD::SETOLE:
    case ISD::SETLE:
      if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
        LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
      return DAG.getNode(PPCISD::FSEL, dl, ResVT,
                         DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV);
    }

  SDValue Cmp;
  switch (CC) {
  default: break; // SETUO etc aren't handled by fsel.
  case ISD::SETNE:
    std::swap(TV, FV);
    LLVM_FALLTHROUGH;
  case ISD::SETEQ:
    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
    Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
    if (Sel1.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
    return DAG.getNode(PPCISD::FSEL, dl, ResVT,
                       DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV);
  case ISD::SETULT:
  case ISD::SETLT:
    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
  case ISD::SETOGE:
  case ISD::SETGE:
    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
  case ISD::SETUGT:
  case ISD::SETGT:
    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
  case ISD::SETOLE:
  case ISD::SETLE:
    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
  }
  return Op;
}

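// Illustrative sketch, not part of this file: a scalar model of the fsel
// primitive used above (fd = fa >= 0.0 ? fc : fb), valid only under the
// no-NaNs/no-infs preconditions this lowering checks. It shows why SETLT is
// handled by swapping the true/false operands of a natively-SETGE select:
#include <cassert>
static double fselModel(double A, double TV, double FV) {
  return A >= 0.0 ? TV : FV;
}
static void checkSetltViaFsel(double LHS, double RHS, double TV, double FV) {
  // select_cc(LHS, RHS, TV, FV, setlt)  ==  fsel(LHS - RHS, FV, TV)
  double Expected = LHS < RHS ? TV : FV;
  assert(fselModel(LHS - RHS, FV, TV) == Expected);
}
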
void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
                                               SelectionDAG &DAG,
                                               const SDLoc &dl) const {
  assert(Op.getOperand(0).getValueType().isFloatingPoint());
  SDValue Src = Op.getOperand(0);
  if (Src.getValueType() == MVT::f32)
    Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);

  SDValue Tmp;
  switch (Op.getSimpleValueType().SimpleTy) {
  default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
  case MVT::i32:
    Tmp = DAG.getNode(
        Op.getOpcode() == ISD::FP_TO_SINT
            ? PPCISD::FCTIWZ
            : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ),
        dl, MVT::f64, Src);
    break;
  case MVT::i64:
    assert((Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()) &&
           "i64 FP_TO_UINT is supported only with FPCVT");
    Tmp = DAG.getNode(Op.getOpcode() == ISD::FP_TO_SINT ? PPCISD::FCTIDZ
                                                        : PPCISD::FCTIDUZ,
                      dl, MVT::f64, Src);
    break;
  }

  // Convert the FP value to an int value through memory.
  bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() &&
    (Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT());
  SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64);
  int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex();
  MachinePointerInfo MPI =
      MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);

  // Emit a store to the stack slot.
  SDValue Chain;
  if (i32Stack) {
    MachineFunction &MF = DAG.getMachineFunction();
    MachineMemOperand *MMO =
      MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, 4);
    SDValue Ops[] = { DAG.getEntryNode(), Tmp, FIPtr };
    Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
              DAG.getVTList(MVT::Other), Ops, MVT::i32, MMO);
  } else
    Chain = DAG.getStore(DAG.getEntryNode(), dl, Tmp, FIPtr, MPI);

  // Result is a load from the stack slot.  If loading 4 bytes, make sure to
  // add in a bias on big endian.
  if (Op.getValueType() == MVT::i32 && !i32Stack) {
    FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,
                        DAG.getConstant(4, dl, FIPtr.getValueType()));
    MPI = MPI.getWithOffset(Subtarget.isLittleEndian() ? 0 : 4);
  }

  RLI.Chain = Chain;
  RLI.Ptr = FIPtr;
  RLI.MPI = MPI;
}

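// Illustrative sketch, not part of this file: why the reload above adds 4 on
// big-endian. The 64-bit store puts the 4 bytes holding the numeric low word
// at offset 4 in a big-endian slot and at offset 0 in a little-endian one;
// this hypothetical helper simulates the slot bytes explicitly.
#include <cstdint>
static uint32_t lowWordViaStackSlot(uint64_t Slot, bool IsLittleEndian) {
  unsigned char Mem[8];
  for (int i = 0; i < 8; ++i)                     // the 8-byte store
    Mem[IsLittleEndian ? i : 7 - i] = (unsigned char)(Slot >> (8 * i));
  const unsigned Off = IsLittleEndian ? 0 : 4;    // the biased 4-byte reload
  uint32_t W = 0;
  for (unsigned i = 0; i < 4; ++i)
    W |= (uint32_t)Mem[IsLittleEndian ? Off + i : Off + 3 - i] << (8 * i);
  return W;                                       // always (uint32_t)Slot
}
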
/// Custom lowers floating point to integer conversions to use
/// the direct move instructions available in ISA 2.07 to avoid the
/// need for load/store combinations.
SDValue PPCTargetLowering::LowerFP_TO_INTDirectMove(SDValue Op,
                                                    SelectionDAG &DAG,
                                                    const SDLoc &dl) const {
  assert(Op.getOperand(0).getValueType().isFloatingPoint());
  SDValue Src = Op.getOperand(0);

  if (Src.getValueType() == MVT::f32)
    Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);

  SDValue Tmp;
  switch (Op.getSimpleValueType().SimpleTy) {
  default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
  case MVT::i32:
    Tmp = DAG.getNode(
        Op.getOpcode() == ISD::FP_TO_SINT
            ? PPCISD::FCTIWZ
            : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ),
        dl, MVT::f64, Src);
    Tmp = DAG.getNode(PPCISD::MFVSR, dl, MVT::i32, Tmp);
    break;
  case MVT::i64:
    assert((Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()) &&
           "i64 FP_TO_UINT is supported only with FPCVT");
    Tmp = DAG.getNode(Op.getOpcode() == ISD::FP_TO_SINT ? PPCISD::FCTIDZ
                                                        : PPCISD::FCTIDUZ,
                      dl, MVT::f64, Src);
    Tmp = DAG.getNode(PPCISD::MFVSR, dl, MVT::i64, Tmp);
    break;
  }
  return Tmp;
}

SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
                                          const SDLoc &dl) const {

  // FP to INT conversions are legal for f128.
  if (EnableQuadPrecision && (Op->getOperand(0).getValueType() == MVT::f128))
    return Op;

  // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
  // PPC (the libcall is not available).
  if (Op.getOperand(0).getValueType() == MVT::ppcf128) {
    if (Op.getValueType() == MVT::i32) {
      if (Op.getOpcode() == ISD::FP_TO_SINT) {
        SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
                                 MVT::f64, Op.getOperand(0),
                                 DAG.getIntPtrConstant(0, dl));
        SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
                                 MVT::f64, Op.getOperand(0),
                                 DAG.getIntPtrConstant(1, dl));

        // Add the two halves of the long double in round-to-zero mode.
        SDValue Res = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi);

        // Now use a smaller FP_TO_SINT.
        return DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Res);
      }
      if (Op.getOpcode() == ISD::FP_TO_UINT) {
        const uint64_t TwoE31[] = {0x41e0000000000000LL, 0};
        APFloat APF = APFloat(APFloat::PPCDoubleDouble(), APInt(128, TwoE31));
        SDValue Tmp = DAG.getConstantFP(APF, dl, MVT::ppcf128);
        //  X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X
        // FIXME: generated code sucks.
        // TODO: Are there fast-math-flags to propagate to this FSUB?
        SDValue True = DAG.getNode(ISD::FSUB, dl, MVT::ppcf128,
                                   Op.getOperand(0), Tmp);
        True = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, True);
        True = DAG.getNode(ISD::ADD, dl, MVT::i32, True,
                           DAG.getConstant(0x80000000, dl, MVT::i32));
        SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32,
                                    Op.getOperand(0));
        return DAG.getSelectCC(dl, Op.getOperand(0), Tmp, True, False,
                               ISD::SETGE);
      }
    }

    return SDValue();
  }

  if (Subtarget.hasDirectMove() && Subtarget.isPPC64())
    return LowerFP_TO_INTDirectMove(Op, DAG, dl);

  ReuseLoadInfo RLI;
  LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);

  return DAG.getLoad(Op.getValueType(), dl, RLI.Chain, RLI.Ptr, RLI.MPI,
                     RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
}

// We're trying to insert a regular store, S, and then a load, L. If the
// incoming value, O, is a load, we might just be able to have our load use the
// address used by O. However, we don't know if anything else will store to
// that address before we can load from it. To prevent this situation, we need
// to insert our load, L, into the chain as a peer of O. To do this, we give L
// the same chain operand as O, we create a token factor from the chain results
// of O and L, and we replace all uses of O's chain result with that token
// factor (see spliceIntoChain below for this last part).
bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT,
                                            ReuseLoadInfo &RLI,
                                            SelectionDAG &DAG,
                                            ISD::LoadExtType ET) const {
  SDLoc dl(Op);
  if (ET == ISD::NON_EXTLOAD &&
      (Op.getOpcode() == ISD::FP_TO_UINT ||
       Op.getOpcode() == ISD::FP_TO_SINT) &&
      isOperationLegalOrCustom(Op.getOpcode(),
                               Op.getOperand(0).getValueType())) {

    LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
    return true;
  }

  LoadSDNode *LD = dyn_cast<LoadSDNode>(Op);
  if (!LD || LD->getExtensionType() != ET || LD->isVolatile() ||
      LD->isNonTemporal())
    return false;
  if (LD->getMemoryVT() != MemVT)
    return false;

  RLI.Ptr = LD->getBasePtr();
  if (LD->isIndexed() && !LD->getOffset().isUndef()) {
    assert(LD->getAddressingMode() == ISD::PRE_INC &&
           "Non-pre-inc AM on PPC?");
    RLI.Ptr = DAG.getNode(ISD::ADD, dl, RLI.Ptr.getValueType(), RLI.Ptr,
                          LD->getOffset());
  }

  RLI.Chain = LD->getChain();
  RLI.MPI = LD->getPointerInfo();
  RLI.IsDereferenceable = LD->isDereferenceable();
  RLI.IsInvariant = LD->isInvariant();
  RLI.Alignment = LD->getAlignment();
  RLI.AAInfo = LD->getAAInfo();
  RLI.Ranges = LD->getRanges();

  RLI.ResChain = SDValue(LD, LD->isIndexed() ? 2 : 1);
  return true;
}

// Given the head of the old chain, ResChain, insert a token factor containing
// it and NewResChain, and make users of ResChain now be users of that token
// factor.
// TODO: Remove and use DAG::makeEquivalentMemoryOrdering() instead.
void PPCTargetLowering::spliceIntoChain(SDValue ResChain,
                                        SDValue NewResChain,
                                        SelectionDAG &DAG) const {
  if (!ResChain)
    return;

  SDLoc dl(NewResChain);

  SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                           NewResChain, DAG.getUNDEF(MVT::Other));
  assert(TF.getNode() != NewResChain.getNode() &&
         "A new TF really is required here");

  DAG.ReplaceAllUsesOfValueWith(ResChain, TF);
  DAG.UpdateNodeOperands(TF.getNode(), ResChain, NewResChain);
}

/// Analyze the profitability of a direct move: prefer a float load over an
/// int load plus a direct move when the loaded integer has no integer uses.
bool PPCTargetLowering::directMoveIsProfitable(const SDValue &Op) const {
  SDNode *Origin = Op.getOperand(0).getNode();
  if (Origin->getOpcode() != ISD::LOAD)
    return true;

  // If there is no LXSIBZX/LXSIHZX, like Power8,
  // prefer direct move if the memory size is 1 or 2 bytes.
  MachineMemOperand *MMO = cast<LoadSDNode>(Origin)->getMemOperand();
  if (!Subtarget.hasP9Vector() && MMO->getSize() <= 2)
    return true;

  for (SDNode::use_iterator UI = Origin->use_begin(),
                            UE = Origin->use_end();
       UI != UE; ++UI) {

    // Only look at the users of the loaded value.
    if (UI.getUse().get().getResNo() != 0)
      continue;

    if (UI->getOpcode() != ISD::SINT_TO_FP &&
        UI->getOpcode() != ISD::UINT_TO_FP)
      return true;
  }

  return false;
}

/// Custom lowers integer to floating point conversions to use
/// the direct move instructions available in ISA 2.07 to avoid the
/// need for load/store combinations.
SDValue PPCTargetLowering::LowerINT_TO_FPDirectMove(SDValue Op,
                                                    SelectionDAG &DAG,
                                                    const SDLoc &dl) const {
  assert((Op.getValueType() == MVT::f32 ||
          Op.getValueType() == MVT::f64) &&
         "Invalid floating point type as target of conversion");
  assert(Subtarget.hasFPCVT() &&
         "Int to FP conversions with direct moves require FPCVT");
  SDValue FP;
  SDValue Src = Op.getOperand(0);
  bool SinglePrec = Op.getValueType() == MVT::f32;
  bool WordInt = Src.getSimpleValueType().SimpleTy == MVT::i32;
  bool Signed = Op.getOpcode() == ISD::SINT_TO_FP;
  unsigned ConvOp = Signed ? (SinglePrec ? PPCISD::FCFIDS : PPCISD::FCFID) :
                             (SinglePrec ? PPCISD::FCFIDUS : PPCISD::FCFIDU);

  if (WordInt) {
    FP = DAG.getNode(Signed ? PPCISD::MTVSRA : PPCISD::MTVSRZ,
                     dl, MVT::f64, Src);
    FP = DAG.getNode(ConvOp, dl, SinglePrec ? MVT::f32 : MVT::f64, FP);
  }
  else {
    FP = DAG.getNode(PPCISD::MTVSRA, dl, MVT::f64, Src);
    FP = DAG.getNode(ConvOp, dl, SinglePrec ? MVT::f32 : MVT::f64, FP);
  }

  return FP;
}

static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl) {

  EVT VecVT = Vec.getValueType();
  assert(VecVT.isVector() && "Expected a vector type.");
  assert(VecVT.getSizeInBits() < 128 && "Vector is already full width.");

  EVT EltVT = VecVT.getVectorElementType();
  unsigned WideNumElts = 128 / EltVT.getSizeInBits();
  EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);

  unsigned NumConcat = WideNumElts / VecVT.getVectorNumElements();
  SmallVector<SDValue, 16> Ops(NumConcat);
  Ops[0] = Vec;
  SDValue UndefVec = DAG.getUNDEF(VecVT);
  for (unsigned i = 1; i < NumConcat; ++i)
    Ops[i] = UndefVec;

  return DAG.getNode(ISD::CONCAT_VECTORS, dl, WideVT, Ops);
}

SDValue PPCTargetLowering::LowerINT_TO_FPVector(SDValue Op, SelectionDAG &DAG,
                                                const SDLoc &dl) const {

  unsigned Opc = Op.getOpcode();
  assert((Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP) &&
         "Unexpected conversion type");
  assert((Op.getValueType() == MVT::v2f64 || Op.getValueType() == MVT::v4f32) &&
         "Supports conversions to v2f64/v4f32 only.");

  bool SignedConv = Opc == ISD::SINT_TO_FP;
  bool FourEltRes = Op.getValueType() == MVT::v4f32;

  SDValue Wide = widenVec(DAG, Op.getOperand(0), dl);
  EVT WideVT = Wide.getValueType();
  unsigned WideNumElts = WideVT.getVectorNumElements();
  MVT IntermediateVT = FourEltRes ? MVT::v4i32 : MVT::v2i64;

  SmallVector<int, 16> ShuffV;
  for (unsigned i = 0; i < WideNumElts; ++i)
    ShuffV.push_back(i + WideNumElts);

  int Stride = FourEltRes ? WideNumElts / 4 : WideNumElts / 2;
  int SaveElts = FourEltRes ? 4 : 2;
  if (Subtarget.isLittleEndian())
    for (int i = 0; i < SaveElts; i++)
      ShuffV[i * Stride] = i;
  else
    for (int i = 1; i <= SaveElts; i++)
      ShuffV[i * Stride - 1] = i - 1;

  SDValue ShuffleSrc2 =
      SignedConv ? DAG.getUNDEF(WideVT) : DAG.getConstant(0, dl, WideVT);
  SDValue Arrange = DAG.getVectorShuffle(WideVT, dl, Wide, ShuffleSrc2, ShuffV);
  unsigned ExtendOp =
      SignedConv ? (unsigned)PPCISD::SExtVElems : (unsigned)ISD::BITCAST;

  SDValue Extend;
  if (!Subtarget.hasP9Altivec() && SignedConv) {
    Arrange = DAG.getBitcast(IntermediateVT, Arrange);
    Extend = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, IntermediateVT, Arrange,
                         DAG.getValueType(Op.getOperand(0).getValueType()));
  } else
    Extend = DAG.getNode(ExtendOp, dl, IntermediateVT, Arrange);

  return DAG.getNode(Opc, dl, Op.getValueType(), Extend);
}

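// Illustrative sketch, not part of this file: the arrangement mask above for
// a v4i8 -> v4f32 conversion (WideNumElts = 16, so Stride = 4, SaveElts = 4).
// ShuffV starts as { 16, 17, ..., 31 }, i.e. entirely bytes of ShuffleSrc2
// (a zero vector for unsigned conversions), and the loops then drop source
// byte i into one byte of each 32-bit lane:
//   little-endian: { 0,17,18,19,  1,21,22,23,  2,25,26,27,  3,29,30,31 }
//   big-endian:    { 16,17,18,0,  20,21,22,1,  24,25,26,2,  28,29,30,3 }
// so for UINT_TO_FP the shuffle alone performs the byte -> i32 zero
// extension, and the SExtVElems / sign-extend-in-reg path handles the
// signed case.
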
SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
                                          SelectionDAG &DAG) const {
  SDLoc dl(Op);

  EVT InVT = Op.getOperand(0).getValueType();
  EVT OutVT = Op.getValueType();
  if (OutVT.isVector() && OutVT.isFloatingPoint() &&
      isOperationCustom(Op.getOpcode(), InVT))
    return LowerINT_TO_FPVector(Op, DAG, dl);

  // Conversions to f128 are legal.
  if (EnableQuadPrecision && (Op.getValueType() == MVT::f128))
    return Op;

  if (Subtarget.hasQPX() && Op.getOperand(0).getValueType() == MVT::v4i1) {
    if (Op.getValueType() != MVT::v4f32 && Op.getValueType() != MVT::v4f64)
      return SDValue();

    SDValue Value = Op.getOperand(0);
    // The values are now known to be -1 (false) or 1 (true). To convert this
    // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5).
    // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5
    Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value);

    SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::v4f64);

    Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs);

    if (Op.getValueType() != MVT::v4f64)
      Value = DAG.getNode(ISD::FP_ROUND, dl,
                          Op.getValueType(), Value,
                          DAG.getIntPtrConstant(1, dl));
    return Value;
  }

  // Don't handle ppc_fp128 here; let it be lowered to a libcall.
  if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
    return SDValue();

  if (Op.getOperand(0).getValueType() == MVT::i1)
    return DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Op.getOperand(0),
                       DAG.getConstantFP(1.0, dl, Op.getValueType()),
                       DAG.getConstantFP(0.0, dl, Op.getValueType()));

  // If we have direct moves, we can do all the conversion, skip the store/load
  // however, without FPCVT we can't do most conversions.
  if (Subtarget.hasDirectMove() && directMoveIsProfitable(Op) &&
      Subtarget.isPPC64() && Subtarget.hasFPCVT())
    return LowerINT_TO_FPDirectMove(Op, DAG, dl);

  assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
         "UINT_TO_FP is supported only with FPCVT");

  // If we have FCFIDS, then use it when converting to single-precision.
  // Otherwise, convert to double-precision and then round.
  unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
                       ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS
                                                            : PPCISD::FCFIDS)
                       : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU
                                                            : PPCISD::FCFID);
  MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
                  ? MVT::f32
                  : MVT::f64;

  if (Op.getOperand(0).getValueType() == MVT::i64) {
    SDValue SINT = Op.getOperand(0);
    // When converting to single-precision, we actually need to convert
    // to double-precision first and then round to single-precision.
    // To avoid double-rounding effects during that operation, we have
    // to prepare the input operand.  Bits that might be truncated when
    // converting to double-precision are replaced by a bit that won't
    // be lost at this stage, but is below the single-precision rounding
    // position.
    //
    // However, if -enable-unsafe-fp-math is in effect, accept double
    // rounding to avoid the extra overhead.
    if (Op.getValueType() == MVT::f32 &&
        !Subtarget.hasFPCVT() &&
        !DAG.getTarget().Options.UnsafeFPMath) {

      // Twiddle input to make sure the low 11 bits are zero.  (If this
      // is the case, we are guaranteed the value will fit into the 53 bit
      // mantissa of an IEEE double-precision value without rounding.)
      // If any of those low 11 bits were not zero originally, make sure
      // bit 12 (value 2048) is set instead, so that the final rounding
      // to single-precision gets the correct result.
      SDValue Round = DAG.getNode(ISD::AND, dl, MVT::i64,
                                  SINT, DAG.getConstant(2047, dl, MVT::i64));
      Round = DAG.getNode(ISD::ADD, dl, MVT::i64,
                          Round, DAG.getConstant(2047, dl, MVT::i64));
      Round = DAG.getNode(ISD::OR, dl, MVT::i64, Round, SINT);
      Round = DAG.getNode(ISD::AND, dl, MVT::i64,
                          Round, DAG.getConstant(-2048, dl, MVT::i64));

      // However, we cannot use that value unconditionally: if the magnitude
      // of the input value is small, the bit-twiddling we did above might
      // end up visibly changing the output.  Fortunately, in that case, we
      // don't need to twiddle bits since the original input will convert
      // exactly to double-precision floating-point already.  Therefore,
      // construct a conditional to use the original value if the top 11
      // bits are all sign-bit copies, and use the rounded value computed
      // above otherwise.
      SDValue Cond = DAG.getNode(ISD::SRA, dl, MVT::i64,
                                 SINT, DAG.getConstant(53, dl, MVT::i32));
      Cond = DAG.getNode(ISD::ADD, dl, MVT::i64,
                         Cond, DAG.getConstant(1, dl, MVT::i64));
      Cond = DAG.getSetCC(dl, MVT::i32,
                          Cond, DAG.getConstant(1, dl, MVT::i64), ISD::SETUGT);

      SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT);
    }

    ReuseLoadInfo RLI;
    SDValue Bits;

    MachineFunction &MF = DAG.getMachineFunction();
    if (canReuseLoadAddress(SINT, MVT::i64, RLI, DAG)) {
      Bits = DAG.getLoad(MVT::f64, dl, RLI.Chain, RLI.Ptr, RLI.MPI,
                         RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
      spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
    } else if (Subtarget.hasLFIWAX() &&
               canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::SEXTLOAD)) {
      MachineMemOperand *MMO =
        MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
                                RLI.Alignment, RLI.AAInfo, RLI.Ranges);
      SDValue Ops[] = { RLI.Chain, RLI.Ptr };
      Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWAX, dl,
                                     DAG.getVTList(MVT::f64, MVT::Other),
                                     Ops, MVT::i32, MMO);
      spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
    } else if (Subtarget.hasFPCVT() &&
               canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::ZEXTLOAD)) {
      MachineMemOperand *MMO =
        MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
                                RLI.Alignment, RLI.AAInfo, RLI.Ranges);
      SDValue Ops[] = { RLI.Chain, RLI.Ptr };
      Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWZX, dl,
                                     DAG.getVTList(MVT::f64, MVT::Other),
                                     Ops, MVT::i32, MMO);
      spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
    } else if (((Subtarget.hasLFIWAX() &&
                 SINT.getOpcode() == ISD::SIGN_EXTEND) ||
                (Subtarget.hasFPCVT() &&
                 SINT.getOpcode() == ISD::ZERO_EXTEND)) &&
               SINT.getOperand(0).getValueType() == MVT::i32) {
      MachineFrameInfo &MFI = MF.getFrameInfo();
      EVT PtrVT = getPointerTy(DAG.getDataLayout());

      int FrameIdx = MFI.CreateStackObject(4, 4, false);
      SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);

      SDValue Store =
          DAG.getStore(DAG.getEntryNode(), dl, SINT.getOperand(0), FIdx,
                       MachinePointerInfo::getFixedStack(
                           DAG.getMachineFunction(), FrameIdx));

      assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
             "Expected an i32 store");

      RLI.Ptr = FIdx;
      RLI.Chain = Store;
      RLI.MPI =
          MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
      RLI.Alignment = 4;

      MachineMemOperand *MMO =
        MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
                                RLI.Alignment, RLI.AAInfo, RLI.Ranges);
      SDValue Ops[] = { RLI.Chain, RLI.Ptr };
      Bits = DAG.getMemIntrinsicNode(SINT.getOpcode() == ISD::ZERO_EXTEND ?
                                     PPCISD::LFIWZX : PPCISD::LFIWAX,
                                     dl, DAG.getVTList(MVT::f64, MVT::Other),
                                     Ops, MVT::i32, MMO);
    } else
      Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT);

    SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Bits);

    if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT())
      FP = DAG.getNode(ISD::FP_ROUND, dl,
                       MVT::f32, FP, DAG.getIntPtrConstant(0, dl));
    return FP;
  }

  assert(Op.getOperand(0).getValueType() == MVT::i32 &&
         "Unhandled INT_TO_FP type in custom expander!");
  // Since we only generate this in 64-bit mode, we can take advantage of
  // 64-bit registers.  In particular, sign extend the input value into the
  // 64-bit register with extsw, store the WHOLE 64-bit value into the stack
  // then lfd it and fcfid it.
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  EVT PtrVT = getPointerTy(MF.getDataLayout());

  SDValue Ld;
  if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) {
    ReuseLoadInfo RLI;
    bool ReusingLoad;
    if (!(ReusingLoad = canReuseLoadAddress(Op.getOperand(0), MVT::i32, RLI,
                                            DAG))) {
      int FrameIdx = MFI.CreateStackObject(4, 4, false);
      SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);

      SDValue Store =
          DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx,
                       MachinePointerInfo::getFixedStack(
                           DAG.getMachineFunction(), FrameIdx));

      assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
             "Expected an i32 store");

      RLI.Ptr = FIdx;
      RLI.Chain = Store;
      RLI.MPI =
          MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
      RLI.Alignment = 4;
    }

    MachineMemOperand *MMO =
      MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
                              RLI.Alignment, RLI.AAInfo, RLI.Ranges);
    SDValue Ops[] = { RLI.Chain, RLI.Ptr };
    Ld = DAG.getMemIntrinsicNode(Op.getOpcode() == ISD::UINT_TO_FP ?
                                   PPCISD::LFIWZX : PPCISD::LFIWAX,
                                 dl, DAG.getVTList(MVT::f64, MVT::Other),
                                 Ops, MVT::i32, MMO);
    if (ReusingLoad)
      spliceIntoChain(RLI.ResChain, Ld.getValue(1), DAG);
  } else {
    assert(Subtarget.isPPC64() &&
           "i32->FP without LFIWAX supported only on PPC64");

    int FrameIdx = MFI.CreateStackObject(8, 8, false);
    SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);

    SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64,
                                Op.getOperand(0));

    // STD the extended value into the stack slot.
    SDValue Store = DAG.getStore(
        DAG.getEntryNode(), dl, Ext64, FIdx,
        MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));

    // Load the value as a double.
    Ld = DAG.getLoad(
        MVT::f64, dl, Store, FIdx,
        MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
  }

  // FCFID it and return it.
  SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Ld);
  if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT())
    FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
                     DAG.getIntPtrConstant(0, dl));
  return FP;
}

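// Illustrative sketch, not part of this file: the four AND/ADD/OR/AND nodes
// built above compute the following pure function, collapsing the 11 low
// bits that double precision would silently drop into one "sticky" bit at
// value 2048, below the f32 rounding position:
#include <cassert>
#include <cstdint>
static int64_t stickyRound(int64_t X) {
  int64_t R = (X & 2047) + 2047; // bit 11 of R is set iff (X & 2047) != 0
  R |= X;
  R &= ~2047LL;                  // clear the low 11 bits, keep the sticky bit
  return R;
}
static void stickyRoundExamples() {
  assert(stickyRound(0x1000) == 0x1000); // low bits zero: value unchanged
  assert(stickyRound(0x1001) == 0x1800); // nonzero low bits: 2048 forced on
}
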
SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
                                            SelectionDAG &DAG) const {
  SDLoc dl(Op);
  /*
   The rounding mode is in bits 30:31 of the FPSCR, and has the following
   settings:
     00 Round to nearest
     01 Round to 0
     10 Round to +inf
     11 Round to -inf

  FLT_ROUNDS, on the other hand, expects the following:
    -1 Undefined
     0 Round to 0
     1 Round to nearest
     2 Round to +inf
     3 Round to -inf

  To perform the conversion, we do:
    ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1))
  */

  MachineFunction &MF = DAG.getMachineFunction();
  EVT VT = Op.getValueType();
  EVT PtrVT = getPointerTy(MF.getDataLayout());

  // Save FP Control Word to register
  EVT NodeTys[] = {
    MVT::f64,    // return register
    MVT::Glue    // unused in this context
  };
  SDValue Chain = DAG.getNode(PPCISD::MFFS, dl, NodeTys, None);

  // Save FP register to stack slot
  int SSFI = MF.getFrameInfo().CreateStackObject(8, 8, false);
  SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
  SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Chain, StackSlot,
                               MachinePointerInfo());

  // Load FP Control Word from low 32 bits of stack slot.
  SDValue Four = DAG.getConstant(4, dl, PtrVT);
  SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four);
  SDValue CWD = DAG.getLoad(MVT::i32, dl, Store, Addr, MachinePointerInfo());

  // Transform as necessary
  SDValue CWD1 =
    DAG.getNode(ISD::AND, dl, MVT::i32,
                CWD, DAG.getConstant(3, dl, MVT::i32));
  SDValue CWD2 =
    DAG.getNode(ISD::SRL, dl, MVT::i32,
                DAG.getNode(ISD::AND, dl, MVT::i32,
                            DAG.getNode(ISD::XOR, dl, MVT::i32,
                                        CWD, DAG.getConstant(3, dl, MVT::i32)),
                            DAG.getConstant(3, dl, MVT::i32)),
                DAG.getConstant(1, dl, MVT::i32));

  SDValue RetVal =
    DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2);

  return DAG.getNode((VT.getSizeInBits() < 16 ?
                      ISD::TRUNCATE : ISD::ZERO_EXTEND), dl, VT, RetVal);
}

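// Illustrative sketch, not part of this file: the XOR/AND/SRL network above
// evaluates ((RN & 3) ^ ((~RN & 3) >> 1)) on the two-bit rounding-mode field
// RN, which maps the FPSCR encoding onto the FLT_ROUNDS one exactly:
#include <cassert>
static unsigned fltRoundsFromRN(unsigned RN) {
  return (RN & 3) ^ ((~RN & 3) >> 1);
}
static void fltRoundsExamples() {
  assert(fltRoundsFromRN(0) == 1); // 00 nearest      -> 1
  assert(fltRoundsFromRN(1) == 0); // 01 toward 0     -> 0
  assert(fltRoundsFromRN(2) == 2); // 10 toward +inf  -> 2
  assert(fltRoundsFromRN(3) == 3); // 11 toward -inf  -> 3
}
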
SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();
  unsigned BitWidth = VT.getSizeInBits();
  SDLoc dl(Op);
  assert(Op.getNumOperands() == 3 &&
         VT == Op.getOperand(1).getValueType() &&
         "Unexpected SHL!");

  // Expand into a bunch of logical ops.  Note that these ops
  // depend on the PPC behavior for oversized shift amounts.
  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Amt = Op.getOperand(2);
  EVT AmtVT = Amt.getValueType();

  SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
                             DAG.getConstant(BitWidth, dl, AmtVT), Amt);
  SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt);
  SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1);
  SDValue Tmp4 = DAG.getNode(ISD::OR , dl, VT, Tmp2, Tmp3);
  SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
                             DAG.getConstant(-BitWidth, dl, AmtVT));
  SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5);
  SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
  SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt);
  SDValue OutOps[] = { OutLo, OutHi };
  return DAG.getMergeValues(OutOps, dl);
}

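// Illustrative sketch, not part of this file: a standalone model of the
// expansion above for BitWidth = 32. PPC's 32-bit shifts consume a 6-bit
// amount and yield 0 once it reaches 32, which is what makes the two cross
// terms (Tmp3 for Amt < 32, Tmp6 for Amt >= 32) mutually exclusive:
#include <cassert>
#include <cstdint>
static uint32_t ppcShl32(uint32_t V, uint32_t Amt) {
  Amt &= 63;
  return Amt > 31 ? 0 : V << Amt;
}
static uint32_t ppcSrl32(uint32_t V, uint32_t Amt) {
  Amt &= 63;
  return Amt > 31 ? 0 : V >> Amt;
}
static void shlPartsExample() {
  const uint64_t X = 0x123456789abcdef0ULL;
  const uint32_t Lo = (uint32_t)X, Hi = (uint32_t)(X >> 32);
  for (uint32_t Amt = 0; Amt < 64; ++Amt) {
    uint32_t OutHi = ppcShl32(Hi, Amt) | ppcSrl32(Lo, 32 - Amt) |
                     ppcShl32(Lo, Amt - 32); // Amt - 32 wraps modulo 64
    uint32_t OutLo = ppcShl32(Lo, Amt);
    assert((((uint64_t)OutHi << 32) | OutLo) == X << Amt);
  }
}
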
SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();
  SDLoc dl(Op);
  unsigned BitWidth = VT.getSizeInBits();
  assert(Op.getNumOperands() == 3 &&
         VT == Op.getOperand(1).getValueType() &&
         "Unexpected SRL!");

  // Expand into a bunch of logical ops.  Note that these ops
  // depend on the PPC behavior for oversized shift amounts.
  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Amt = Op.getOperand(2);
  EVT AmtVT = Amt.getValueType();

  SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
                             DAG.getConstant(BitWidth, dl, AmtVT), Amt);
  SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
  SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
  SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
  SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
                             DAG.getConstant(-BitWidth, dl, AmtVT));
  SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5);
  SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
  SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt);
  SDValue OutOps[] = { OutLo, OutHi };
  return DAG.getMergeValues(OutOps, dl);
}

SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
  SDLoc dl(Op);
  EVT VT = Op.getValueType();
  unsigned BitWidth = VT.getSizeInBits();
  assert(Op.getNumOperands() == 3 &&
         VT == Op.getOperand(1).getValueType() &&
         "Unexpected SRA!");

  // Expand into a bunch of logical ops, followed by a select_cc.
  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Amt = Op.getOperand(2);
  EVT AmtVT = Amt.getValueType();

  SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
                             DAG.getConstant(BitWidth, dl, AmtVT), Amt);
  SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
  SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
  SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
  SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
                             DAG.getConstant(-BitWidth, dl, AmtVT));
  SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5);
  SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt);
  SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, dl, AmtVT),
                                  Tmp4, Tmp6, ISD::SETLE);
  SDValue OutOps[] = { OutLo, OutHi };
  return DAG.getMergeValues(OutOps, dl);
}

//===----------------------------------------------------------------------===//
// Vector related lowering.
//

/// BuildSplatI - Build a canonical splati of Val with an element size of
/// SplatSize.  Cast the result to VT.
static SDValue BuildSplatI(int Val, unsigned SplatSize, EVT VT,
                           SelectionDAG &DAG, const SDLoc &dl) {
  assert(Val >= -16 && Val <= 15 && "vsplti is out of range!");

  static const MVT VTys[] = { // canonical VT to use for each size.
    MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
  };

  EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];

  // Force vspltis[hw] -1 to vspltisb -1 to canonicalize.
  if (Val == -1)
    SplatSize = 1;

  EVT CanonicalVT = VTys[SplatSize-1];

  // Build a canonical splat for this value.
  return DAG.getBitcast(ReqVT, DAG.getConstant(Val, dl, CanonicalVT));
}

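// Illustrative sketch, not part of this file: the -1 canonicalization above
// works because splatting -1 at any element width produces the same all-ones
// bit pattern, so vspltisb -1 covers vspltish/vspltisw -1 as well:
#include <cassert>
#include <cstdint>
static void splatMinusOneExample() {
  const uint8_t  B = (uint8_t)-1;                    // vspltisb -1
  const uint16_t H = (uint16_t)-1;                   // vspltish -1
  const uint32_t W = (uint32_t)-1;                   // vspltisw -1
  assert(H == (uint16_t)(((uint16_t)B << 8) | B));   // two -1 bytes
  assert(W == (((uint32_t)H << 16) | H));            // two -1 halfwords
}
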
/// BuildIntrinsicOp - Return a unary operator intrinsic node with the
/// specified intrinsic ID.
static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG,
                                const SDLoc &dl, EVT DestVT = MVT::Other) {
  if (DestVT == MVT::Other) DestVT = Op.getValueType();
  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
                     DAG.getConstant(IID, dl, MVT::i32), Op);
}

/// BuildIntrinsicOp - Return a binary operator intrinsic node with the
/// specified intrinsic ID.
static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS,
                                SelectionDAG &DAG, const SDLoc &dl,
                                EVT DestVT = MVT::Other) {
  if (DestVT == MVT::Other) DestVT = LHS.getValueType();
  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
                     DAG.getConstant(IID, dl, MVT::i32), LHS, RHS);
}

/// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
8061
/// specified intrinsic ID.
8062
static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
8063
                                SDValue Op2, SelectionDAG &DAG, const SDLoc &dl,
8064
119
                                EVT DestVT = MVT::Other) {
8065
119
  if (DestVT == MVT::Other) DestVT = Op0.getValueType();
8066
119
  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
8067
119
                     DAG.getConstant(IID, dl, MVT::i32), Op0, Op1, Op2);
8068
119
}
8069
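
(Editorial example of the call shape: the vslw node built later in
LowerBUILD_VECTOR uses the binary overload, with DestVT left defaulted so it
falls back to the LHS type, MVT::v4i32 in that case.)

  SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,
                                 OnesV, DAG, dl);
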
8070
/// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
8071
/// amount.  The result has the specified value type.
8072
static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT,
8073
69
                           SelectionDAG &DAG, const SDLoc &dl) {
8074
69
  // Force LHS/RHS to be the right type.
8075
69
  LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS);
8076
69
  RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS);
8077
69
8078
69
  int Ops[16];
8079
1.17k
  for (unsigned i = 0; i != 16; ++i)
8080
1.10k
    Ops[i] = i + Amt;
8081
69
  SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops);
8082
69
  return DAG.getNode(ISD::BITCAST, dl, VT, T);
8083
69
}
8084
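
(Editorial example: BuildVSLDOI(LHS, RHS, 4, VT, DAG, dl) fills Ops with
{4,5,...,19}, i.e. bytes 4-15 of LHS followed by bytes 0-3 of RHS, which is
exactly the byte rotation "vsldoi vD, vA, vB, 4" performs.)
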
8085
/// Do we have an efficient pattern in a .td file for this node?
8086
///
8087
/// \param V - pointer to the BuildVectorSDNode being matched
8088
/// \param HasDirectMove - does this subtarget have VSR <-> GPR direct moves?
8089
///
8090
/// There are some patterns where it is beneficial to keep a BUILD_VECTOR
8091
/// node as a BUILD_VECTOR node rather than expanding it. The patterns where
8092
/// the opposite is true (expansion is beneficial) are:
8093
/// - The node builds a vector out of integers that are not 32 or 64 bits wide
8094
/// - The node builds a vector out of constants
8095
/// - The node is a "load-and-splat"
8096
/// In all other cases, we will choose to keep the BUILD_VECTOR.
8097
static bool haveEfficientBuildVectorPattern(BuildVectorSDNode *V,
8098
                                            bool HasDirectMove,
8099
2.67k
                                            bool HasP8Vector) {
8100
2.67k
  EVT VecVT = V->getValueType(0);
8101
2.67k
  bool RightType = VecVT == MVT::v2f64 ||
8102
2.67k
    (HasP8Vector && VecVT == MVT::v4f32) ||
8103
2.67k
    (HasDirectMove && (VecVT == MVT::v2i64 || VecVT == MVT::v4i32));
8104
2.67k
  if (!RightType)
8105
645
    return false;
8106
2.03k
8107
2.03k
  bool IsSplat = true;
8108
2.03k
  bool IsLoad = false;
8109
2.03k
  SDValue Op0 = V->getOperand(0);
8110
2.03k
8111
2.03k
  // This function is called in a block that confirms the node is not a constant
8112
2.03k
  // splat. So a constant BUILD_VECTOR here means the vector is built out of
8113
2.03k
  // different constants.
8114
2.03k
  if (V->isConstant())
8115
333
    return false;
8116
6.21k
  for (int i = 0, e = V->getNumOperands(); i < e; ++i) {
8117
4.66k
    if (V->getOperand(i).isUndef())
8118
155
      return false;
8119
4.51k
    // We want to expand nodes that represent load-and-splat even if the
8120
4.51k
    // loaded value is a floating point truncation or conversion to int.
8121
4.51k
    if (V->getOperand(i).getOpcode() == ISD::LOAD ||
8122
4.51k
        (V->getOperand(i).getOpcode() == ISD::FP_ROUND &&
8123
3.75k
         V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
8124
4.51k
        (V->getOperand(i).getOpcode() == ISD::FP_TO_SINT &&
8125
3.65k
         V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
8126
4.51k
        (V->getOperand(i).getOpcode() == ISD::FP_TO_UINT &&
8127
3.45k
         V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD))
8128
1.25k
      IsLoad = true;
8129
4.51k
    // If the operands are different or the input is not a load and has more
8130
4.51k
    // uses than just this BV node, then it isn't a splat.
8131
4.51k
    if (V->getOperand(i) != Op0 ||
8132
4.51k
        (!IsLoad && !V->isOnlyUserOf(V->getOperand(i).getNode())))
8133
2.47k
      IsSplat = false;
8134
4.51k
  }
8135
1.69k
  return !(IsSplat && IsLoad);
8136
1.69k
}
8137
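
(Editorial example: a v4f32 BUILD_VECTOR whose four operands are one and the
same LOAD node with no other users leaves both IsSplat and IsLoad true, so the
function returns false and the node is expanded, letting it become a
load-and-splat; a BUILD_VECTOR of distinct non-constant v2i64 elements on a
direct-move subtarget returns true and is kept as a BUILD_VECTOR.)
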
8138
// Lower BITCAST(f128, (build_pair i64, i64)) to BUILD_FP128.
8139
12
SDValue PPCTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {
8140
12
8141
12
  SDLoc dl(Op);
8142
12
  SDValue Op0 = Op->getOperand(0);
8143
12
8144
12
  if (!EnableQuadPrecision ||
8145
12
      (Op.getValueType() != MVT::f128 ) ||
8146
12
      (Op0.getOpcode() != ISD::BUILD_PAIR) ||
8147
12
      (Op0.getOperand(0).getValueType() !=  MVT::i64) ||
8148
12
      (Op0.getOperand(1).getValueType() != MVT::i64))
8149
0
    return SDValue();
8150
12
8151
12
  return DAG.getNode(PPCISD::BUILD_FP128, dl, MVT::f128, Op0.getOperand(0),
8152
12
                     Op0.getOperand(1));
8153
12
}
8154
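
(Editorial note: the guard above matches a node shape like
   BITCAST f128 (BUILD_PAIR i64 %lo, i64 %hi)
and, when the EnableQuadPrecision option is on, collapses it into one
PPCISD::BUILD_FP128 node taking the two i64 halves directly; anything else
falls back to the default expansion via the empty SDValue.)
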
8155
// If this is a case we can't handle, return null and let the default
8156
// expansion code take care of it.  If we CAN select this case, and if it
8157
// selects to a single instruction, return Op.  Otherwise, if we can codegen
8158
// this case more efficiently than a constant pool load, lower it to the
8159
// sequence of ops that should be used.
8160
SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
8161
4.24k
                                             SelectionDAG &DAG) const {
8162
4.24k
  SDLoc dl(Op);
8163
4.24k
  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
8164
4.24k
  assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
8165
4.24k
8166
4.24k
  if (Subtarget.hasQPX() && Op.getValueType() == MVT::v4i1) {
8167
9
    // We first build an i32 vector, load it into a QPX register,
8168
9
    // then convert it to a floating-point vector and compare it
8169
9
    // to a zero vector to get the boolean result.
8170
9
    MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
8171
9
    int FrameIdx = MFI.CreateStackObject(16, 16, false);
8172
9
    MachinePointerInfo PtrInfo =
8173
9
        MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
8174
9
    EVT PtrVT = getPointerTy(DAG.getDataLayout());
8175
9
    SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8176
9
8177
9
    assert(BVN->getNumOperands() == 4 &&
8178
9
      "BUILD_VECTOR for v4i1 does not have 4 operands");
8179
9
8180
9
    bool IsConst = true;
8181
17
    for (unsigned i = 0; i < 4; ++i) {
8182
15
      if (BVN->getOperand(i).isUndef()) continue;
8183
13
      if (!isa<ConstantSDNode>(BVN->getOperand(i))) {
8184
7
        IsConst = false;
8185
7
        break;
8186
7
      }
8187
13
    }
8188
9
8189
9
    if (IsConst) {
8190
2
      Constant *One =
8191
2
        ConstantFP::get(Type::getFloatTy(*DAG.getContext()), 1.0);
8192
2
      Constant *NegOne =
8193
2
        ConstantFP::get(Type::getFloatTy(*DAG.getContext()), -1.0);
8194
2
8195
2
      Constant *CV[4];
8196
10
      for (unsigned i = 0; i < 4; ++i) {
8197
8
        if (BVN->getOperand(i).isUndef())
8198
2
          CV[i] = UndefValue::get(Type::getFloatTy(*DAG.getContext()));
8199
6
        else if (isNullConstant(BVN->getOperand(i)))
8200
2
          CV[i] = NegOne;
8201
4
        else
8202
4
          CV[i] = One;
8203
8
      }
8204
2
8205
2
      Constant *CP = ConstantVector::get(CV);
8206
2
      SDValue CPIdx = DAG.getConstantPool(CP, getPointerTy(DAG.getDataLayout()),
8207
2
                                          16 /* alignment */);
8208
2
8209
2
      SDValue Ops[] = {DAG.getEntryNode(), CPIdx};
8210
2
      SDVTList VTs = DAG.getVTList({MVT::v4i1, /*chain*/ MVT::Other});
8211
2
      return DAG.getMemIntrinsicNode(
8212
2
          PPCISD::QVLFSb, dl, VTs, Ops, MVT::v4f32,
8213
2
          MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
8214
2
    }
8215
7
8216
7
    SmallVector<SDValue, 4> Stores;
8217
35
    for (unsigned i = 0; i < 4; ++i) {
8218
28
      if (BVN->getOperand(i).isUndef()) continue;
8219
26
8220
26
      unsigned Offset = 4*i;
8221
26
      SDValue Idx = DAG.getConstant(Offset, dl, FIdx.getValueType());
8222
26
      Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx);
8223
26
8224
26
      unsigned StoreSize = BVN->getOperand(i).getValueType().getStoreSize();
8225
26
      if (StoreSize > 4) {
8226
0
        Stores.push_back(
8227
0
            DAG.getTruncStore(DAG.getEntryNode(), dl, BVN->getOperand(i), Idx,
8228
0
                              PtrInfo.getWithOffset(Offset), MVT::i32));
8229
26
      } else {
8230
26
        SDValue StoreValue = BVN->getOperand(i);
8231
26
        if (StoreSize < 4)
8232
18
          StoreValue = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, StoreValue);
8233
26
8234
26
        Stores.push_back(DAG.getStore(DAG.getEntryNode(), dl, StoreValue, Idx,
8235
26
                                      PtrInfo.getWithOffset(Offset)));
8236
26
      }
8237
26
    }
8238
7
8239
7
    SDValue StoreChain;
8240
7
    if (!Stores.empty())
8241
7
      StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
8242
0
    else
8243
0
      StoreChain = DAG.getEntryNode();
8244
7
8245
7
    // Now load from v4i32 into the QPX register; this will extend it to
8246
7
    // v4i64 but not yet convert it to a floating point. Nevertheless, this
8247
7
    // is typed as v4f64 because the QPX register integer states are not
8248
7
    // explicitly represented.
8249
7
8250
7
    SDValue Ops[] = {StoreChain,
8251
7
                     DAG.getConstant(Intrinsic::ppc_qpx_qvlfiwz, dl, MVT::i32),
8252
7
                     FIdx};
8253
7
    SDVTList VTs = DAG.getVTList({MVT::v4f64, /*chain*/ MVT::Other});
8254
7
8255
7
    SDValue LoadedVect = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN,
8256
7
      dl, VTs, Ops, MVT::v4i32, PtrInfo);
8257
7
    LoadedVect = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64,
8258
7
      DAG.getConstant(Intrinsic::ppc_qpx_qvfcfidu, dl, MVT::i32),
8259
7
      LoadedVect);
8260
7
8261
7
    SDValue FPZeros = DAG.getConstantFP(0.0, dl, MVT::v4f64);
8262
7
8263
7
    return DAG.getSetCC(dl, MVT::v4i1, LoadedVect, FPZeros, ISD::SETEQ);
8264
7
  }
8265
4.23k
8266
4.23k
  // All other QPX vectors are handled by generic code.
8267
4.23k
  if (Subtarget.hasQPX())
8268
56
    return SDValue();
8269
4.18k
8270
4.18k
  // Check if this is a splat of a constant value.
8271
4.18k
  APInt APSplatBits, APSplatUndef;
8272
4.18k
  unsigned SplatBitSize;
8273
4.18k
  bool HasAnyUndefs;
8274
4.18k
  if (! BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
8275
4.18k
                             HasAnyUndefs, 0, !Subtarget.isLittleEndian()) ||
8276
4.18k
      SplatBitSize > 32) {
8277
2.71k
    // BUILD_VECTOR nodes that are not constant splats of up to 32-bits can be
8278
2.71k
    // lowered to VSX instructions under certain conditions.
8279
2.71k
    // Without VSX, there is no pattern more efficient than expanding the node.
8280
2.71k
    if (Subtarget.hasVSX() &&
8281
2.71k
        haveEfficientBuildVectorPattern(BVN, Subtarget.hasDirectMove(),
8282
2.67k
                                        Subtarget.hasP8Vector()))
8283
1.43k
      return Op;
8284
1.28k
    return SDValue();
8285
1.28k
  }
8286
1.46k
8287
1.46k
  unsigned SplatBits = APSplatBits.getZExtValue();
8288
1.46k
  unsigned SplatUndef = APSplatUndef.getZExtValue();
8289
1.46k
  unsigned SplatSize = SplatBitSize / 8;
8290
1.46k
8291
1.46k
  // First, handle single instruction cases.
8292
1.46k
8293
1.46k
  // All zeros?
8294
1.46k
  if (SplatBits == 0) {
8295
695
    // Canonicalize all zero vectors to be v4i32.
8296
695
    if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
8297
169
      SDValue Z = DAG.getConstant(0, dl, MVT::v4i32);
8298
169
      Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z);
8299
169
    }
8300
695
    return Op;
8301
695
  }
8302
774
8303
774
  // We have XXSPLTIB for constant splats one byte wide
8304
774
  if (Subtarget.hasP9Vector() && SplatSize == 1) {
8305
88
    // This is a splat of 1-byte elements with some elements potentially undef.
8306
88
    // Rather than trying to match undef in the SDAG patterns, ensure that all
8307
88
    // elements are the same constant.
8308
88
    if (HasAnyUndefs || ISD::isBuildVectorAllOnes(BVN)) {
8309
30
      SmallVector<SDValue, 16> Ops(16, DAG.getConstant(SplatBits,
8310
30
                                                       dl, MVT::i32));
8311
30
      SDValue NewBV = DAG.getBuildVector(MVT::v16i8, dl, Ops);
8312
30
      if (Op.getValueType() != MVT::v16i8)
8313
8
        return DAG.getBitcast(Op.getValueType(), NewBV);
8314
22
      return NewBV;
8315
22
    }
8316
58
8317
58
    // BuildVectorSDNode::isConstantSplat() is actually pretty smart. It'll
8318
58
    // detect that constant splats like v8i16: 0xABAB are really just splats
8319
58
    // of a 1-byte constant. In this case, we need to convert the node to a
8320
58
    // splat of v16i8 and a bitcast.
8321
58
    if (Op.getValueType() != MVT::v16i8)
8322
6
      return DAG.getBitcast(Op.getValueType(),
8323
6
                            DAG.getConstant(SplatBits, dl, MVT::v16i8));
8324
52
8325
52
    return Op;
8326
52
  }
8327
686
8328
686
  // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
8329
686
  int32_t SextVal = (int32_t(SplatBits << (32-SplatBitSize)) >>
8330
686
                    (32-SplatBitSize));
8331
686
  if (SextVal >= -16 && SextVal <= 15)
8332
570
    return BuildSplatI(SextVal, SplatSize, Op.getValueType(), DAG, dl);
8333
116
8334
116
  // Two instruction sequences.
8335
116
8336
116
  // If this value is in the range [-32,30] and is even, use:
8337
116
  //     VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2)
8338
116
  // If this value is in the range [17,31] and is odd, use:
8339
116
  //     VSPLTI[bhw](val-16) - VSPLTI[bhw](-16)
8340
116
  // If this value is in the range [-31,-17] and is odd, use:
8341
116
  //     VSPLTI[bhw](val+16) + VSPLTI[bhw](-16)
8342
116
  // Note the last two are three-instruction sequences.
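  // Editorial worked example: SextVal == 30 (even, in [-32,30]) becomes
  //     vspltisw 15 ; vadduwm          (15 + 15 = 30 per element),
  // while SextVal == 27 (odd, in [17,31]) becomes
  //     vspltisw 11 ; vspltisw -16 ; vsubuwm   (11 - (-16) = 27).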
8343
116
  if (SextVal >= -32 && SextVal <= 31) {
8344
50
    // To avoid having these optimizations undone by constant folding,
8345
50
    // we convert to a pseudo that will be expanded later into one of
8346
50
    // the above forms.
8347
50
    SDValue Elt = DAG.getConstant(SextVal, dl, MVT::i32);
8348
50
    EVT VT = (SplatSize == 1 ? MVT::v16i8 :
8349
50
              (SplatSize == 2 ? MVT::v8i16 : MVT::v4i32));
8350
50
    SDValue EltSize = DAG.getConstant(SplatSize, dl, MVT::i32);
8351
50
    SDValue RetVal = DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize);
8352
50
    if (VT == Op.getValueType())
8353
45
      return RetVal;
8354
5
    else
8355
5
      return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), RetVal);
8356
66
  }
8357
66
8358
66
  // If this is 0x8000_0000 x 4, turn into vspltisw + vslw.  If it is
8359
66
  // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000).  This is important
8360
66
  // for fneg/fabs.
8361
66
  if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
8362
2
    // Make -1 and vspltisw -1:
8363
2
    SDValue OnesV = BuildSplatI(-1, 4, MVT::v4i32, DAG, dl);
8364
2
8365
2
    // Make the VSLW intrinsic, computing 0x8000_0000.
8366
2
    SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,
8367
2
                                   OnesV, DAG, dl);
8368
2
8369
2
    // xor by OnesV to invert it.
8370
2
    Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV);
8371
2
    return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
8372
2
  }
8373
64
8374
64
  // Check to see if this is a wide variety of vsplti*, binop self cases.
8375
64
  static const signed char SplatCsts[] = {
8376
64
    -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
8377
64
    -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
8378
64
  };
8379
64
8380
1.68k
  for (unsigned idx = 0; idx < array_lengthof(SplatCsts); ++idx) {
8381
1.65k
    // Indirect through the SplatCsts array so that we favor 'vsplti -1' for
8382
1.65k
    // cases which are ambiguous (e.g. formation of 0x8000_0000); 'vsplti -1' is listed first.
8383
1.65k
    int i = SplatCsts[idx];
8384
1.65k
8385
1.65k
    // Figure out what shift amount will be used by altivec if shifted by i in
8386
1.65k
    // this splat size.
8387
1.65k
    unsigned TypeShiftAmt = i & (SplatBitSize-1);
8388
1.65k
8389
1.65k
    // vsplti + shl self.
8390
1.65k
    if (SextVal == (int)((unsigned)i << TypeShiftAmt)) {
8391
8
      SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
8392
8
      static const unsigned IIDs[] = { // Intrinsic to use for each size.
8393
8
        Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
8394
8
        Intrinsic::ppc_altivec_vslw
8395
8
      };
8396
8
      Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
8397
8
      return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
8398
8
    }
8399
1.64k
8400
1.64k
    // vsplti + srl self.
8401
1.64k
    if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
8402
17
      SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
8403
17
      static const unsigned IIDs[] = { // Intrinsic to use for each size.
8404
17
        Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
8405
17
        Intrinsic::ppc_altivec_vsrw
8406
17
      };
8407
17
      Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
8408
17
      return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
8409
17
    }
8410
1.62k
8411
1.62k
    // vsplti + sra self.
8412
1.62k
    if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
8413
0
      SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
8414
0
      static const unsigned IIDs[] = { // Intrinsic to use for each size.
8415
0
        Intrinsic::ppc_altivec_vsrab, Intrinsic::ppc_altivec_vsrah, 0,
8416
0
        Intrinsic::ppc_altivec_vsraw
8417
0
      };
8418
0
      Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
8419
0
      return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
8420
0
    }
8421
1.62k
8422
1.62k
    // vsplti + rol self.
8423
1.62k
    if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
8424
1.62k
                         ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
8425
3
      SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
8426
3
      static const unsigned IIDs[] = { // Intrinsic to use for each size.
8427
3
        Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
8428
3
        Intrinsic::ppc_altivec_vrlw
8429
3
      };
8430
3
      Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
8431
3
      return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
8432
3
    }
8433
1.62k
8434
1.62k
    // t = vsplti c, result = vsldoi t, t, 1
8435
1.62k
    if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) {
8436
4
      SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
8437
4
      unsigned Amt = Subtarget.isLittleEndian() ? 15 : 1;
8438
4
      return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
8439
4
    }
8440
1.62k
    // t = vsplti c, result = vsldoi t, t, 2
8441
1.62k
    if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) {
8442
0
      SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
8443
0
      unsigned Amt = Subtarget.isLittleEndian() ? 14 : 2;
8444
0
      return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
8445
0
    }
8446
1.62k
    // t = vsplti c, result = vsldoi t, t, 3
8447
1.62k
    if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
8448
0
      SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
8449
0
      unsigned Amt = Subtarget.isLittleEndian() ? 13 : 3;
8450
0
      return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
8451
0
    }
8452
1.62k
  }
8453
64
8454
64
  return SDValue();
8455
64
}
8456
8457
/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
8458
/// the specified operations to build the shuffle.
8459
static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
8460
                                      SDValue RHS, SelectionDAG &DAG,
8461
222
                                      const SDLoc &dl) {
8462
222
  unsigned OpNum = (PFEntry >> 26) & 0x0F;
8463
222
  unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
8464
222
  unsigned RHSID = (PFEntry >>  0) & ((1 << 13)-1);
8465
222
8466
222
  enum {
8467
222
    OP_COPY = 0,  // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
8468
222
    OP_VMRGHW,
8469
222
    OP_VMRGLW,
8470
222
    OP_VSPLTISW0,
8471
222
    OP_VSPLTISW1,
8472
222
    OP_VSPLTISW2,
8473
222
    OP_VSPLTISW3,
8474
222
    OP_VSLDOI4,
8475
222
    OP_VSLDOI8,
8476
222
    OP_VSLDOI12
8477
222
  };
8478
222
8479
222
  if (OpNum == OP_COPY) {
8480
127
    if (LHSID == (1*9+2)*9+3) return LHS;
8481
44
    assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
8482
44
    return RHS;
8483
44
  }
8484
95
8485
95
  SDValue OpLHS, OpRHS;
8486
95
  OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
8487
95
  OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
8488
95
8489
95
  int ShufIdxs[16];
8490
95
  switch (OpNum) {
8491
95
  default: llvm_unreachable("Unknown i32 permute!");
8492
95
  case OP_VMRGHW:
8493
12
    ShufIdxs[ 0] =  0; ShufIdxs[ 1] =  1; ShufIdxs[ 2] =  2; ShufIdxs[ 3] =  3;
8494
12
    ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19;
8495
12
    ShufIdxs[ 8] =  4; ShufIdxs[ 9] =  5; ShufIdxs[10] =  6; ShufIdxs[11] =  7;
8496
12
    ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23;
8497
12
    break;
8498
95
  case OP_VMRGLW:
8499
12
    ShufIdxs[ 0] =  8; ShufIdxs[ 1] =  9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11;
8500
12
    ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27;
8501
12
    ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15;
8502
12
    ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31;
8503
12
    break;
8504
95
  case OP_VSPLTISW0:
8505
51
    for (unsigned i = 0; i != 16; ++i)
8506
48
      ShufIdxs[i] = (i&3)+0;
8507
3
    break;
8508
95
  case OP_VSPLTISW1:
8509
0
    for (unsigned i = 0; i != 16; ++i)
8510
0
      ShufIdxs[i] = (i&3)+4;
8511
0
    break;
8512
95
  case OP_VSPLTISW2:
8513
51
    for (unsigned i = 0; i != 16; ++i)
8514
48
      ShufIdxs[i] = (i&3)+8;
8515
3
    break;
8516
95
  case OP_VSPLTISW3:
8517
0
    for (unsigned i = 0; i != 16; ++i)
8518
0
      ShufIdxs[i] = (i&3)+12;
8519
0
    break;
8520
95
  case OP_VSLDOI4:
8521
26
    return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl);
8522
95
  case OP_VSLDOI8:
8523
19
    return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl);
8524
95
  case OP_VSLDOI12:
8525
20
    return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl);
8526
30
  }
8527
30
  EVT VT = OpLHS.getValueType();
8528
30
  OpLHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLHS);
8529
30
  OpRHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpRHS);
8530
30
  SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs);
8531
30
  return DAG.getNode(ISD::BITCAST, dl, VT, T);
8532
30
}
8533
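
(Editorial note: a PFEntry packs its data as Cost = PFEntry >> 30,
OpNum = (PFEntry >> 26) & 0xF, LHSID = (PFEntry >> 13) & 0x1FFF and
RHSID = PFEntry & 0x1FFF; an ID encodes four element selectors as base-9
digits, 0-7 picking an element of the concatenated inputs and 8 meaning
undef, which is why LHSID == (1*9+2)*9+3 above is the <0,1,2,3> identity
copy and ((4*9+5)*9+6)*9+7 is <4,5,6,7>.)
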
8534
/// lowerToVINSERTB - Return the SDValue if this VECTOR_SHUFFLE can be handled
8535
/// by the VINSERTB instruction introduced in ISA 3.0, else just return default
8536
/// SDValue.
8537
SDValue PPCTargetLowering::lowerToVINSERTB(ShuffleVectorSDNode *N,
8538
2.03k
                                           SelectionDAG &DAG) const {
8539
2.03k
  const unsigned BytesInVector = 16;
8540
2.03k
  bool IsLE = Subtarget.isLittleEndian();
8541
2.03k
  SDLoc dl(N);
8542
2.03k
  SDValue V1 = N->getOperand(0);
8543
2.03k
  SDValue V2 = N->getOperand(1);
8544
2.03k
  unsigned ShiftElts = 0, InsertAtByte = 0;
8545
2.03k
  bool Swap = false;
8546
2.03k
8547
2.03k
  // Shifts required to get the byte we want at element 7.
8548
2.03k
  unsigned LittleEndianShifts[] = {8, 7,  6,  5,  4,  3,  2,  1,
8549
2.03k
                                   0, 15, 14, 13, 12, 11, 10, 9};
8550
2.03k
  unsigned BigEndianShifts[] = {9, 10, 11, 12, 13, 14, 15, 0,
8551
2.03k
                                1, 2,  3,  4,  5,  6,  7,  8};
8552
2.03k
8553
2.03k
  ArrayRef<int> Mask = N->getMask();
8554
2.03k
  int OriginalOrder[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
8555
2.03k
8556
2.03k
  // For each mask element, find out if we're just inserting something
8557
2.03k
  // from V2 into V1 or vice versa.
8558
2.03k
  // Possible permutations inserting an element from V2 into V1:
8559
2.03k
  //   X, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
8560
2.03k
  //   0, X, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
8561
2.03k
  //   ...
8562
2.03k
  //   0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, X
8563
2.03k
  // Inserting from V1 into V2 will be similar, except mask range will be
8564
2.03k
  // [16,31].
8565
2.03k
8566
2.03k
  bool FoundCandidate = false;
8567
2.03k
  // If both vector operands for the shuffle are the same vector, the mask
8568
2.03k
  // will contain only elements from the first one and the second one will be
8569
2.03k
  // undef.
8570
2.03k
  unsigned VINSERTBSrcElem = IsLE ? 8 : 7;
8571
2.03k
  // Go through the mask of bytes to find an element that's being moved
8572
2.03k
  // from one vector to the other.
8573
33.1k
  for (unsigned i = 0; i < BytesInVector; ++i) {
8574
31.2k
    unsigned CurrentElement = Mask[i];
8575
31.2k
    // If 2nd operand is undefined, we should only look for the VINSERTB
8576
31.2k
    // source element (7 for BE, 8 for LE) in the Mask.
8577
31.2k
    if (V2.isUndef() && CurrentElement != VINSERTBSrcElem)
8578
5.72k
      continue;
8579
25.5k
8580
25.5k
    bool OtherElementsInOrder = true;
8581
25.5k
    // Examine the other elements in the Mask to see if they're in original
8582
25.5k
    // order.
8583
80.0k
    for (unsigned j = 0; j < BytesInVector; ++j) {
8584
79.9k
      if (j == i)
8585
3.33k
        continue;
8586
76.5k
      // If CurrentElement is from V1 [0,15], then we expect the rest of the Mask to be
8587
76.5k
      // from V2 [16,31] and vice versa.  Unless the 2nd operand is undefined,
8588
76.5k
      // in which case we assume we're always picking from the 1st operand.
8589
76.5k
      int MaskOffset =
8590
76.5k
          (!V2.isUndef() && CurrentElement < BytesInVector) ? BytesInVector : 0;
8591
76.5k
      if (Mask[j] != OriginalOrder[j] + MaskOffset) {
8592
25.3k
        OtherElementsInOrder = false;
8593
25.3k
        break;
8594
25.3k
      }
8595
76.5k
    }
8596
25.5k
    // If other elements are in original order, we record the number of shifts
8597
25.5k
    // we need to get the element we want into element 7. Also record which byte
8598
25.5k
    // in the vector we should insert into.
8599
25.5k
    if (OtherElementsInOrder) {
8600
160
      // If 2nd operand is undefined, we assume no shifts and no swapping.
8601
160
      if (V2.isUndef()) {
8602
32
        ShiftElts = 0;
8603
32
        Swap = false;
8604
128
      } else {
8605
128
        // Only need the last 4 bits for shifts because operands will be swapped if CurrentElement is >= 2^4.
8606
128
        ShiftElts = IsLE ? LittleEndianShifts[CurrentElement & 0xF]
8607
128
                         : BigEndianShifts[CurrentElement & 0xF];
8608
128
        Swap = CurrentElement < BytesInVector;
8609
128
      }
8610
160
      InsertAtByte = IsLE ? BytesInVector - (i + 1) : i;
8611
160
      FoundCandidate = true;
8612
160
      break;
8613
160
    }
8614
25.5k
  }
8615
2.03k
8616
2.03k
  if (!FoundCandidate)
8617
1.87k
    return SDValue();
8618
160
8619
160
  // Candidate found, construct the proper SDAG sequence with VINSERTB,
8620
160
  // optionally with VECSHL if shift is required.
8621
160
  if (Swap)
8622
64
    std::swap(V1, V2);
8623
160
  if (V2.isUndef())
8624
32
    V2 = V1;
8625
160
  if (ShiftElts) {
8626
120
    SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
8627
120
                              DAG.getConstant(ShiftElts, dl, MVT::i32));
8628
120
    return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, Shl,
8629
120
                       DAG.getConstant(InsertAtByte, dl, MVT::i32));
8630
120
  }
8631
40
  return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, V2,
8632
40
                     DAG.getConstant(InsertAtByte, dl, MVT::i32));
8633
40
}
8634
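
(Editorial example: on little-endian, the byte shuffle mask
{16,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15} inserts byte 0 of V2 into byte 0 of
V1. The loop finds CurrentElement == 16 at i == 0 with every other element in
original order, so Swap == false, ShiftElts == LittleEndianShifts[0] == 8 and
InsertAtByte == 15, and the whole shuffle becomes one VECSHL by 8 feeding one
VECINSERT.)
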
8635
/// lowerToVINSERTH - Return the SDValue if this VECTOR_SHUFFLE can be handled
8636
/// by the VINSERTH instruction introduced in ISA 3.0, else just return default
8637
/// SDValue.
8638
SDValue PPCTargetLowering::lowerToVINSERTH(ShuffleVectorSDNode *N,
8639
2.11k
                                           SelectionDAG &DAG) const {
8640
2.11k
  const unsigned NumHalfWords = 8;
8641
2.11k
  const unsigned BytesInVector = NumHalfWords * 2;
8642
2.11k
  // Check that the shuffle is on half-words.
8643
2.11k
  if (!isNByteElemShuffleMask(N, 2, 1))
8644
1.81k
    return SDValue();
8645
300
8646
300
  bool IsLE = Subtarget.isLittleEndian();
8647
300
  SDLoc dl(N);
8648
300
  SDValue V1 = N->getOperand(0);
8649
300
  SDValue V2 = N->getOperand(1);
8650
300
  unsigned ShiftElts = 0, InsertAtByte = 0;
8651
300
  bool Swap = false;
8652
300
8653
300
  // Shifts required to get the half-word we want at element 3.
8654
300
  unsigned LittleEndianShifts[] = {4, 3, 2, 1, 0, 7, 6, 5};
8655
300
  unsigned BigEndianShifts[] = {5, 6, 7, 0, 1, 2, 3, 4};
8656
300
8657
300
  uint32_t Mask = 0;
8658
300
  uint32_t OriginalOrderLow = 0x1234567;
8659
300
  uint32_t OriginalOrderHigh = 0x89ABCDEF;
8660
300
  // Now we look at mask elements 0,2,4,6,8,10,12,14.  Pack the mask into a
8661
300
  // 32-bit space, only need 4-bit nibbles per element.
8662
2.70k
  for (unsigned i = 0; i < NumHalfWords; ++i) {
8663
2.40k
    unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
8664
2.40k
    Mask |= ((uint32_t)(N->getMaskElt(i * 2) / 2) << MaskShift);
8665
2.40k
  }
8666
300
8667
300
  // For each mask element, find out if we're just inserting something
8668
300
  // from V2 into V1 or vice versa.  Possible permutations inserting an element
8669
300
  // from V2 into V1:
8670
300
  //   X, 1, 2, 3, 4, 5, 6, 7
8671
300
  //   0, X, 2, 3, 4, 5, 6, 7
8672
300
  //   0, 1, X, 3, 4, 5, 6, 7
8673
300
  //   0, 1, 2, X, 4, 5, 6, 7
8674
300
  //   0, 1, 2, 3, X, 5, 6, 7
8675
300
  //   0, 1, 2, 3, 4, X, 6, 7
8676
300
  //   0, 1, 2, 3, 4, 5, X, 7
8677
300
  //   0, 1, 2, 3, 4, 5, 6, X
8678
300
  // Inserting from V1 into V2 will be similar, except mask range will be [8,15].
8679
300
8680
300
  bool FoundCandidate = false;
8681
300
  // Go through the mask of half-words to find an element that's being moved
8682
300
  // from one vector to the other.
8683
2.34k
  for (unsigned i = 0; i < NumHalfWords; ++i) {
8684
2.12k
    unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
8685
2.12k
    uint32_t MaskOneElt = (Mask >> MaskShift) & 0xF;
8686
2.12k
    uint32_t MaskOtherElts = ~(0xF << MaskShift);
8687
2.12k
    uint32_t TargetOrder = 0x0;
8688
2.12k
8689
2.12k
    // If both vector operands for the shuffle are the same vector, the mask
8690
2.12k
    // will contain only elements from the first one and the second one will be
8691
2.12k
    // undef.
8692
2.12k
    if (V2.isUndef()) {
8693
856
      ShiftElts = 0;
8694
856
      unsigned VINSERTHSrcElem = IsLE ? 4 : 3;
8695
856
      TargetOrder = OriginalOrderLow;
8696
856
      Swap = false;
8697
856
      // Skip if not the correct element or mask of other elements don't equal
8698
856
      // to our expected order.
8699
856
      if (MaskOneElt == VINSERTHSrcElem &&
8700
856
          (Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
8701
16
        InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
8702
16
        FoundCandidate = true;
8703
16
        break;
8704
16
      }
8705
1.26k
    } else { // If both operands are defined.
8706
1.26k
      // Target order is [8,15] if the current mask is between [0,7].
8707
1.26k
      TargetOrder =
8708
1.26k
          (MaskOneElt < NumHalfWords) ? OriginalOrderHigh : OriginalOrderLow;
8709
1.26k
      // Skip if mask of other elements don't equal our expected order.
8710
1.26k
      if ((Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
8711
64
        // We only need the last 3 bits for the number of shifts.
8712
64
        ShiftElts = IsLE ? LittleEndianShifts[MaskOneElt & 0x7]
8713
64
                         : BigEndianShifts[MaskOneElt & 0x7];
8714
64
        InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
8715
64
        Swap = MaskOneElt < NumHalfWords;
8716
64
        FoundCandidate = true;
8717
64
        break;
8718
64
      }
8719
1.26k
    }
8720
2.12k
  }
8721
300
8722
300
  if (!FoundCandidate)
8723
220
    return SDValue();
8724
80
8725
80
  // Candidate found, construct the proper SDAG sequence with VINSERTH,
8726
80
  // optionally with VECSHL if shift is required.
8727
80
  if (Swap)
8728
32
    std::swap(V1, V2);
8729
80
  if (V2.isUndef())
8730
16
    V2 = V1;
8731
80
  SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
8732
80
  if (ShiftElts) {
8733
56
    // Double ShiftElts because we're left shifting on v16i8 type.
8734
56
    SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
8735
56
                              DAG.getConstant(2 * ShiftElts, dl, MVT::i32));
8736
56
    SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, Shl);
8737
56
    SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
8738
56
                              DAG.getConstant(InsertAtByte, dl, MVT::i32));
8739
56
    return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
8740
56
  }
8741
24
  SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V2);
8742
24
  SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
8743
24
                            DAG.getConstant(InsertAtByte, dl, MVT::i32));
8744
24
  return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
8745
24
}
8746
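
(Editorial sketch, not part of the source: the nibble packing above as a
standalone helper; HalfWordIdx[i] stands in for N->getMaskElt(i * 2) / 2 and
is a hypothetical input name.)

#include <cstdint>  // assumed available; for this sketch only
static uint32_t packHalfWordMask(const int HalfWordIdx[8]) {
  uint32_t Mask = 0;
  for (unsigned i = 0; i < 8; ++i)
    Mask |= (uint32_t)HalfWordIdx[i] << ((8 - 1 - i) * 4);  // 4-bit nibbles
  return Mask;  // identity {0,1,...,7} packs to 0x01234567 (OriginalOrderLow)
}
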
8747
/// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE.  If this
8748
/// is a shuffle we can handle in a single instruction, return it.  Otherwise,
8749
/// return the code it can be lowered into.  Worst case, it can always be
8750
/// lowered into a vperm.
8751
SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
8752
3.75k
                                               SelectionDAG &DAG) const {
8753
3.75k
  SDLoc dl(Op);
8754
3.75k
  SDValue V1 = Op.getOperand(0);
8755
3.75k
  SDValue V2 = Op.getOperand(1);
8756
3.75k
  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
8757
3.75k
  EVT VT = Op.getValueType();
8758
3.75k
  bool isLittleEndian = Subtarget.isLittleEndian();
8759
3.75k
8760
3.75k
  unsigned ShiftElts, InsertAtByte;
8761
3.75k
  bool Swap = false;
8762
3.75k
  if (Subtarget.hasP9Vector() &&
8763
3.75k
      PPC::isXXINSERTWMask(SVOp, ShiftElts, InsertAtByte, Swap,
8764
2.24k
                           isLittleEndian)) {
8765
134
    if (Swap)
8766
70
      std::swap(V1, V2);
8767
134
    SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
8768
134
    SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2);
8769
134
    if (ShiftElts) {
8770
96
      SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv2, Conv2,
8771
96
                                DAG.getConstant(ShiftElts, dl, MVT::i32));
8772
96
      SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Shl,
8773
96
                                DAG.getConstant(InsertAtByte, dl, MVT::i32));
8774
96
      return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
8775
96
    }
8776
38
    SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Conv2,
8777
38
                              DAG.getConstant(InsertAtByte, dl, MVT::i32));
8778
38
    return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
8779
38
  }
8780
3.62k
8781
3.62k
  if (Subtarget.hasP9Altivec()) {
8782
2.11k
    SDValue NewISDNode;
8783
2.11k
    if ((NewISDNode = lowerToVINSERTH(SVOp, DAG)))
8784
80
      return NewISDNode;
8785
2.03k
8786
2.03k
    if ((NewISDNode = lowerToVINSERTB(SVOp, DAG)))
8787
160
      return NewISDNode;
8788
3.38k
  }
8789
3.38k
8790
3.38k
  if (Subtarget.hasVSX() &&
8791
3.38k
      PPC::isXXSLDWIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
8792
74
    if (Swap)
8793
14
      std::swap(V1, V2);
8794
74
    SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
8795
74
    SDValue Conv2 =
8796
74
        DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2.isUndef() ? V1 : V2);
8797
74
8798
74
    SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv1, Conv2,
8799
74
                              DAG.getConstant(ShiftElts, dl, MVT::i32));
8800
74
    return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Shl);
8801
74
  }
8802
3.30k
8803
3.30k
  if (Subtarget.hasVSX() &&
8804
3.30k
    PPC::isXXPERMDIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
8805
58
    if (Swap)
8806
37
      std::swap(V1, V2);
8807
58
    SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
8808
58
    SDValue Conv2 =
8809
58
        DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V2.isUndef() ? V1 : V2);
8810
58
8811
58
    SDValue PermDI = DAG.getNode(PPCISD::XXPERMDI, dl, MVT::v2i64, Conv1, Conv2,
8812
58
                              DAG.getConstant(ShiftElts, dl, MVT::i32));
8813
58
    return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, PermDI);
8814
58
  }
8815
3.24k
8816
3.24k
  if (Subtarget.hasP9Vector()) {
8817
1.82k
     if (PPC::isXXBRHShuffleMask(SVOp)) {
8818
2
      SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
8819
2
      SDValue ReveHWord = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v8i16, Conv);
8820
2
      return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveHWord);
8821
1.82k
    } else if (PPC::isXXBRWShuffleMask(SVOp)) {
8822
2
      SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
8823
2
      SDValue ReveWord = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v4i32, Conv);
8824
2
      return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveWord);
8825
1.82k
    } else if (PPC::isXXBRDShuffleMask(SVOp)) {
8826
2
      SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
8827
2
      SDValue ReveDWord = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v2i64, Conv);
8828
2
      return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveDWord);
8829
1.82k
    } else if (PPC::isXXBRQShuffleMask(SVOp)) {
8830
2
      SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, V1);
8831
2
      SDValue ReveQWord = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v1i128, Conv);
8832
2
      return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveQWord);
8833
2
    }
8834
3.24k
  }
8835
3.24k
8836
3.24k
  if (Subtarget.hasVSX()) {
8837
2.91k
    if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) {
8838
120
      int SplatIdx = PPC::getVSPLTImmediate(SVOp, 4, DAG);
8839
120
8840
120
      SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
8841
120
      SDValue Splat = DAG.getNode(PPCISD::XXSPLT, dl, MVT::v4i32, Conv,
8842
120
                                  DAG.getConstant(SplatIdx, dl, MVT::i32));
8843
120
      return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Splat);
8844
120
    }
8845
2.79k
8846
2.79k
    // Left shifts of 8 bytes are actually swaps. Convert accordingly.
8847
2.79k
    if (V2.isUndef() && PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) == 8) {
8848
8
      SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, V1);
8849
8
      SDValue Swap = DAG.getNode(PPCISD::SWAP_NO_CHAIN, dl, MVT::v2f64, Conv);
8850
8
      return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Swap);
8851
8
    }
8852
3.11k
  }
8853
3.11k
8854
3.11k
  if (Subtarget.hasQPX()) {
8855
66
    if (VT.getVectorNumElements() != 4)
8856
0
      return SDValue();
8857
66
8858
66
    if (V2.isUndef()) V2 = V1;
8859
66
8860
66
    int AlignIdx = PPC::isQVALIGNIShuffleMask(SVOp);
8861
66
    if (AlignIdx != -1) {
8862
0
      return DAG.getNode(PPCISD::QVALIGNI, dl, VT, V1, V2,
8863
0
                         DAG.getConstant(AlignIdx, dl, MVT::i32));
8864
66
    } else if (SVOp->isSplat()) {
8865
2
      int SplatIdx = SVOp->getSplatIndex();
8866
2
      if (SplatIdx >= 4) {
8867
0
        std::swap(V1, V2);
8868
0
        SplatIdx -= 4;
8869
0
      }
8870
2
8871
2
      return DAG.getNode(PPCISD::QVESPLATI, dl, VT, V1,
8872
2
                         DAG.getConstant(SplatIdx, dl, MVT::i32));
8873
2
    }
8874
64
8875
64
    // Lower this into a qvgpci/qvfperm pair.
8876
64
8877
64
    // Compute the qvgpci literal
8878
64
    unsigned idx = 0;
8879
320
    for (unsigned i = 0; i < 4; ++i) {
8880
256
      int m = SVOp->getMaskElt(i);
8881
256
      unsigned mm = m >= 0 ? (unsigned) m : i;
8882
256
      idx |= mm << (3-i)*3;
8883
256
    }
8884
64
8885
64
    SDValue V3 = DAG.getNode(PPCISD::QVGPCI, dl, MVT::v4f64,
8886
64
                             DAG.getConstant(idx, dl, MVT::i32));
8887
64
    return DAG.getNode(PPCISD::QVFPERM, dl, VT, V1, V2, V3);
8888
64
  }
8889
3.04k
8890
3.04k
  // Cases that are handled by instructions that take permute immediates
8891
3.04k
  // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
8892
3.04k
  // selected by the instruction selector.
8893
3.04k
  if (V2.isUndef()) {
8894
578
    if (PPC::isSplatShuffleMask(SVOp, 1) ||
8895
578
        PPC::isSplatShuffleMask(SVOp, 2) ||
8896
578
        PPC::isSplatShuffleMask(SVOp, 4) ||
8897
578
        PPC::isVPKUWUMShuffleMask(SVOp, 1, DAG) ||
8898
578
        PPC::isVPKUHUMShuffleMask(SVOp, 1, DAG) ||
8899
578
        PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) != -1 ||
8900
578
        PPC::isVMRGLShuffleMask(SVOp, 1, 1, DAG) ||
8901
578
        PPC::isVMRGLShuffleMask(SVOp, 2, 1, DAG) ||
8902
578
        PPC::isVMRGLShuffleMask(SVOp, 4, 1, DAG) ||
8903
578
        PPC::isVMRGHShuffleMask(SVOp, 1, 1, DAG) ||
8904
578
        PPC::isVMRGHShuffleMask(SVOp, 2, 1, DAG) ||
8905
578
        PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG) ||
8906
578
        (Subtarget.hasP8Altivec() && (
8907
216
         PPC::isVPKUDUMShuffleMask(SVOp, 1, DAG) ||
8908
216
         PPC::isVMRGEOShuffleMask(SVOp, true, 1, DAG) ||
8909
364
         PPC::isVMRGEOShuffleMask(SVOp, false, 1, DAG)))) {
8910
364
      return Op;
8911
364
    }
8912
2.68k
  }
8913
2.68k
8914
2.68k
  // Altivec has a variety of "shuffle immediates" that take two vector inputs
8915
2.68k
  // and produce a fixed permutation.  If any of these match, do not lower to
8916
2.68k
  // VPERM.
8917
2.68k
  unsigned int ShuffleKind = isLittleEndian ? 2 : 0;
8918
2.68k
  if (PPC::isVPKUWUMShuffleMask(SVOp, ShuffleKind, DAG) ||
8919
2.68k
      PPC::isVPKUHUMShuffleMask(SVOp, ShuffleKind, DAG) ||
8920
2.68k
      PPC::isVSLDOIShuffleMask(SVOp, ShuffleKind, DAG) != -1 ||
8921
2.68k
      PPC::isVMRGLShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
8922
2.68k
      PPC::isVMRGLShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
8923
2.68k
      PPC::isVMRGLShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
8924
2.68k
      PPC::isVMRGHShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
8925
2.68k
      PPC::isVMRGHShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
8926
2.68k
      PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
8927
2.68k
      (Subtarget.hasP8Altivec() && (
8928
495
       PPC::isVPKUDUMShuffleMask(SVOp, ShuffleKind, DAG) ||
8929
495
       PPC::isVMRGEOShuffleMask(SVOp, true, ShuffleKind, DAG) ||
8930
495
       PPC::isVMRGEOShuffleMask(SVOp, false, ShuffleKind, DAG))))
8931
2.16k
    return Op;
8932
520
8933
520
  // Check to see if this is a shuffle of 4-byte values.  If so, we can use our
8934
520
  // perfect shuffle table to emit an optimal matching sequence.
8935
520
  ArrayRef<int> PermMask = SVOp->getMask();
8936
520
8937
520
  unsigned PFIndexes[4];
8938
520
  bool isFourElementShuffle = true;
8939
1.68k
  for (unsigned i = 0; i != 4 && isFourElementShuffle; ++i) { // Element number
8940
1.16k
    unsigned EltNo = 8;   // Start out undef.
8941
4.79k
    for (unsigned j = 0; j != 4; ++j) { // Intra-element byte.
8942
4.03k
      if (PermMask[i*4+j] < 0)
8943
874
        continue;   // Undef, ignore it.
8944
3.15k
8945
3.15k
      unsigned ByteSource = PermMask[i*4+j];
8946
3.15k
      if ((ByteSource & 3) != j) {
8947
286
        isFourElementShuffle = false;
8948
286
        break;
8949
286
      }
8950
2.87k
8951
2.87k
      if (EltNo == 8) {
8952
860
        EltNo = ByteSource/4;
8953
2.01k
      } else if (EltNo != ByteSource/4) {
8954
118
        isFourElementShuffle = false;
8955
118
        break;
8956
118
      }
8957
2.87k
    }
8958
1.16k
    PFIndexes[i] = EltNo;
8959
1.16k
  }
8960
520
8961
520
  // If this shuffle can be expressed as a shuffle of 4-byte elements, use the
8962
520
  // perfect shuffle vector to determine if it is cost effective to do this as
8963
520
  // discrete instructions, or whether we should use a vperm.
8964
520
  // For now, we skip this for little endian until such time as we have a
8965
520
  // little-endian perfect shuffle table.
8966
520
  if (isFourElementShuffle && !isLittleEndian) {
8967
44
    // Compute the index in the perfect shuffle table.
8968
44
    unsigned PFTableIndex =
8969
44
      PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
8970
44
8971
44
    unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
8972
44
    unsigned Cost  = (PFEntry >> 30);
8973
44
8974
44
    // Determining when to avoid vperm is tricky.  Many things affect the cost
8975
44
    // of vperm, particularly how many times the perm mask needs to be computed.
8976
44
    // For example, if the perm mask can be hoisted out of a loop or is already
8977
44
    // used (perhaps because there are multiple permutes with the same shuffle
8978
44
    // mask?) the vperm has a cost of 1.  OTOH, hoisting the permute mask out of
8979
44
    // the loop requires an extra register.
8980
44
    //
8981
44
    // As a compromise, we only emit discrete instructions if the shuffle can be
8982
44
    // generated in 3 or fewer operations.  When we have loop information
8983
44
    // available, if this block is within a loop, we should avoid using vperm
8984
44
    // for 3-operation perms and use a constant pool load instead.
8985
44
    if (Cost < 3)
8986
32
      return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
8987
488
  }
8988
488
8989
488
  // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
8990
488
  // vector that will get spilled to the constant pool.
8991
488
  if (V2.isUndef()) V2 = V1;
8992
488
8993
488
  // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
8994
488
  // that it is in input element units, not in bytes.  Convert now.
8995
488
8996
488
  // For little endian, the order of the input vectors is reversed, and
8997
488
  // the permutation mask is complemented with respect to 31.  This is
8998
488
  // necessary to produce proper semantics with the big-endian-biased vperm
8999
488
  // instruction.
9000
488
  EVT EltVT = V1.getValueType().getVectorElementType();
9001
488
  unsigned BytesPerElement = EltVT.getSizeInBits()/8;
9002
488
9003
488
  SmallVector<SDValue, 16> ResultMask;
9004
8.29k
  for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
9005
7.80k
    unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i];
9006
7.80k
9007
15.6k
    for (unsigned j = 0; j != BytesPerElement; ++j)
9008
7.80k
      if (isLittleEndian)
9009
5.04k
        ResultMask.push_back(DAG.getConstant(31 - (SrcElt*BytesPerElement + j),
9010
5.04k
                                             dl, MVT::i32));
9011
2.76k
      else
9012
2.76k
        ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement + j, dl,
9013
2.76k
                                             MVT::i32));
9014
7.80k
  }
9015
488
9016
488
  SDValue VPermMask = DAG.getBuildVector(MVT::v16i8, dl, ResultMask);
9017
488
  if (isLittleEndian)
9018
315
    return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(),
9019
315
                       V2, V1, VPermMask);
9020
173
  else
9021
173
    return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(),
9022
173
                       V1, V2, VPermMask);
9023
488
}
9024
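
(Editorial note on the little-endian path above: if result byte i should take
source byte k, big-endian vperm wants mask byte k, so the little-endian code
pushes 31 - k instead and also passes V2 before V1 in the final VPERM node;
the two adjustments together compensate for vperm's big-endian byte
numbering.)
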
9025
/// getVectorCompareInfo - Given an intrinsic, return false if it is not a
9026
/// vector comparison.  If it is, return true and fill in Opc/isDot with
9027
/// information about the intrinsic.
9028
static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc,
9029
1.46k
                                 bool &isDot, const PPCSubtarget &Subtarget) {
9030
1.46k
  unsigned IntrinsicID =
9031
1.46k
      cast<ConstantSDNode>(Intrin.getOperand(0))->getZExtValue();
9032
1.46k
  CompareOpc = -1;
9033
1.46k
  isDot = false;
9034
1.46k
  switch (IntrinsicID) {
9035
1.46k
  default:
9036
1.43k
    return false;
9037
1.46k
  // Comparison predicates.
9038
1.46k
  case Intrinsic::ppc_altivec_vcmpbfp_p:
9039
1
    CompareOpc = 966;
9040
1
    isDot = true;
9041
1
    break;
9042
1.46k
  case Intrinsic::ppc_altivec_vcmpeqfp_p:
9043
1
    CompareOpc = 198;
9044
1
    isDot = true;
9045
1
    break;
9046
1.46k
  case Intrinsic::ppc_altivec_vcmpequb_p:
9047
0
    CompareOpc = 6;
9048
0
    isDot = true;
9049
0
    break;
9050
1.46k
  case Intrinsic::ppc_altivec_vcmpequh_p:
9051
2
    CompareOpc = 70;
9052
2
    isDot = true;
9053
2
    break;
9054
1.46k
  case Intrinsic::ppc_altivec_vcmpequw_p:
9055
1
    CompareOpc = 134;
9056
1
    isDot = true;
9057
1
    break;
9058
1.46k
  case Intrinsic::ppc_altivec_vcmpequd_p:
9059
2
    if (Subtarget.hasP8Altivec()) {
9060
2
      CompareOpc = 199;
9061
2
      isDot = true;
9062
2
    } else
9063
0
      return false;
9064
2
    break;
9065
2
  case Intrinsic::ppc_altivec_vcmpneb_p:
9066
0
  case Intrinsic::ppc_altivec_vcmpneh_p:
9067
0
  case Intrinsic::ppc_altivec_vcmpnew_p:
9068
0
  case Intrinsic::ppc_altivec_vcmpnezb_p:
9069
0
  case Intrinsic::ppc_altivec_vcmpnezh_p:
9070
0
  case Intrinsic::ppc_altivec_vcmpnezw_p:
9071
0
    if (Subtarget.hasP9Altivec()) {
9072
0
      switch (IntrinsicID) {
9073
0
      default:
9074
0
        llvm_unreachable("Unknown comparison intrinsic.");
9075
0
      case Intrinsic::ppc_altivec_vcmpneb_p:
9076
0
        CompareOpc = 7;
9077
0
        break;
9078
0
      case Intrinsic::ppc_altivec_vcmpneh_p:
9079
0
        CompareOpc = 71;
9080
0
        break;
9081
0
      case Intrinsic::ppc_altivec_vcmpnew_p:
9082
0
        CompareOpc = 135;
9083
0
        break;
9084
0
      case Intrinsic::ppc_altivec_vcmpnezb_p:
9085
0
        CompareOpc = 263;
9086
0
        break;
9087
0
      case Intrinsic::ppc_altivec_vcmpnezh_p:
9088
0
        CompareOpc = 327;
9089
0
        break;
9090
0
      case Intrinsic::ppc_altivec_vcmpnezw_p:
9091
0
        CompareOpc = 391;
9092
0
        break;
9093
0
      }
9094
0
      isDot = true;
9095
0
    } else
9096
0
      return false;
9097
0
    break;
9098
0
  case Intrinsic::ppc_altivec_vcmpgefp_p:
9099
0
    CompareOpc = 454;
9100
0
    isDot = true;
9101
0
    break;
9102
0
  case Intrinsic::ppc_altivec_vcmpgtfp_p:
9103
0
    CompareOpc = 710;
9104
0
    isDot = true;
9105
0
    break;
9106
0
  case Intrinsic::ppc_altivec_vcmpgtsb_p:
9107
0
    CompareOpc = 774;
9108
0
    isDot = true;
9109
0
    break;
9110
0
  case Intrinsic::ppc_altivec_vcmpgtsh_p:
9111
0
    CompareOpc = 838;
9112
0
    isDot = true;
9113
0
    break;
9114
2
  case Intrinsic::ppc_altivec_vcmpgtsw_p:
9115
2
    CompareOpc = 902;
9116
2
    isDot = true;
9117
2
    break;
9118
2
  case Intrinsic::ppc_altivec_vcmpgtsd_p:
9119
2
    if (Subtarget.hasP8Altivec()) {
9120
2
      CompareOpc = 967;
9121
2
      isDot = true;
9122
2
    } else
9123
0
      return false;
9124
2
    break;
9125
2
  case Intrinsic::ppc_altivec_vcmpgtub_p:
9126
0
    CompareOpc = 518;
9127
0
    isDot = true;
9128
0
    break;
9129
2
  case Intrinsic::ppc_altivec_vcmpgtuh_p:
9130
0
    CompareOpc = 582;
9131
0
    isDot = true;
9132
0
    break;
9133
2
  case Intrinsic::ppc_altivec_vcmpgtuw_p:
9134
0
    CompareOpc = 646;
9135
0
    isDot = true;
9136
0
    break;
9137
2
  case Intrinsic::ppc_altivec_vcmpgtud_p:
9138
2
    if (Subtarget.hasP8Altivec()) {
9139
2
      CompareOpc = 711;
9140
2
      isDot = true;
9141
2
    } else
9142
0
      return false;
9143
2
    break;
9144
2
9145
2
  // VSX predicate comparisons use the same infrastructure
9146
2
  case Intrinsic::ppc_vsx_xvcmpeqdp_p:
9147
0
  case Intrinsic::ppc_vsx_xvcmpgedp_p:
9148
0
  case Intrinsic::ppc_vsx_xvcmpgtdp_p:
9149
0
  case Intrinsic::ppc_vsx_xvcmpeqsp_p:
9150
0
  case Intrinsic::ppc_vsx_xvcmpgesp_p:
9151
0
  case Intrinsic::ppc_vsx_xvcmpgtsp_p:
9152
0
    if (Subtarget.hasVSX()) {
9153
0
      switch (IntrinsicID) {
9154
0
      case Intrinsic::ppc_vsx_xvcmpeqdp_p:
9155
0
        CompareOpc = 99;
9156
0
        break;
9157
0
      case Intrinsic::ppc_vsx_xvcmpgedp_p:
9158
0
        CompareOpc = 115;
9159
0
        break;
9160
0
      case Intrinsic::ppc_vsx_xvcmpgtdp_p:
9161
0
        CompareOpc = 107;
9162
0
        break;
9163
0
      case Intrinsic::ppc_vsx_xvcmpeqsp_p:
9164
0
        CompareOpc = 67;
9165
0
        break;
9166
0
      case Intrinsic::ppc_vsx_xvcmpgesp_p:
9167
0
        CompareOpc = 83;
9168
0
        break;
9169
0
      case Intrinsic::ppc_vsx_xvcmpgtsp_p:
9170
0
        CompareOpc = 75;
9171
0
        break;
9172
0
      }
9173
0
      isDot = true;
9174
0
    } else
9175
0
      return false;
9176
0
    break;
9177
0
9178
0
  // Normal Comparisons.
9179
1
  case Intrinsic::ppc_altivec_vcmpbfp:
9180
1
    CompareOpc = 966;
9181
1
    break;
9182
0
  case Intrinsic::ppc_altivec_vcmpeqfp:
9183
0
    CompareOpc = 198;
9184
0
    break;
9185
0
  case Intrinsic::ppc_altivec_vcmpequb:
9186
0
    CompareOpc = 6;
9187
0
    break;
9188
0
  case Intrinsic::ppc_altivec_vcmpequh:
9189
0
    CompareOpc = 70;
9190
0
    break;
9191
0
  case Intrinsic::ppc_altivec_vcmpequw:
9192
0
    CompareOpc = 134;
9193
0
    break;
9194
2
  case Intrinsic::ppc_altivec_vcmpequd:
9195
2
    if (Subtarget.hasP8Altivec())
9196
2
      CompareOpc = 199;
9197
0
    else
9198
0
      return false;
9199
2
    break;
9200
6
  case Intrinsic::ppc_altivec_vcmpneb:
9201
6
  case Intrinsic::ppc_altivec_vcmpneh:
9202
6
  case Intrinsic::ppc_altivec_vcmpnew:
9203
6
  case Intrinsic::ppc_altivec_vcmpnezb:
9204
6
  case Intrinsic::ppc_altivec_vcmpnezh:
9205
6
  case Intrinsic::ppc_altivec_vcmpnezw:
9206
6
    if (Subtarget.hasP9Altivec())
9207
6
      switch (IntrinsicID) {
9208
6
      default:
9209
0
        llvm_unreachable("Unknown comparison intrinsic.");
9210
6
      case Intrinsic::ppc_altivec_vcmpneb:
9211
1
        CompareOpc = 7;
9212
1
        break;
9213
6
      case Intrinsic::ppc_altivec_vcmpneh:
9214
1
        CompareOpc = 71;
9215
1
        break;
9216
6
      case Intrinsic::ppc_altivec_vcmpnew:
9217
1
        CompareOpc = 135;
9218
1
        break;
9219
6
      case Intrinsic::ppc_altivec_vcmpnezb:
9220
1
        CompareOpc = 263;
9221
1
        break;
9222
6
      case Intrinsic::ppc_altivec_vcmpnezh:
9223
1
        CompareOpc = 327;
9224
1
        break;
9225
6
      case Intrinsic::ppc_altivec_vcmpnezw:
9226
1
        CompareOpc = 391;
9227
1
        break;
9228
0
      }
9229
0
    else
9230
0
      return false;
9231
6
    break;
9232
6
  case Intrinsic::ppc_altivec_vcmpgefp:
9233
0
    CompareOpc = 454;
9234
0
    break;
9235
6
  case Intrinsic::ppc_altivec_vcmpgtfp:
9236
0
    CompareOpc = 710;
9237
0
    break;
9238
6
  case Intrinsic::ppc_altivec_vcmpgtsb:
9239
0
    CompareOpc = 774;
9240
0
    break;
9241
6
  case Intrinsic::ppc_altivec_vcmpgtsh:
9242
0
    CompareOpc = 838;
9243
0
    break;
9244
6
  case Intrinsic::ppc_altivec_vcmpgtsw:
9245
0
    CompareOpc = 902;
9246
0
    break;
9247
6
  case Intrinsic::ppc_altivec_vcmpgtsd:
9248
2
    if (Subtarget.hasP8Altivec())
9249
2
      CompareOpc = 967;
9250
0
    else
9251
0
      return false;
9252
2
    break;
9253
2
  case Intrinsic::ppc_altivec_vcmpgtub:
9254
0
    CompareOpc = 518;
9255
0
    break;
9256
2
  case Intrinsic::ppc_altivec_vcmpgtuh:
9257
0
    CompareOpc = 582;
9258
0
    break;
9259
2
  case Intrinsic::ppc_altivec_vcmpgtuw:
9260
0
    CompareOpc = 646;
9261
0
    break;
9262
2
  case Intrinsic::ppc_altivec_vcmpgtud:
9263
2
    if (Subtarget.hasP8Altivec())
9264
2
      CompareOpc = 711;
9265
0
    else
9266
0
      return false;
9267
2
    break;
9268
26
  }
9269
26
  return true;
9270
26
}
9271
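
(Editorial note: callers consume the two outputs as the code below does; a
non-dot comparison becomes a plain PPCISD::VCMP node parameterized by
CompareOpc, while a dot form ("_p" suffix, isDot == true) becomes
PPCISD::VCMPo followed by an MFOCRF read of CR6 to extract the requested
predicate bit.)
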
9272
/// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
9273
/// lower, do it, otherwise return null.
9274
SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
9275
1.46k
                                                   SelectionDAG &DAG) const {
9276
1.46k
  unsigned IntrinsicID =
9277
1.46k
    cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
9278
1.46k
9279
1.46k
  SDLoc dl(Op);
9280
1.46k
9281
1.46k
  if (IntrinsicID == Intrinsic::thread_pointer) {
9282
3
    // Reads the thread pointer register, used for __builtin_thread_pointer.
9283
3
    if (Subtarget.isPPC64())
9284
2
      return DAG.getRegister(PPC::X13, MVT::i64);
9285
1
    return DAG.getRegister(PPC::R2, MVT::i32);
9286
1
  }
9287
1.45k
9288
1.45k
  // If this is a lowered altivec predicate compare, CompareOpc is set to the
9289
1.45k
  // opcode number of the comparison.
9290
1.45k
  int CompareOpc;
9291
1.45k
  bool isDot;
9292
1.45k
  if (!getVectorCompareInfo(Op, CompareOpc, isDot, Subtarget))
9293
1.43k
    return SDValue();    // Don't custom lower most intrinsics.
9294
20
9295
20
  // If this is a non-dot comparison, make the VCMP node and we are done.
9296
20
  if (!isDot) {
9297
13
    SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(),
9298
13
                              Op.getOperand(1), Op.getOperand(2),
9299
13
                              DAG.getConstant(CompareOpc, dl, MVT::i32));
9300
13
    return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Tmp);
9301
13
  }
9302
7
9303
7
  // Create the PPCISD altivec 'dot' comparison node.
9304
7
  SDValue Ops[] = {
9305
7
    Op.getOperand(2),  // LHS
9306
7
    Op.getOperand(3),  // RHS
9307
7
    DAG.getConstant(CompareOpc, dl, MVT::i32)
9308
7
  };
9309
7
  EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue };
9310
7
  SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops);
9311
7
9312
7
  // Now that we have the comparison, emit a copy from the CR to a GPR.
9313
7
  // This is flagged to the above dot comparison.
9314
7
  SDValue Flags = DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32,
9315
7
                                DAG.getRegister(PPC::CR6, MVT::i32),
9316
7
                                CompNode.getValue(1));
9317
7
9318
7
  // Unpack the result based on how the target uses it.
9319
7
  unsigned BitNo;   // Bit # of CR6.
9320
7
  bool InvertBit;   // Invert result?
9321
7
  switch (cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue()) {
9322
7
  default:  // Can't happen, don't crash on invalid number though.
9323
0
  case 0:   // Return the value of the EQ bit of CR6.
9324
0
    BitNo = 0; InvertBit = false;
9325
0
    break;
9326
1
  case 1:   // Return the inverted value of the EQ bit of CR6.
9327
1
    BitNo = 0; InvertBit = true;
9328
1
    break;
9329
6
  case 2:   // Return the value of the LT bit of CR6.
9330
6
    BitNo = 2; InvertBit = false;
9331
6
    break;
9332
0
  case 3:   // Return the inverted value of the LT bit of CR6.
9333
0
    BitNo = 2; InvertBit = true;
9334
0
    break;
9335
7
  }
9336
7
9337
7
  // Shift the bit into the low position.
9338
7
  Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags,
9339
7
                      DAG.getConstant(8 - (3 - BitNo), dl, MVT::i32));
9340
7
  // Isolate the bit.
9341
7
  Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags,
9342
7
                      DAG.getConstant(1, dl, MVT::i32));
9343
7
9344
7
  // If we are supposed to, toggle the bit.
9345
7
  if (InvertBit)
9346
1
    Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags,
9347
1
                        DAG.getConstant(1, dl, MVT::i32));
9348
7
  return Flags;
9349
7
}
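
// Worked example for the shift above: MFOCRF leaves the CR6 field in bits
// 7..4 of the result (LT, GT, EQ, SO from bit 7 down, counting from the
// LSB), so shifting right by 8 - (3 - BitNo) lands the requested bit at
// position 0: BitNo = 0 (EQ) -> srl by 5; BitNo = 2 (LT) -> srl by 7.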

SDValue PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
                                               SelectionDAG &DAG) const {
  // SelectionDAGBuilder::visitTargetIntrinsic may insert one extra chain to
  // the beginning of the argument list.
  int ArgStart = isa<ConstantSDNode>(Op.getOperand(0)) ? 0 : 1;
  SDLoc DL(Op);
  switch (cast<ConstantSDNode>(Op.getOperand(ArgStart))->getZExtValue()) {
  case Intrinsic::ppc_cfence: {
    assert(ArgStart == 1 && "llvm.ppc.cfence must carry a chain argument.");
    assert(Subtarget.isPPC64() && "Only 64-bit is supported for now.");
    return SDValue(DAG.getMachineNode(PPC::CFENCE8, DL, MVT::Other,
                                      DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,
                                                  Op.getOperand(ArgStart + 1)),
                                      Op.getOperand(0)),
                   0);
  }
  default:
    break;
  }
  return SDValue();
}

SDValue PPCTargetLowering::LowerREM(SDValue Op, SelectionDAG &DAG) const {
  // Check for a DIV with the same operands as this REM.
  for (auto UI : Op.getOperand(1)->uses()) {
    if ((Op.getOpcode() == ISD::SREM && UI->getOpcode() == ISD::SDIV) ||
        (Op.getOpcode() == ISD::UREM && UI->getOpcode() == ISD::UDIV))
      if (UI->getOperand(0) == Op.getOperand(0) &&
          UI->getOperand(1) == Op.getOperand(1))
        return SDValue();
  }
  return Op;
}
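
// For illustration: in a function computing both
//   int q = a / b;  int r = a % b;
// the divisor has an SDIV user over the same operands, so the SREM above
// returns SDValue() and is later expanded as r = a - (a / b) * b, reusing
// the single divide; a lone a % b returns Op unchanged and can select a
// hardware modulo (modsw/modsd on ISA 3.0) instead.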

// Lower scalar BSWAP64 to xxbrd.
SDValue PPCTargetLowering::LowerBSWAP(SDValue Op, SelectionDAG &DAG) const {
  SDLoc dl(Op);
  // MTVSRDD
  Op = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, Op.getOperand(0),
                   Op.getOperand(0));
  // XXBRD
  Op = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v2i64, Op);
  // MFVSRD
  int VectorIndex = 0;
  if (Subtarget.isLittleEndian())
    VectorIndex = 1;
  Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Op,
                   DAG.getTargetConstant(VectorIndex, dl, MVT::i32));
  return Op;
}
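
// For illustration, the nodes built above correspond roughly to this
// sequence for a scalar i64 x (VectorIndex is 1 on little-endian targets,
// 0 on big-endian ones):
//   mtvsrdd vN, x, x   ; BUILD_VECTOR v2i64 {x, x}
//   xxbrd   vN, vN     ; XXREVERSE: byte-reverse each doubleword
//   mfvsrd  r, vN      ; EXTRACT_VECTOR_ELT at VectorIndex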

// ATOMIC_CMP_SWAP for i8/i16 needs to zero-extend its input since it will be
// compared to a value that is atomically loaded (atomic loads zero-extend).
SDValue PPCTargetLowering::LowerATOMIC_CMP_SWAP(SDValue Op,
                                                SelectionDAG &DAG) const {
  assert(Op.getOpcode() == ISD::ATOMIC_CMP_SWAP &&
         "Expecting an atomic compare-and-swap here.");
  SDLoc dl(Op);
  auto *AtomicNode = cast<AtomicSDNode>(Op.getNode());
  EVT MemVT = AtomicNode->getMemoryVT();
  if (MemVT.getSizeInBits() >= 32)
    return Op;

  SDValue CmpOp = Op.getOperand(2);
  // If this is already correctly zero-extended, leave it alone.
  auto HighBits = APInt::getHighBitsSet(32, 32 - MemVT.getSizeInBits());
  if (DAG.MaskedValueIsZero(CmpOp, HighBits))
    return Op;

  // Clear the high bits of the compare operand.
  unsigned MaskVal = (1 << MemVT.getSizeInBits()) - 1;
  SDValue NewCmpOp =
    DAG.getNode(ISD::AND, dl, MVT::i32, CmpOp,
                DAG.getConstant(MaskVal, dl, MVT::i32));

  // Replace the existing compare operand with the properly zero-extended one.
  SmallVector<SDValue, 4> Ops;
  for (int i = 0, e = AtomicNode->getNumOperands(); i < e; i++)
    Ops.push_back(AtomicNode->getOperand(i));
  Ops[2] = NewCmpOp;
  MachineMemOperand *MMO = AtomicNode->getMemOperand();
  SDVTList Tys = DAG.getVTList(MVT::i32, MVT::Other);
  auto NodeTy =
    (MemVT == MVT::i8) ? PPCISD::ATOMIC_CMP_SWAP_8 : PPCISD::ATOMIC_CMP_SWAP_16;
  return DAG.getMemIntrinsicNode(NodeTy, dl, Tys, Ops, MemVT, MMO);
}
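
// Worked example: for an i8 compare-and-swap, MaskVal = (1 << 8) - 1 = 0xFF,
// so a sign-extended compare operand such as 0xFFFFFF80 becomes 0x80 and can
// match the zero-extended byte produced by the atomic load.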

SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
                                                 SelectionDAG &DAG) const {
  SDLoc dl(Op);
  // Create a stack slot that is 16-byte aligned.
  MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
  int FrameIdx = MFI.CreateStackObject(16, 16, false);
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);

  // Store the input value into Value#0 of the stack slot.
  SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx,
                               MachinePointerInfo());
  // Load it out.
  return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo());
}

SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
                                                  SelectionDAG &DAG) const {
  assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT &&
         "Should only be called for ISD::INSERT_VECTOR_ELT");

  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(2));
  // We have legal lowering for constant indices but not for variable ones.
  if (!C)
    return SDValue();

  EVT VT = Op.getValueType();
  SDLoc dl(Op);
  SDValue V1 = Op.getOperand(0);
  SDValue V2 = Op.getOperand(1);
  // We can use MTVSRZ + VECINSERT for v8i16 and v16i8 types.
  if (VT == MVT::v8i16 || VT == MVT::v16i8) {
    SDValue Mtvsrz = DAG.getNode(PPCISD::MTVSRZ, dl, VT, V2);
    unsigned BytesInEachElement = VT.getVectorElementType().getSizeInBits() / 8;
    unsigned InsertAtElement = C->getZExtValue();
    unsigned InsertAtByte = InsertAtElement * BytesInEachElement;
    if (Subtarget.isLittleEndian()) {
      InsertAtByte = (16 - BytesInEachElement) - InsertAtByte;
    }
    return DAG.getNode(PPCISD::VECINSERT, dl, VT, V1, Mtvsrz,
                       DAG.getConstant(InsertAtByte, dl, MVT::i32));
  }
  return Op;
}
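
// Worked example of the byte arithmetic above: inserting element 3 of a
// v8i16 gives InsertAtByte = 3 * 2 = 6 on a big-endian target; on a
// little-endian target the correction (16 - 2) - 6 = 8 renumbers the same
// lane from the other end of the register.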

SDValue PPCTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
                                                   SelectionDAG &DAG) const {
  SDLoc dl(Op);
  SDNode *N = Op.getNode();

  assert(N->getOperand(0).getValueType() == MVT::v4i1 &&
         "Unknown extract_vector_elt type");

  SDValue Value = N->getOperand(0);

  // The first part of this is like the store lowering except that we don't
  // need to track the chain.

  // The values are now known to be -1 (false) or 1 (true). To convert this
  // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5).
  // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5
  Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value);

  // FIXME: We can make this an f32 vector, but the BUILD_VECTOR code needs to
  // understand how to form the extending load.
  SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::v4f64);

  Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs);

  // Now convert to an integer and store.
  Value = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64,
    DAG.getConstant(Intrinsic::ppc_qpx_qvfctiwu, dl, MVT::i32),
    Value);

  MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
  int FrameIdx = MFI.CreateStackObject(16, 16, false);
  MachinePointerInfo PtrInfo =
      MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);

  SDValue StoreChain = DAG.getEntryNode();
  SDValue Ops[] = {StoreChain,
                   DAG.getConstant(Intrinsic::ppc_qpx_qvstfiw, dl, MVT::i32),
                   Value, FIdx};
  SDVTList VTs = DAG.getVTList(/*chain*/ MVT::Other);

  StoreChain = DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID,
    dl, VTs, Ops, MVT::v4i32, PtrInfo);

  // Extract the value requested.
  unsigned Offset = 4*cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
  SDValue Idx = DAG.getConstant(Offset, dl, FIdx.getValueType());
  Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx);

  SDValue IntVal =
      DAG.getLoad(MVT::i32, dl, StoreChain, Idx, PtrInfo.getWithOffset(Offset));

  if (!Subtarget.useCRBits())
    return IntVal;

  return DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, IntVal);
}
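
// Worked arithmetic for the conversion above: each lane starts as -1.0
// (false) or 1.0 (true), and fma(V, 0.5, 0.5) maps those onto 0.0 and 1.0:
//   (-1.0) * 0.5 + 0.5 = 0.0,   (1.0) * 0.5 + 0.5 = 1.0.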

/// Lowering for QPX v4i1 loads
SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
                                           SelectionDAG &DAG) const {
  SDLoc dl(Op);
  LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
  SDValue LoadChain = LN->getChain();
  SDValue BasePtr = LN->getBasePtr();

  if (Op.getValueType() == MVT::v4f64 ||
      Op.getValueType() == MVT::v4f32) {
    EVT MemVT = LN->getMemoryVT();
    unsigned Alignment = LN->getAlignment();

    // If this load is properly aligned, then it is legal.
    if (Alignment >= MemVT.getStoreSize())
      return Op;

    EVT ScalarVT = Op.getValueType().getScalarType(),
        ScalarMemVT = MemVT.getScalarType();
    unsigned Stride = ScalarMemVT.getStoreSize();

    SDValue Vals[4], LoadChains[4];
    for (unsigned Idx = 0; Idx < 4; ++Idx) {
      SDValue Load;
      if (ScalarVT != ScalarMemVT)
        Load = DAG.getExtLoad(LN->getExtensionType(), dl, ScalarVT, LoadChain,
                              BasePtr,
                              LN->getPointerInfo().getWithOffset(Idx * Stride),
                              ScalarMemVT, MinAlign(Alignment, Idx * Stride),
                              LN->getMemOperand()->getFlags(), LN->getAAInfo());
      else
        Load = DAG.getLoad(ScalarVT, dl, LoadChain, BasePtr,
                           LN->getPointerInfo().getWithOffset(Idx * Stride),
                           MinAlign(Alignment, Idx * Stride),
                           LN->getMemOperand()->getFlags(), LN->getAAInfo());

      if (Idx == 0 && LN->isIndexed()) {
        assert(LN->getAddressingMode() == ISD::PRE_INC &&
               "Unknown addressing mode on vector load");
        Load = DAG.getIndexedLoad(Load, dl, BasePtr, LN->getOffset(),
                                  LN->getAddressingMode());
      }

      Vals[Idx] = Load;
      LoadChains[Idx] = Load.getValue(1);

      BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
                            DAG.getConstant(Stride, dl,
                                            BasePtr.getValueType()));
    }

    SDValue TF =  DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
    SDValue Value = DAG.getBuildVector(Op.getValueType(), dl, Vals);

    if (LN->isIndexed()) {
      SDValue RetOps[] = { Value, Vals[0].getValue(1), TF };
      return DAG.getMergeValues(RetOps, dl);
    }

    SDValue RetOps[] = { Value, TF };
    return DAG.getMergeValues(RetOps, dl);
  }

  assert(Op.getValueType() == MVT::v4i1 && "Unknown load to lower");
  assert(LN->isUnindexed() && "Indexed v4i1 loads are not supported");

  // To lower v4i1 from a byte array, we load the byte elements of the
  // vector and then reuse the BUILD_VECTOR logic.

  SDValue VectElmts[4], VectElmtChains[4];
  for (unsigned i = 0; i < 4; ++i) {
    SDValue Idx = DAG.getConstant(i, dl, BasePtr.getValueType());
    Idx = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, Idx);

    VectElmts[i] = DAG.getExtLoad(
        ISD::EXTLOAD, dl, MVT::i32, LoadChain, Idx,
        LN->getPointerInfo().getWithOffset(i), MVT::i8,
        /* Alignment = */ 1, LN->getMemOperand()->getFlags(), LN->getAAInfo());
    VectElmtChains[i] = VectElmts[i].getValue(1);
  }

  LoadChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, VectElmtChains);
  SDValue Value = DAG.getBuildVector(MVT::v4i1, dl, VectElmts);

  SDValue RVals[] = { Value, LoadChain };
  return DAG.getMergeValues(RVals, dl);
}

/// Lowering for QPX v4i1 stores
SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
                                            SelectionDAG &DAG) const {
  SDLoc dl(Op);
  StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
  SDValue StoreChain = SN->getChain();
  SDValue BasePtr = SN->getBasePtr();
  SDValue Value = SN->getValue();

  if (Value.getValueType() == MVT::v4f64 ||
      Value.getValueType() == MVT::v4f32) {
    EVT MemVT = SN->getMemoryVT();
    unsigned Alignment = SN->getAlignment();

    // If this store is properly aligned, then it is legal.
    if (Alignment >= MemVT.getStoreSize())
      return Op;

    EVT ScalarVT = Value.getValueType().getScalarType(),
        ScalarMemVT = MemVT.getScalarType();
    unsigned Stride = ScalarMemVT.getStoreSize();

    SDValue Stores[4];
    for (unsigned Idx = 0; Idx < 4; ++Idx) {
      SDValue Ex = DAG.getNode(
          ISD::EXTRACT_VECTOR_ELT, dl, ScalarVT, Value,
          DAG.getConstant(Idx, dl, getVectorIdxTy(DAG.getDataLayout())));
      SDValue Store;
      if (ScalarVT != ScalarMemVT)
        Store =
            DAG.getTruncStore(StoreChain, dl, Ex, BasePtr,
                              SN->getPointerInfo().getWithOffset(Idx * Stride),
                              ScalarMemVT, MinAlign(Alignment, Idx * Stride),
                              SN->getMemOperand()->getFlags(), SN->getAAInfo());
      else
        Store = DAG.getStore(StoreChain, dl, Ex, BasePtr,
                             SN->getPointerInfo().getWithOffset(Idx * Stride),
                             MinAlign(Alignment, Idx * Stride),
                             SN->getMemOperand()->getFlags(), SN->getAAInfo());

      if (Idx == 0 && SN->isIndexed()) {
        assert(SN->getAddressingMode() == ISD::PRE_INC &&
               "Unknown addressing mode on vector store");
        Store = DAG.getIndexedStore(Store, dl, BasePtr, SN->getOffset(),
                                    SN->getAddressingMode());
      }

      BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
                            DAG.getConstant(Stride, dl,
                                            BasePtr.getValueType()));
      Stores[Idx] = Store;
    }

    SDValue TF =  DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);

    if (SN->isIndexed()) {
      SDValue RetOps[] = { TF, Stores[0].getValue(1) };
      return DAG.getMergeValues(RetOps, dl);
    }

    return TF;
  }

  assert(SN->isUnindexed() && "Indexed v4i1 stores are not supported");
  assert(Value.getValueType() == MVT::v4i1 && "Unknown store to lower");

  // The values are now known to be -1 (false) or 1 (true). To convert this
  // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5).
  // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5
  Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value);

  // FIXME: We can make this an f32 vector, but the BUILD_VECTOR code needs to
  // understand how to form the extending load.
  SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::v4f64);

  Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs);

  // Now convert to an integer and store.
  Value = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64,
    DAG.getConstant(Intrinsic::ppc_qpx_qvfctiwu, dl, MVT::i32),
    Value);

  MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
  int FrameIdx = MFI.CreateStackObject(16, 16, false);
  MachinePointerInfo PtrInfo =
      MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);

  SDValue Ops[] = {StoreChain,
                   DAG.getConstant(Intrinsic::ppc_qpx_qvstfiw, dl, MVT::i32),
                   Value, FIdx};
  SDVTList VTs = DAG.getVTList(/*chain*/ MVT::Other);

  StoreChain = DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID,
    dl, VTs, Ops, MVT::v4i32, PtrInfo);

  // Move data into the byte array.
  SDValue Loads[4], LoadChains[4];
  for (unsigned i = 0; i < 4; ++i) {
    unsigned Offset = 4*i;
    SDValue Idx = DAG.getConstant(Offset, dl, FIdx.getValueType());
    Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx);

    Loads[i] = DAG.getLoad(MVT::i32, dl, StoreChain, Idx,
                           PtrInfo.getWithOffset(Offset));
    LoadChains[i] = Loads[i].getValue(1);
  }

  StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);

  SDValue Stores[4];
  for (unsigned i = 0; i < 4; ++i) {
    SDValue Idx = DAG.getConstant(i, dl, BasePtr.getValueType());
    Idx = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, Idx);

    Stores[i] = DAG.getTruncStore(
        StoreChain, dl, Loads[i], Idx, SN->getPointerInfo().getWithOffset(i),
        MVT::i8, /* Alignment = */ 1, SN->getMemOperand()->getFlags(),
        SN->getAAInfo());
  }

  StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);

  return StoreChain;
}

SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
  SDLoc dl(Op);
  if (Op.getValueType() == MVT::v4i32) {
    SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);

    SDValue Zero  = BuildSplatI(  0, 1, MVT::v4i32, DAG, dl);
    SDValue Neg16 = BuildSplatI(-16, 4, MVT::v4i32, DAG, dl);//+16 as shift amt.

    SDValue RHSSwap =   // = vrlw RHS, 16
      BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl);

    // Shrinkify inputs to v8i16.
    LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, LHS);
    RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHS);
    RHSSwap = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHSSwap);

    // Low parts multiplied together, generating 32-bit results (we ignore the
    // top parts).
    SDValue LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh,
                                        LHS, RHS, DAG, dl, MVT::v4i32);

    SDValue HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm,
                                      LHS, RHSSwap, Zero, DAG, dl, MVT::v4i32);
    // Shift the high parts up 16 bits.
    HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd,
                              Neg16, DAG, dl);
    return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd);
  } else if (Op.getValueType() == MVT::v8i16) {
    SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);

    SDValue Zero = BuildSplatI(0, 1, MVT::v8i16, DAG, dl);

    return BuildIntrinsicOp(Intrinsic::ppc_altivec_vmladduhm,
                            LHS, RHS, Zero, DAG, dl);
  } else if (Op.getValueType() == MVT::v16i8) {
    SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
    bool isLittleEndian = Subtarget.isLittleEndian();

    // Multiply the even 8-bit parts, producing 16-bit sums.
    SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub,
                                           LHS, RHS, DAG, dl, MVT::v8i16);
    EvenParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, EvenParts);

    // Multiply the odd 8-bit parts, producing 16-bit sums.
    SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub,
                                          LHS, RHS, DAG, dl, MVT::v8i16);
    OddParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OddParts);

    // Merge the results together.  Because vmuleub and vmuloub are
    // instructions with a big-endian bias, we must reverse the
    // element numbering and reverse the meaning of "odd" and "even"
    // when generating little endian code.
    int Ops[16];
    for (unsigned i = 0; i != 8; ++i) {
      if (isLittleEndian) {
        Ops[i*2  ] = 2*i;
        Ops[i*2+1] = 2*i+16;
      } else {
        Ops[i*2  ] = 2*i+1;
        Ops[i*2+1] = 2*i+1+16;
      }
    }
    if (isLittleEndian)
      return DAG.getVectorShuffle(MVT::v16i8, dl, OddParts, EvenParts, Ops);
    else
      return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops);
  } else {
    llvm_unreachable("Unknown mul to lower!");
  }
}
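
// The v4i32 path above relies on the halfword decomposition, written out
// here for one 32-bit lane (all arithmetic mod 2^32):
//   a * b = a_lo*b_lo + ((a_lo*b_hi + a_hi*b_lo) << 16)
// vmulouh produces a_lo*b_lo, vmsumuhm against the rotated RHS produces the
// two cross terms summed, and vslw supplies the final shift by 16.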

SDValue PPCTargetLowering::LowerABS(SDValue Op, SelectionDAG &DAG) const {

  assert(Op.getOpcode() == ISD::ABS && "Should only be called for ISD::ABS");

  EVT VT = Op.getValueType();
  assert(VT.isVector() &&
         "Only set vector abs as custom, scalar abs shouldn't reach here!");
  assert((VT == MVT::v2i64 || VT == MVT::v4i32 || VT == MVT::v8i16 ||
          VT == MVT::v16i8) &&
         "Unexpected vector element type!");
  assert((VT != MVT::v2i64 || Subtarget.hasP8Altivec()) &&
         "Current subtarget doesn't support smax v2i64!");

  // For vector abs, it can be lowered to:
  // abs x
  // ==>
  // y = -x
  // smax(x, y)

  SDLoc dl(Op);
  SDValue X = Op.getOperand(0);
  SDValue Zero = DAG.getConstant(0, dl, VT);
  SDValue Y = DAG.getNode(ISD::SUB, dl, VT, Zero, X);

  // SMAX patch https://reviews.llvm.org/D47332
  // hasn't landed yet, so use intrinsic first here.
  // TODO: Should use SMAX directly once the SMAX patch lands.
  Intrinsic::ID BifID = Intrinsic::ppc_altivec_vmaxsw;
  if (VT == MVT::v2i64)
    BifID = Intrinsic::ppc_altivec_vmaxsd;
  else if (VT == MVT::v8i16)
    BifID = Intrinsic::ppc_altivec_vmaxsh;
  else if (VT == MVT::v16i8)
    BifID = Intrinsic::ppc_altivec_vmaxsb;

  return BuildIntrinsicOp(BifID, X, Y, DAG, dl, VT);
}
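
// For illustration: with x = {-5, 7, ...}, y = 0 - x = {5, -7, ...}, so
// vmaxs* selects {5, 7, ...}; INT_MIN negates to itself, so abs(INT_MIN)
// remains INT_MIN, matching the usual wrapping behaviour of ISD::ABS.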

// Custom lowering for fpext v2f32 to v2f64
SDValue PPCTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {

  assert(Op.getOpcode() == ISD::FP_EXTEND &&
         "Should only be called for ISD::FP_EXTEND");

  // We only want to custom lower an extend from v2f32 to v2f64.
  if (Op.getValueType() != MVT::v2f64 ||
      Op.getOperand(0).getValueType() != MVT::v2f32)
    return SDValue();

  SDLoc dl(Op);
  SDValue Op0 = Op.getOperand(0);

  switch (Op0.getOpcode()) {
  default:
    return SDValue();
  case ISD::FADD:
  case ISD::FMUL:
  case ISD::FSUB: {
    SDValue NewLoad[2];
    for (unsigned i = 0, ie = Op0.getNumOperands(); i != ie; ++i) {
      // Ensure both inputs are loads.
      SDValue LdOp = Op0.getOperand(i);
      if (LdOp.getOpcode() != ISD::LOAD)
        return SDValue();
      // Generate new load node.
      LoadSDNode *LD = cast<LoadSDNode>(LdOp);
      SDValue LoadOps[] = { LD->getChain(), LD->getBasePtr() };
      NewLoad[i] =
        DAG.getMemIntrinsicNode(PPCISD::LD_VSX_LH, dl,
                                DAG.getVTList(MVT::v4f32, MVT::Other),
                                LoadOps, LD->getMemoryVT(),
                                LD->getMemOperand());
    }
    SDValue NewOp = DAG.getNode(Op0.getOpcode(), SDLoc(Op0), MVT::v4f32,
                              NewLoad[0], NewLoad[1],
                              Op0.getNode()->getFlags());
    return DAG.getNode(PPCISD::FP_EXTEND_LH, dl, MVT::v2f64, NewOp);
  }
  case ISD::LOAD: {
    LoadSDNode *LD = cast<LoadSDNode>(Op0);
    SDValue LoadOps[] = { LD->getChain(), LD->getBasePtr() };
    SDValue NewLd =
      DAG.getMemIntrinsicNode(PPCISD::LD_VSX_LH, dl,
                              DAG.getVTList(MVT::v4f32, MVT::Other),
                              LoadOps, LD->getMemoryVT(), LD->getMemOperand());
    return DAG.getNode(PPCISD::FP_EXTEND_LH, dl, MVT::v2f64, NewLd);
  }
  }
  llvm_unreachable("ERROR: Should return for all cases within switch.");
}

/// LowerOperation - Provide custom lowering hooks for some operations.
///
SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  default: llvm_unreachable("Wasn't expecting to be able to lower this!");
  case ISD::ConstantPool:       return LowerConstantPool(Op, DAG);
  case ISD::BlockAddress:       return LowerBlockAddress(Op, DAG);
  case ISD::GlobalAddress:      return LowerGlobalAddress(Op, DAG);
  case ISD::GlobalTLSAddress:   return LowerGlobalTLSAddress(Op, DAG);
  case ISD::JumpTable:          return LowerJumpTable(Op, DAG);
  case ISD::SETCC:              return LowerSETCC(Op, DAG);
  case ISD::INIT_TRAMPOLINE:    return LowerINIT_TRAMPOLINE(Op, DAG);
  case ISD::ADJUST_TRAMPOLINE:  return LowerADJUST_TRAMPOLINE(Op, DAG);

  // Variable argument lowering.
  case ISD::VASTART:            return LowerVASTART(Op, DAG);
  case ISD::VAARG:              return LowerVAARG(Op, DAG);
  case ISD::VACOPY:             return LowerVACOPY(Op, DAG);

  case ISD::STACKRESTORE:       return LowerSTACKRESTORE(Op, DAG);
  case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
  case ISD::GET_DYNAMIC_AREA_OFFSET:
    return LowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);

  // Exception handling lowering.
  case ISD::EH_DWARF_CFA:       return LowerEH_DWARF_CFA(Op, DAG);
  case ISD::EH_SJLJ_SETJMP:     return lowerEH_SJLJ_SETJMP(Op, DAG);
  case ISD::EH_SJLJ_LONGJMP:    return lowerEH_SJLJ_LONGJMP(Op, DAG);

  case ISD::LOAD:               return LowerLOAD(Op, DAG);
  case ISD::STORE:              return LowerSTORE(Op, DAG);
  case ISD::TRUNCATE:           return LowerTRUNCATE(Op, DAG);
  case ISD::SELECT_CC:          return LowerSELECT_CC(Op, DAG);
  case ISD::FP_TO_UINT:
  case ISD::FP_TO_SINT:         return LowerFP_TO_INT(Op, DAG, SDLoc(Op));
  case ISD::UINT_TO_FP:
  case ISD::SINT_TO_FP:         return LowerINT_TO_FP(Op, DAG);
  case ISD::FLT_ROUNDS_:        return LowerFLT_ROUNDS_(Op, DAG);

  // Lower 64-bit shifts.
  case ISD::SHL_PARTS:          return LowerSHL_PARTS(Op, DAG);
  case ISD::SRL_PARTS:          return LowerSRL_PARTS(Op, DAG);
  case ISD::SRA_PARTS:          return LowerSRA_PARTS(Op, DAG);

  // Vector-related lowering.
  case ISD::BUILD_VECTOR:       return LowerBUILD_VECTOR(Op, DAG);
  case ISD::VECTOR_SHUFFLE:     return LowerVECTOR_SHUFFLE(Op, DAG);
  case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
  case ISD::SCALAR_TO_VECTOR:   return LowerSCALAR_TO_VECTOR(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::INSERT_VECTOR_ELT:  return LowerINSERT_VECTOR_ELT(Op, DAG);
  case ISD::MUL:                return LowerMUL(Op, DAG);
  case ISD::ABS:                return LowerABS(Op, DAG);
  case ISD::FP_EXTEND:          return LowerFP_EXTEND(Op, DAG);

  // For counter-based loop handling.
  case ISD::INTRINSIC_W_CHAIN:  return SDValue();

  case ISD::BITCAST:            return LowerBITCAST(Op, DAG);

  // Frame & Return address.
  case ISD::RETURNADDR:         return LowerRETURNADDR(Op, DAG);
  case ISD::FRAMEADDR:          return LowerFRAMEADDR(Op, DAG);

  case ISD::INTRINSIC_VOID:
    return LowerINTRINSIC_VOID(Op, DAG);
  case ISD::SREM:
  case ISD::UREM:
    return LowerREM(Op, DAG);
  case ISD::BSWAP:
    return LowerBSWAP(Op, DAG);
  case ISD::ATOMIC_CMP_SWAP:
    return LowerATOMIC_CMP_SWAP(Op, DAG);
  }
}

void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
                                           SmallVectorImpl<SDValue>&Results,
                                           SelectionDAG &DAG) const {
  SDLoc dl(N);
  switch (N->getOpcode()) {
  default:
    llvm_unreachable("Do not know how to custom type legalize this operation!");
  case ISD::READCYCLECOUNTER: {
    SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
    SDValue RTB = DAG.getNode(PPCISD::READ_TIME_BASE, dl, VTs, N->getOperand(0));

    Results.push_back(RTB);
    Results.push_back(RTB.getValue(1));
    Results.push_back(RTB.getValue(2));
    break;
  }
  case ISD::INTRINSIC_W_CHAIN: {
    if (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() !=
        Intrinsic::loop_decrement)
      break;

    assert(N->getValueType(0) == MVT::i1 &&
           "Unexpected result type for CTR decrement intrinsic");
    EVT SVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
                                 N->getValueType(0));
    SDVTList VTs = DAG.getVTList(SVT, MVT::Other);
    SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0),
                                 N->getOperand(1));

    Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewInt));
    Results.push_back(NewInt.getValue(1));
    break;
  }
  case ISD::VAARG: {
    if (!Subtarget.isSVR4ABI() || Subtarget.isPPC64())
      return;

    EVT VT = N->getValueType(0);

    if (VT == MVT::i64) {
      SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG);

      Results.push_back(NewNode);
      Results.push_back(NewNode.getValue(1));
    }
    return;
  }
  case ISD::FP_TO_SINT:
  case ISD::FP_TO_UINT:
    // LowerFP_TO_INT() can only handle f32 and f64.
    if (N->getOperand(0).getValueType() == MVT::ppcf128)
      return;
    Results.push_back(LowerFP_TO_INT(SDValue(N, 0), DAG, dl));
    return;
  case ISD::TRUNCATE: {
    EVT TrgVT = N->getValueType(0);
    if (TrgVT.isVector() &&
        isOperationCustom(N->getOpcode(), TrgVT) &&
        N->getOperand(0).getValueType().getSizeInBits() <= 128)
      Results.push_back(LowerTRUNCATEVector(SDValue(N, 0), DAG));
    return;
  }
  case ISD::BITCAST:
    // Don't handle bitcast here.
    return;
  }
}

//===----------------------------------------------------------------------===//
//  Other Lowering Code
//===----------------------------------------------------------------------===//

static Instruction* callIntrinsic(IRBuilder<> &Builder, Intrinsic::ID Id) {
  Module *M = Builder.GetInsertBlock()->getParent()->getParent();
  Function *Func = Intrinsic::getDeclaration(M, Id);
  return Builder.CreateCall(Func, {});
}

// The mappings for emitLeading/TrailingFence are taken from
// http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
Instruction *PPCTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
                                                 Instruction *Inst,
                                                 AtomicOrdering Ord) const {
  if (Ord == AtomicOrdering::SequentiallyConsistent)
    return callIntrinsic(Builder, Intrinsic::ppc_sync);
  if (isReleaseOrStronger(Ord))
    return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
  return nullptr;
}

Instruction *PPCTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
                                                  Instruction *Inst,
                                                  AtomicOrdering Ord) const {
  if (Inst->hasAtomicLoad() && isAcquireOrStronger(Ord)) {
    // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and
    // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
    // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification.
    if (isa<LoadInst>(Inst) && Subtarget.isPPC64())
      return Builder.CreateCall(
          Intrinsic::getDeclaration(
              Builder.GetInsertBlock()->getParent()->getParent(),
              Intrinsic::ppc_cfence, {Inst->getType()}),
          {Inst});
    // FIXME: Can use isync for rmw operation.
    return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
  }
  return nullptr;
}
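
// Rough summary of the mapping implemented above: seq_cst gets a leading
// sync; release (or stronger) gets a leading lwsync; an acquire access that
// involves an atomic load gets a trailing lwsync, except a 64-bit acquire
// load, which emits llvm.ppc.cfence so that a compare/branch+isync sequence
// can be formed later instead of a full lwsync.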

MachineBasicBlock *
PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB,
                                    unsigned AtomicSize,
                                    unsigned BinOpcode,
                                    unsigned CmpOpcode,
                                    unsigned CmpPred) const {
  // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();

  auto LoadMnemonic = PPC::LDARX;
  auto StoreMnemonic = PPC::STDCX;
  switch (AtomicSize) {
  default:
    llvm_unreachable("Unexpected size of atomic entity");
  case 1:
    LoadMnemonic = PPC::LBARX;
    StoreMnemonic = PPC::STBCX;
    assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
    break;
  case 2:
    LoadMnemonic = PPC::LHARX;
    StoreMnemonic = PPC::STHCX;
    assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
    break;
  case 4:
    LoadMnemonic = PPC::LWARX;
    StoreMnemonic = PPC::STWCX;
    break;
  case 8:
    LoadMnemonic = PPC::LDARX;
    StoreMnemonic = PPC::STDCX;
    break;
  }

  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  MachineFunction *F = BB->getParent();
  MachineFunction::iterator It = ++BB->getIterator();

  Register dest = MI.getOperand(0).getReg();
  Register ptrA = MI.getOperand(1).getReg();
  Register ptrB = MI.getOperand(2).getReg();
  Register incr = MI.getOperand(3).getReg();
  DebugLoc dl = MI.getDebugLoc();

  MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *loop2MBB =
    CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
  MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
  F->insert(It, loopMBB);
  if (CmpOpcode)
    F->insert(It, loop2MBB);
  F->insert(It, exitMBB);
  exitMBB->splice(exitMBB->begin(), BB,
                  std::next(MachineBasicBlock::iterator(MI)), BB->end());
  exitMBB->transferSuccessorsAndUpdatePHIs(BB);

  MachineRegisterInfo &RegInfo = F->getRegInfo();
  Register TmpReg = (!BinOpcode) ? incr :
    RegInfo.createVirtualRegister( AtomicSize == 8 ? &PPC::G8RCRegClass
                                           : &PPC::GPRCRegClass);

  //  thisMBB:
  //   ...
  //   fallthrough --> loopMBB
  BB->addSuccessor(loopMBB);

  //  loopMBB:
  //   l[wd]arx dest, ptr
  //   add r0, dest, incr
  //   st[wd]cx. r0, ptr
  //   bne- loopMBB
  //   fallthrough --> exitMBB

  // For max/min...
  //  loopMBB:
  //   l[wd]arx dest, ptr
  //   cmpl?[wd] incr, dest
  //   bgt exitMBB
  //  loop2MBB:
  //   st[wd]cx. dest, ptr
  //   bne- loopMBB
  //   fallthrough --> exitMBB

  BB = loopMBB;
  BuildMI(BB, dl, TII->get(LoadMnemonic), dest)
    .addReg(ptrA).addReg(ptrB);
  if (BinOpcode)
    BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest);
  if (CmpOpcode) {
    // Signed comparisons of byte or halfword values must be sign-extended.
    if (CmpOpcode == PPC::CMPW && AtomicSize < 4) {
      unsigned ExtReg =  RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
      BuildMI(BB, dl, TII->get(AtomicSize == 1 ? PPC::EXTSB : PPC::EXTSH),
              ExtReg).addReg(dest);
      BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
        .addReg(incr).addReg(ExtReg);
    } else
      BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
        .addReg(incr).addReg(dest);

    BuildMI(BB, dl, TII->get(PPC::BCC))
      .addImm(CmpPred).addReg(PPC::CR0).addMBB(exitMBB);
    BB->addSuccessor(loop2MBB);
    BB->addSuccessor(exitMBB);
    BB = loop2MBB;
  }
  BuildMI(BB, dl, TII->get(StoreMnemonic))
    .addReg(TmpReg).addReg(ptrA).addReg(ptrB);
  BuildMI(BB, dl, TII->get(PPC::BCC))
    .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
  BB->addSuccessor(loopMBB);
  BB->addSuccessor(exitMBB);

  //  exitMBB:
  //   ...
  BB = exitMBB;
  return BB;
}
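
// For illustration, an IR-level
//   atomicrmw add i32* %p, i32 %v monotonic
// reaches this routine with AtomicSize = 4 and BinOpcode = PPC::ADD4,
// producing the lwarx/add/stwcx./bne- loop sketched in the comments above.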

MachineBasicBlock *PPCTargetLowering::EmitPartwordAtomicBinary(
    MachineInstr &MI, MachineBasicBlock *BB,
    bool is8bit, // operation
    unsigned BinOpcode, unsigned CmpOpcode, unsigned CmpPred) const {
  // If we support part-word atomic mnemonics, just use them
  if (Subtarget.hasPartwordAtomics())
    return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode, CmpOpcode,
                            CmpPred);

  // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  // In 64 bit mode we have to use 64 bits for addresses, even though the
  // lwarx/stwcx are 32 bits.  With the 32-bit atomics we can use address
  // registers without caring whether they're 32 or 64, but here we're
  // doing actual arithmetic on the addresses.
  bool is64bit = Subtarget.isPPC64();
  bool isLittleEndian = Subtarget.isLittleEndian();
  unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;

  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  MachineFunction *F = BB->getParent();
  MachineFunction::iterator It = ++BB->getIterator();

  unsigned dest = MI.getOperand(0).getReg();
  unsigned ptrA = MI.getOperand(1).getReg();
  unsigned ptrB = MI.getOperand(2).getReg();
  unsigned incr = MI.getOperand(3).getReg();
  DebugLoc dl = MI.getDebugLoc();

  MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *loop2MBB =
      CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
  MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
  F->insert(It, loopMBB);
  if (CmpOpcode)
    F->insert(It, loop2MBB);
  F->insert(It, exitMBB);
  exitMBB->splice(exitMBB->begin(), BB,
                  std::next(MachineBasicBlock::iterator(MI)), BB->end());
  exitMBB->transferSuccessorsAndUpdatePHIs(BB);

  MachineRegisterInfo &RegInfo = F->getRegInfo();
  const TargetRegisterClass *RC =
      is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
  const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;

  Register PtrReg = RegInfo.createVirtualRegister(RC);
  Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
  Register ShiftReg =
      isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
  Register Incr2Reg = RegInfo.createVirtualRegister(GPRC);
  Register MaskReg = RegInfo.createVirtualRegister(GPRC);
  Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
  Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
  Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
  Register Tmp3Reg = RegInfo.createVirtualRegister(GPRC);
  Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
  Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
  Register Ptr1Reg;
  Register TmpReg =
      (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(GPRC);

  //  thisMBB:
  //   ...
  //   fallthrough --> loopMBB
  BB->addSuccessor(loopMBB);

  // The 4-byte load must be aligned, while a char or short may be
  // anywhere in the word.  Hence all this nasty bookkeeping code.
  //   add ptr1, ptrA, ptrB [copy if ptrA==0]
  //   rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
  //   xori shift, shift1, 24 [16]
  //   rlwinm ptr, ptr1, 0, 0, 29
  //   slw incr2, incr, shift
  //   li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
  //   slw mask, mask2, shift
  //  loopMBB:
  //   lwarx tmpDest, ptr
  //   add tmp, tmpDest, incr2
  //   andc tmp2, tmpDest, mask
  //   and tmp3, tmp, mask
  //   or tmp4, tmp3, tmp2
  //   stwcx. tmp4, ptr
  //   bne- loopMBB
  //   fallthrough --> exitMBB
  //   srw dest, tmpDest, shift
  if (ptrA != ZeroReg) {
    Ptr1Reg = RegInfo.createVirtualRegister(RC);
    BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
        .addReg(ptrA)
        .addReg(ptrB);
  } else {
    Ptr1Reg = ptrB;
  }
  // We need to use a 32-bit subregister to avoid a register-class mismatch in
  // 64-bit mode.
  BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
      .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
      .addImm(3)
      .addImm(27)
      .addImm(is8bit ? 28 : 27);
  if (!isLittleEndian)
    BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
        .addReg(Shift1Reg)
        .addImm(is8bit ? 24 : 16);
  if (is64bit)
    BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
        .addReg(Ptr1Reg)
        .addImm(0)
        .addImm(61);
  else
    BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
        .addReg(Ptr1Reg)
        .addImm(0)
        .addImm(0)
        .addImm(29);
  BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg).addReg(incr).addReg(ShiftReg);
  if (is8bit)
    BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
  else {
    BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
    BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
        .addReg(Mask3Reg)
        .addImm(65535);
  }
  BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
      .addReg(Mask2Reg)
      .addReg(ShiftReg);

  BB = loopMBB;
  BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
      .addReg(ZeroReg)
      .addReg(PtrReg);
  if (BinOpcode)
    BuildMI(BB, dl, TII->get(BinOpcode), TmpReg)
        .addReg(Incr2Reg)
        .addReg(TmpDestReg);
  BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
      .addReg(TmpDestReg)
      .addReg(MaskReg);
  BuildMI(BB, dl, TII->get(PPC::AND), Tmp3Reg).addReg(TmpReg).addReg(MaskReg);
  if (CmpOpcode) {
    // For unsigned comparisons, we can directly compare the shifted values.
    // For signed comparisons we shift and sign extend.
    unsigned SReg = RegInfo.createVirtualRegister(GPRC);
    BuildMI(BB, dl, TII->get(PPC::AND), SReg)
        .addReg(TmpDestReg)
        .addReg(MaskReg);
    unsigned ValueReg = SReg;
    unsigned CmpReg = Incr2Reg;
    if (CmpOpcode == PPC::CMPW) {
      ValueReg = RegInfo.createVirtualRegister(GPRC);
      BuildMI(BB, dl, TII->get(PPC::SRW), ValueReg)
          .addReg(SReg)
          .addReg(ShiftReg);
      unsigned ValueSReg = RegInfo.createVirtualRegister(GPRC);
      BuildMI(BB, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueSReg)
          .addReg(ValueReg);
      ValueReg = ValueSReg;
      CmpReg = incr;
    }
    BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
        .addReg(CmpReg)
        .addReg(ValueReg);
    BuildMI(BB, dl, TII->get(PPC::BCC))
        .addImm(CmpPred)
        .addReg(PPC::CR0)
        .addMBB(exitMBB);
    BB->addSuccessor(loop2MBB);
    BB->addSuccessor(exitMBB);
    BB = loop2MBB;
  }
  BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg).addReg(Tmp3Reg).addReg(Tmp2Reg);
  BuildMI(BB, dl, TII->get(PPC::STWCX))
      .addReg(Tmp4Reg)
      .addReg(ZeroReg)
      .addReg(PtrReg);
  BuildMI(BB, dl, TII->get(PPC::BCC))
      .addImm(PPC::PRED_NE)
      .addReg(PPC::CR0)
      .addMBB(loopMBB);
  BB->addSuccessor(loopMBB);
  BB->addSuccessor(exitMBB);

  //  exitMBB:
  //   ...
  BB = exitMBB;
  BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest)
      .addReg(TmpDestReg)
      .addReg(ShiftReg);
  return BB;
}
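
// Worked example of the masking above: for an i8 at (ptr & 3) == 1 on a
// big-endian target, shift1 = 1 * 8 = 8 and shift = 8 ^ 24 = 16, so the
// byte occupies bits 23..16 of its aligned word; incr and the 0xFF mask are
// shifted left by 16 to line up with it before the lwarx/stwcx. loop runs.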
10415
10416
llvm::MachineBasicBlock *
10417
PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
10418
8
                                    MachineBasicBlock *MBB) const {
10419
8
  DebugLoc DL = MI.getDebugLoc();
10420
8
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
10421
8
  const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
10422
8
10423
8
  MachineFunction *MF = MBB->getParent();
10424
8
  MachineRegisterInfo &MRI = MF->getRegInfo();
10425
8
10426
8
  const BasicBlock *BB = MBB->getBasicBlock();
10427
8
  MachineFunction::iterator I = ++MBB->getIterator();
10428
8
10429
8
  unsigned DstReg = MI.getOperand(0).getReg();
10430
8
  const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
10431
8
  assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
10432
8
  unsigned mainDstReg = MRI.createVirtualRegister(RC);
10433
8
  unsigned restoreDstReg = MRI.createVirtualRegister(RC);
10434
8
10435
8
  MVT PVT = getPointerTy(MF->getDataLayout());
10436
8
  assert((PVT == MVT::i64 || PVT == MVT::i32) &&
10437
8
         "Invalid Pointer Size!");
10438
8
  // For v = setjmp(buf), we generate
10439
8
  //
10440
8
  // thisMBB:
10441
8
  //  SjLjSetup mainMBB
10442
8
  //  bl mainMBB
10443
8
  //  v_restore = 1
10444
8
  //  b sinkMBB
10445
8
  //
10446
8
  // mainMBB:
10447
8
  //  buf[LabelOffset] = LR
10448
8
  //  v_main = 0
10449
8
  //
10450
8
  // sinkMBB:
10451
8
  //  v = phi(main, restore)
10452
8
  //
10453
8
10454
8
  MachineBasicBlock *thisMBB = MBB;
10455
8
  MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
10456
8
  MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
10457
8
  MF->insert(I, mainMBB);
10458
8
  MF->insert(I, sinkMBB);
10459
8
10460
8
  MachineInstrBuilder MIB;
10461
8
10462
8
  // Transfer the remainder of BB and its successor edges to sinkMBB.
10463
8
  sinkMBB->splice(sinkMBB->begin(), MBB,
10464
8
                  std::next(MachineBasicBlock::iterator(MI)), MBB->end());
10465
8
  sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
10466
8
10467
8
  // Note that the structure of the jmp_buf used here is not compatible
10468
8
  // with that used by libc, and is not designed to be. Specifically, it
10469
8
  // stores only those 'reserved' registers that LLVM does not otherwise
10470
8
  // understand how to spill. Also, by convention, by the time this
10471
8
  // intrinsic is called, Clang has already stored the frame address in the
10472
8
  // first slot of the buffer and stack address in the third. Following the
10473
8
  // X86 target code, we'll store the jump address in the second slot. We also
10474
8
  // need to save the TOC pointer (R2) to handle jumps between shared
10475
8
  // libraries, and that will be stored in the fourth slot. The thread
10476
8
  // identifier (R13) is not affected.
10477
8
10478
8
  // thisMBB:
10479
8
  const int64_t LabelOffset = 1 * PVT.getStoreSize();
10480
8
  const int64_t TOCOffset   = 3 * PVT.getStoreSize();
10481
8
  const int64_t BPOffset    = 4 * PVT.getStoreSize();
10482
8
10483
8
  // Prepare IP either in reg.
10484
8
  const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
10485
8
  unsigned LabelReg = MRI.createVirtualRegister(PtrRC);
10486
8
  unsigned BufReg = MI.getOperand(1).getReg();
10487
8
10488
8
  if (Subtarget.isPPC64() && Subtarget.isSVR4ABI()) {
10489
8
    setUsesTOCBasePtr(*MBB->getParent());
10490
8
    MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD))
10491
8
              .addReg(PPC::X2)
10492
8
              .addImm(TOCOffset)
10493
8
              .addReg(BufReg)
10494
8
              .cloneMemRefs(MI);
10495
8
  }
10496
8
10497
8
  // Naked functions never have a base pointer, and so we use r1. For all
10498
8
  // other functions, this decision must be delayed until during PEI.
10499
8
  unsigned BaseReg;
10500
8
  if (MF->getFunction().hasFnAttribute(Attribute::Naked))
10501
0
    BaseReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1;
10502
8
  else
10503
8
  BaseReg = Subtarget.isPPC64() ? PPC::BP8 : PPC::BP;
10504
8
10505
8
  MIB = BuildMI(*thisMBB, MI, DL,
10506
8
                TII->get(Subtarget.isPPC64() ? PPC::STD : PPC::STW))
10507
8
            .addReg(BaseReg)
10508
8
            .addImm(BPOffset)
10509
8
            .addReg(BufReg)
10510
8
            .cloneMemRefs(MI);
10511
8
10512
8
  // Setup
10513
8
  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB);
10514
8
  MIB.addRegMask(TRI->getNoPreservedMask());
10515
8
10516
8
  BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1);
10517
8
10518
8
  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup))
10519
8
          .addMBB(mainMBB);
10520
8
  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB);
10521
8
10522
8
  thisMBB->addSuccessor(mainMBB, BranchProbability::getZero());
10523
8
  thisMBB->addSuccessor(sinkMBB, BranchProbability::getOne());
10524
8
10525
8
  // mainMBB:
10526
8
  //  mainDstReg = 0
10527
8
  MIB =
10528
8
      BuildMI(mainMBB, DL,
10529
8
              TII->get(Subtarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg);
10530
8
10531
8
  // Store IP
10532
8
  if (Subtarget.isPPC64()) {
10533
8
    MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD))
10534
8
            .addReg(LabelReg)
10535
8
            .addImm(LabelOffset)
10536
8
            .addReg(BufReg);
10537
8
  } else {
10538
0
    MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW))
10539
0
            .addReg(LabelReg)
10540
0
            .addImm(LabelOffset)
10541
0
            .addReg(BufReg);
10542
0
  }
10543
8
  MIB.cloneMemRefs(MI);
10544
8
10545
8
  BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0);
10546
8
  mainMBB->addSuccessor(sinkMBB);
10547
8
10548
8
  // sinkMBB:
10549
8
  BuildMI(*sinkMBB, sinkMBB->begin(), DL,
10550
8
          TII->get(PPC::PHI), DstReg)
10551
8
    .addReg(mainDstReg).addMBB(mainMBB)
10552
8
    .addReg(restoreDstReg).addMBB(thisMBB);
10553
8
10554
8
  MI.eraseFromParent();
10555
8
  return sinkMBB;
10556
8
}
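A standalone sketch (plain C++, not backend code) of the buffer layout that the setjmp emitter above and the longjmp emitter below agree on; the slot indices come from the offsets used above, and the assumption of 8-byte pointers (MVT::i64) is mine:

#include <cstdint>
#include <cstdio>

int main() {
  const int64_t PtrSize = 8;               // PVT.getStoreSize() for MVT::i64
  const int64_t FrameOffset = 0 * PtrSize; // frame address, stored by Clang
  const int64_t LabelOffset = 1 * PtrSize; // jump address (LR at setjmp time)
  const int64_t SPOffset    = 2 * PtrSize; // stack address, stored by Clang
  const int64_t TOCOffset   = 3 * PtrSize; // TOC pointer (R2)
  const int64_t BPOffset    = 4 * PtrSize; // base pointer
  std::printf("frame=%lld ip=%lld sp=%lld toc=%lld bp=%lld\n",
              (long long)FrameOffset, (long long)LabelOffset,
              (long long)SPOffset, (long long)TOCOffset, (long long)BPOffset);
  return 0;
}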
10557
10558
MachineBasicBlock *
10559
PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
10560
5
                                     MachineBasicBlock *MBB) const {
10561
5
  DebugLoc DL = MI.getDebugLoc();
10562
5
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
10563
5
10564
5
  MachineFunction *MF = MBB->getParent();
10565
5
  MachineRegisterInfo &MRI = MF->getRegInfo();
10566
5
10567
5
  MVT PVT = getPointerTy(MF->getDataLayout());
10568
5
  assert((PVT == MVT::i64 || PVT == MVT::i32) &&
10569
5
         "Invalid Pointer Size!");
10570
5
10571
5
  const TargetRegisterClass *RC =
10572
5
    (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
10573
5
  unsigned Tmp = MRI.createVirtualRegister(RC);
10574
5
  // Since FP is only updated here but NOT referenced, it's treated as GPR.
10575
5
  unsigned FP  = (PVT == MVT::i64) ? PPC::X31 : PPC::R31;
10576
5
  unsigned SP  = (PVT == MVT::i64) ? PPC::X1 : PPC::R1;
10577
5
  unsigned BP =
10578
5
      (PVT == MVT::i64)
10579
5
          ? PPC::X30
10580
5
          : (Subtarget.isSVR4ABI() && isPositionIndependent() ? PPC::R29
10581
0
                                                              : PPC::R30);
10582
5
10583
5
  MachineInstrBuilder MIB;
10584
5
10585
5
  const int64_t LabelOffset = 1 * PVT.getStoreSize();
10586
5
  const int64_t SPOffset    = 2 * PVT.getStoreSize();
10587
5
  const int64_t TOCOffset   = 3 * PVT.getStoreSize();
10588
5
  const int64_t BPOffset    = 4 * PVT.getStoreSize();
10589
5
10590
5
  unsigned BufReg = MI.getOperand(0).getReg();
10591
5
10592
5
  // Reload FP (the jumped-to function may not have had a
10593
5
  // frame pointer, and if so, then its r31 will be restored
10594
5
  // as necessary).
10595
5
  if (PVT == MVT::i64) {
10596
5
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), FP)
10597
5
            .addImm(0)
10598
5
            .addReg(BufReg);
10599
5
  } else {
10600
0
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), FP)
10601
0
            .addImm(0)
10602
0
            .addReg(BufReg);
10603
0
  }
10604
5
  MIB.cloneMemRefs(MI);
10605
5
10606
5
  // Reload IP
10607
5
  if (PVT == MVT::i64) {
10608
5
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp)
10609
5
            .addImm(LabelOffset)
10610
5
            .addReg(BufReg);
10611
5
  } else {
10612
0
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp)
10613
0
            .addImm(LabelOffset)
10614
0
            .addReg(BufReg);
10615
0
  }
10616
5
  MIB.cloneMemRefs(MI);
10617
5
10618
5
  // Reload SP
10619
5
  if (PVT == MVT::i64) {
10620
5
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP)
10621
5
            .addImm(SPOffset)
10622
5
            .addReg(BufReg);
10623
5
  } else {
10624
0
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP)
10625
0
            .addImm(SPOffset)
10626
0
            .addReg(BufReg);
10627
0
  }
10628
5
  MIB.cloneMemRefs(MI);
10629
5
10630
5
  // Reload BP
10631
5
  if (PVT == MVT::i64) {
10632
5
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), BP)
10633
5
            .addImm(BPOffset)
10634
5
            .addReg(BufReg);
10635
5
  } else {
10636
0
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), BP)
10637
0
            .addImm(BPOffset)
10638
0
            .addReg(BufReg);
10639
0
  }
10640
5
  MIB.cloneMemRefs(MI);
10641
5
10642
5
  // Reload TOC
10643
5
  if (PVT == MVT::i64 && Subtarget.isSVR4ABI()) {
10644
5
    setUsesTOCBasePtr(*MBB->getParent());
10645
5
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2)
10646
5
              .addImm(TOCOffset)
10647
5
              .addReg(BufReg)
10648
5
              .cloneMemRefs(MI);
10649
5
  }
10650
5
10651
5
  // Jump
10652
5
  BuildMI(*MBB, MI, DL,
10653
5
          TII->get(PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(Tmp);
10654
5
  BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? PPC::BCTR8 : PPC::BCTR));
10655
5
10656
5
  MI.eraseFromParent();
10657
5
  return MBB;
10658
5
}
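A plain-C++ model (hypothetical helper, not backend code) of what the longjmp expansion above reloads from a 64-bit buffer, in emission order — FP, IP, SP, BP, then (on 64-bit SVR4) the TOC — before branching to IP via MTCTR8/BCTR8:

#include <cassert>
#include <cstdint>

struct SjLjRestoreState {
  uint64_t FP, IP, SP, BP, TOC;
};

// Mirrors the reload order of the expansion above.
static SjLjRestoreState readJmpBuf(const uint64_t *Buf) {
  SjLjRestoreState S;
  S.FP  = Buf[0]; // LD FP, 0(BufReg)
  S.IP  = Buf[1]; // LD Tmp, LabelOffset(BufReg)
  S.SP  = Buf[2]; // LD SP, SPOffset(BufReg)
  S.BP  = Buf[4]; // LD BP, BPOffset(BufReg)
  S.TOC = Buf[3]; // LD X2, TOCOffset(BufReg)
  return S;
}

int main() {
  const uint64_t Buf[5] = {10, 11, 12, 13, 14};
  SjLjRestoreState S = readJmpBuf(Buf);
  assert(S.IP == 11 && S.TOC == 13 && S.BP == 14);
  return 0;
}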
10659
10660
MachineBasicBlock *
10661
PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
10662
1.49k
                                               MachineBasicBlock *BB) const {
10663
1.49k
  if (MI.getOpcode() == TargetOpcode::STACKMAP ||
10664
1.49k
      MI.getOpcode() == TargetOpcode::PATCHPOINT) {
10665
59
    if (Subtarget.isPPC64() && Subtarget.isSVR4ABI() &&
10666
59
        MI.getOpcode() == TargetOpcode::PATCHPOINT) {
10667
40
      // Call lowering should have added an r2 operand to indicate a dependence
10668
40
      // on the TOC base pointer value. It can't however, because there is no
10669
40
      // way to mark the dependence as implicit there, and so the stackmap code
10670
40
      // will confuse it with a regular operand. Instead, add the dependence
10671
40
      // here.
10672
40
      MI.addOperand(MachineOperand::CreateReg(PPC::X2, false, true));
10673
40
    }
10674
59
10675
59
    return emitPatchPoint(MI, BB);
10676
59
  }
10677
1.43k
10678
1.43k
  if (MI.getOpcode() == PPC::EH_SjLj_SetJmp32 ||
10679
1.43k
      MI.getOpcode() == PPC::EH_SjLj_SetJmp64) {
10680
8
    return emitEHSjLjSetJmp(MI, BB);
10681
1.42k
  } else if (MI.getOpcode() == PPC::EH_SjLj_LongJmp32 ||
10682
1.42k
             MI.getOpcode() == PPC::EH_SjLj_LongJmp64) {
10683
5
    return emitEHSjLjLongJmp(MI, BB);
10684
5
  }
10685
1.42k
10686
1.42k
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
10687
1.42k
10688
1.42k
  // To "insert" these instructions we actually have to insert their
10689
1.42k
  // control-flow patterns.
10690
1.42k
  const BasicBlock *LLVM_BB = BB->getBasicBlock();
10691
1.42k
  MachineFunction::iterator It = ++BB->getIterator();
10692
1.42k
10693
1.42k
  MachineFunction *F = BB->getParent();
10694
1.42k
10695
1.42k
  if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
10696
1.42k
      MI.getOpcode() == PPC::SELECT_CC_I8 || MI.getOpcode() == PPC::SELECT_I4 ||
10697
1.42k
      MI.getOpcode() == PPC::SELECT_I8) {
10698
508
    SmallVector<MachineOperand, 2> Cond;
10699
508
    if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
10700
508
        MI.getOpcode() == PPC::SELECT_CC_I8)
10701
294
      Cond.push_back(MI.getOperand(4));
10702
214
    else
10703
214
      Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET));
10704
508
    Cond.push_back(MI.getOperand(1));
10705
508
10706
508
    DebugLoc dl = MI.getDebugLoc();
10707
508
    TII->insertSelect(*BB, MI, dl, MI.getOperand(0).getReg(), Cond,
10708
508
                      MI.getOperand(2).getReg(), MI.getOperand(3).getReg());
10709
915
  } else if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
10710
915
             MI.getOpcode() == PPC::SELECT_CC_I8 ||
10711
915
             MI.getOpcode() == PPC::SELECT_CC_F4 ||
10712
915
             MI.getOpcode() == PPC::SELECT_CC_F8 ||
10713
915
             MI.getOpcode() == PPC::SELECT_CC_F16 ||
10714
915
             MI.getOpcode() == PPC::SELECT_CC_QFRC ||
10715
915
             MI.getOpcode() == PPC::SELECT_CC_QSRC ||
10716
915
             MI.getOpcode() == PPC::SELECT_CC_QBRC ||
10717
915
             MI.getOpcode() == PPC::SELECT_CC_VRRC ||
10718
915
             MI.getOpcode() == PPC::SELECT_CC_VSFRC ||
10719
915
             MI.getOpcode() == PPC::SELECT_CC_VSSRC ||
10720
915
             MI.getOpcode() == PPC::SELECT_CC_VSRC ||
10721
915
             MI.getOpcode() == PPC::SELECT_CC_SPE4 ||
10722
915
             MI.getOpcode() == PPC::SELECT_CC_SPE ||
10723
915
             MI.getOpcode() == PPC::SELECT_I4 ||
10724
915
             MI.getOpcode() == PPC::SELECT_I8 ||
10725
915
             MI.getOpcode() == PPC::SELECT_F4 ||
10726
915
             MI.getOpcode() == PPC::SELECT_F8 ||
10727
915
             MI.getOpcode() == PPC::SELECT_F16 ||
10728
915
             MI.getOpcode() == PPC::SELECT_QFRC ||
10729
915
             MI.getOpcode() == PPC::SELECT_QSRC ||
10730
915
             MI.getOpcode() == PPC::SELECT_QBRC ||
10731
915
             MI.getOpcode() == PPC::SELECT_SPE ||
10732
915
             MI.getOpcode() == PPC::SELECT_SPE4 ||
10733
915
             MI.getOpcode() == PPC::SELECT_VRRC ||
10734
915
             MI.getOpcode() == PPC::SELECT_VSFRC ||
10735
915
             MI.getOpcode() == PPC::SELECT_VSSRC ||
10736
915
             MI.getOpcode() == PPC::SELECT_VSRC) {
10737
141
    // The incoming instruction knows the destination vreg to set, the
10738
141
    // condition code register to branch on, the true/false values to
10739
141
    // select between, and a branch opcode to use.
10740
141
10741
141
    //  thisMBB:
10742
141
    //  ...
10743
141
    //   TrueVal = ...
10744
141
    //   cmpTY ccX, r1, r2
10745
141
    //   bCC copy1MBB
10746
141
    //   fallthrough --> copy0MBB
10747
141
    MachineBasicBlock *thisMBB = BB;
10748
141
    MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
10749
141
    MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
10750
141
    DebugLoc dl = MI.getDebugLoc();
10751
141
    F->insert(It, copy0MBB);
10752
141
    F->insert(It, sinkMBB);
10753
141
10754
141
    // Transfer the remainder of BB and its successor edges to sinkMBB.
10755
141
    sinkMBB->splice(sinkMBB->begin(), BB,
10756
141
                    std::next(MachineBasicBlock::iterator(MI)), BB->end());
10757
141
    sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
10758
141
10759
141
    // Next, add the true and fallthrough blocks as its successors.
10760
141
    BB->addSuccessor(copy0MBB);
10761
141
    BB->addSuccessor(sinkMBB);
10762
141
10763
141
    if (MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8 ||
10764
141
        MI.getOpcode() == PPC::SELECT_F4 || MI.getOpcode() == PPC::SELECT_F8 ||
10765
141
        MI.getOpcode() == PPC::SELECT_F16 ||
10766
141
        MI.getOpcode() == PPC::SELECT_SPE4 ||
10767
141
        MI.getOpcode() == PPC::SELECT_SPE ||
10768
141
        MI.getOpcode() == PPC::SELECT_QFRC ||
10769
141
        MI.getOpcode() == PPC::SELECT_QSRC ||
10770
141
        MI.getOpcode() == PPC::SELECT_QBRC ||
10771
141
        MI.getOpcode() == PPC::SELECT_VRRC ||
10772
141
        MI.getOpcode() == PPC::SELECT_VSFRC ||
10773
141
        MI.getOpcode() == PPC::SELECT_VSSRC ||
10774
141
        MI.getOpcode() == PPC::SELECT_VSRC) {
10775
64
      BuildMI(BB, dl, TII->get(PPC::BC))
10776
64
          .addReg(MI.getOperand(1).getReg())
10777
64
          .addMBB(sinkMBB);
10778
77
    } else {
10779
77
      unsigned SelectPred = MI.getOperand(4).getImm();
10780
77
      BuildMI(BB, dl, TII->get(PPC::BCC))
10781
77
          .addImm(SelectPred)
10782
77
          .addReg(MI.getOperand(1).getReg())
10783
77
          .addMBB(sinkMBB);
10784
77
    }
10785
141
10786
141
    //  copy0MBB:
10787
141
    //   %FalseValue = ...
10788
141
    //   # fallthrough to sinkMBB
10789
141
    BB = copy0MBB;
10790
141
10791
141
    // Update machine-CFG edges
10792
141
    BB->addSuccessor(sinkMBB);
10793
141
10794
141
    //  sinkMBB:
10795
141
    //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
10796
141
    //  ...
10797
141
    BB = sinkMBB;
10798
141
    BuildMI(*BB, BB->begin(), dl, TII->get(PPC::PHI), MI.getOperand(0).getReg())
10799
141
        .addReg(MI.getOperand(3).getReg())
10800
141
        .addMBB(copy0MBB)
10801
141
        .addReg(MI.getOperand(2).getReg())
10802
141
        .addMBB(thisMBB);
10803
774
  } else if (MI.getOpcode() == PPC::ReadTB) {
10804
2
    // To read the 64-bit time-base register on a 32-bit target, we read the
10805
2
    // two halves. Should the counter have wrapped while it was being read, we
10806
2
    // need to try again.
10807
2
    // ...
10808
2
    // readLoop:
10809
2
    // mfspr Rx,TBU # load from TBU
10810
2
    // mfspr Ry,TB  # load from TB
10811
2
    // mfspr Rz,TBU # load from TBU
10812
2
    // cmpw crX,Rx,Rz # check if 'old'='new'
10813
2
    // bne readLoop   # branch if they're not equal
10814
2
    // ...
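    // For example, if TBU ticks between the first and third mfspr, Rx != Rz,
    // the bne retries the loop, and so {Ry, Rx} is always a consistent 64-bit
    // snapshot of the time base.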
10815
2
10816
2
    MachineBasicBlock *readMBB = F->CreateMachineBasicBlock(LLVM_BB);
10817
2
    MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
10818
2
    DebugLoc dl = MI.getDebugLoc();
10819
2
    F->insert(It, readMBB);
10820
2
    F->insert(It, sinkMBB);
10821
2
10822
2
    // Transfer the remainder of BB and its successor edges to sinkMBB.
10823
2
    sinkMBB->splice(sinkMBB->begin(), BB,
10824
2
                    std::next(MachineBasicBlock::iterator(MI)), BB->end());
10825
2
    sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
10826
2
10827
2
    BB->addSuccessor(readMBB);
10828
2
    BB = readMBB;
10829
2
10830
2
    MachineRegisterInfo &RegInfo = F->getRegInfo();
10831
2
    unsigned ReadAgainReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
10832
2
    unsigned LoReg = MI.getOperand(0).getReg();
10833
2
    unsigned HiReg = MI.getOperand(1).getReg();
10834
2
10835
2
    BuildMI(BB, dl, TII->get(PPC::MFSPR), HiReg).addImm(269);
10836
2
    BuildMI(BB, dl, TII->get(PPC::MFSPR), LoReg).addImm(268);
10837
2
    BuildMI(BB, dl, TII->get(PPC::MFSPR), ReadAgainReg).addImm(269);
10838
2
10839
2
    unsigned CmpReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
10840
2
10841
2
    BuildMI(BB, dl, TII->get(PPC::CMPW), CmpReg)
10842
2
        .addReg(HiReg)
10843
2
        .addReg(ReadAgainReg);
10844
2
    BuildMI(BB, dl, TII->get(PPC::BCC))
10845
2
        .addImm(PPC::PRED_NE)
10846
2
        .addReg(CmpReg)
10847
2
        .addMBB(readMBB);
10848
2
10849
2
    BB->addSuccessor(readMBB);
10850
2
    BB->addSuccessor(sinkMBB);
10851
772
  } else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I8)
10852
16
    BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4);
10853
756
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I16)
10854
14
    BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4);
10855
742
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I32)
10856
13
    BB = EmitAtomicBinary(MI, BB, 4, PPC::ADD4);
10857
729
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I64)
10858
14
    BB = EmitAtomicBinary(MI, BB, 8, PPC::ADD8);
10859
715
10860
715
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I8)
10861
10
    BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND);
10862
705
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I16)
10863
10
    BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND);
10864
695
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I32)
10865
10
    BB = EmitAtomicBinary(MI, BB, 4, PPC::AND);
10866
685
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I64)
10867
11
    BB = EmitAtomicBinary(MI, BB, 8, PPC::AND8);
10868
674
10869
674
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I8)
10870
10
    BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR);
10871
664
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I16)
10872
10
    BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR);
10873
654
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I32)
10874
10
    BB = EmitAtomicBinary(MI, BB, 4, PPC::OR);
10875
644
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I64)
10876
10
    BB = EmitAtomicBinary(MI, BB, 8, PPC::OR8);
10877
634
10878
634
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I8)
10879
10
    BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR);
10880
624
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I16)
10881
12
    BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR);
10882
612
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I32)
10883
10
    BB = EmitAtomicBinary(MI, BB, 4, PPC::XOR);
10884
602
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I64)
10885
10
    BB = EmitAtomicBinary(MI, BB, 8, PPC::XOR8);
10886
592
10887
592
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I8)
10888
10
    BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::NAND);
10889
582
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I16)
10890
10
    BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::NAND);
10891
572
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I32)
10892
10
    BB = EmitAtomicBinary(MI, BB, 4, PPC::NAND);
10893
562
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I64)
10894
10
    BB = EmitAtomicBinary(MI, BB, 8, PPC::NAND8);
10895
552
10896
552
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I8)
10897
10
    BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF);
10898
542
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I16)
10899
10
    BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF);
10900
532
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I32)
10901
10
    BB = EmitAtomicBinary(MI, BB, 4, PPC::SUBF);
10902
522
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I64)
10903
10
    BB = EmitAtomicBinary(MI, BB, 8, PPC::SUBF8);
10904
512
10905
512
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I8)
10906
13
    BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_GE);
10907
499
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I16)
10908
13
    BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_GE);
10909
486
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I32)
10910
11
    BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_GE);
10911
475
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I64)
10912
11
    BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_GE);
10913
464
10914
464
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I8)
10915
13
    BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_LE);
10916
451
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I16)
10917
13
    BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_LE);
10918
438
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I32)
10919
11
    BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_LE);
10920
427
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I64)
10921
11
    BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_LE);
10922
416
10923
416
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I8)
10924
12
    BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_GE);
10925
404
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I16)
10926
12
    BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_GE);
10927
392
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I32)
10928
11
    BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_GE);
10929
381
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I64)
10930
11
    BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_GE);
10931
370
10932
370
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I8)
10933
12
    BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_LE);
10934
358
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I16)
10935
12
    BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_LE);
10936
346
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I32)
10937
11
    BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_LE);
10938
335
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I64)
10939
11
    BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_LE);
10940
324
10941
324
  else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I8)
10942
14
    BB = EmitPartwordAtomicBinary(MI, BB, true, 0);
10943
310
  else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I16)
10944
14
    BB = EmitPartwordAtomicBinary(MI, BB, false, 0);
10945
296
  else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I32)
10946
13
    BB = EmitAtomicBinary(MI, BB, 4, 0);
10947
283
  else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I64)
10948
14
    BB = EmitAtomicBinary(MI, BB, 8, 0);
10949
269
  else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 ||
10950
269
           MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64 ||
10951
269
           (Subtarget.hasPartwordAtomics() &&
10952
221
            MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8) ||
10953
269
           (Subtarget.hasPartwordAtomics() &&
10954
199
            MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16)) {
10955
93
    bool is64bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64;
10956
93
10957
93
    auto LoadMnemonic = PPC::LDARX;
10958
93
    auto StoreMnemonic = PPC::STDCX;
10959
93
    switch (MI.getOpcode()) {
10960
93
    default:
10961
0
      llvm_unreachable("Compare and swap of unknown size");
10962
93
    case PPC::ATOMIC_CMP_SWAP_I8:
10963
22
      LoadMnemonic = PPC::LBARX;
10964
22
      StoreMnemonic = PPC::STBCX;
10965
22
      assert(Subtarget.hasPartwordAtomics() && "No support for partword atomics.");
10966
22
      break;
10967
93
    case PPC::ATOMIC_CMP_SWAP_I16:
10968
23
      LoadMnemonic = PPC::LHARX;
10969
23
      StoreMnemonic = PPC::STHCX;
10970
23
      assert(Subtarget.hasPartwordAtomics() && "No support for partword atomics.");
10971
23
      break;
10972
93
    case PPC::ATOMIC_CMP_SWAP_I32:
10973
23
      LoadMnemonic = PPC::LWARX;
10974
23
      StoreMnemonic = PPC::STWCX;
10975
23
      break;
10976
93
    case PPC::ATOMIC_CMP_SWAP_I64:
10977
25
      LoadMnemonic = PPC::LDARX;
10978
25
      StoreMnemonic = PPC::STDCX;
10979
25
      break;
10980
93
    }
10981
93
    unsigned dest = MI.getOperand(0).getReg();
10982
93
    unsigned ptrA = MI.getOperand(1).getReg();
10983
93
    unsigned ptrB = MI.getOperand(2).getReg();
10984
93
    unsigned oldval = MI.getOperand(3).getReg();
10985
93
    unsigned newval = MI.getOperand(4).getReg();
10986
93
    DebugLoc dl = MI.getDebugLoc();
10987
93
10988
93
    MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
10989
93
    MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
10990
93
    MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
10991
93
    MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
10992
93
    F->insert(It, loop1MBB);
10993
93
    F->insert(It, loop2MBB);
10994
93
    F->insert(It, midMBB);
10995
93
    F->insert(It, exitMBB);
10996
93
    exitMBB->splice(exitMBB->begin(), BB,
10997
93
                    std::next(MachineBasicBlock::iterator(MI)), BB->end());
10998
93
    exitMBB->transferSuccessorsAndUpdatePHIs(BB);
10999
93
11000
93
    //  thisMBB:
11001
93
    //   ...
11002
93
    //   fallthrough --> loopMBB
11003
93
    BB->addSuccessor(loop1MBB);
11004
93
11005
93
    // loop1MBB:
11006
93
    //   l[bhwd]arx dest, ptr
11007
93
    //   cmp[wd] dest, oldval
11008
93
    //   bne- midMBB
11009
93
    // loop2MBB:
11010
93
    //   st[bhwd]cx. newval, ptr
11011
93
    //   bne- loopMBB
11012
93
    //   b exitBB
11013
93
    // midMBB:
11014
93
    //   st[bhwd]cx. dest, ptr
11015
93
    // exitBB:
11016
93
    BB = loop1MBB;
11017
93
    BuildMI(BB, dl, TII->get(LoadMnemonic), dest).addReg(ptrA).addReg(ptrB);
11018
93
    BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), PPC::CR0)
11019
93
        .addReg(oldval)
11020
93
        .addReg(dest);
11021
93
    BuildMI(BB, dl, TII->get(PPC::BCC))
11022
93
        .addImm(PPC::PRED_NE)
11023
93
        .addReg(PPC::CR0)
11024
93
        .addMBB(midMBB);
11025
93
    BB->addSuccessor(loop2MBB);
11026
93
    BB->addSuccessor(midMBB);
11027
93
11028
93
    BB = loop2MBB;
11029
93
    BuildMI(BB, dl, TII->get(StoreMnemonic))
11030
93
        .addReg(newval)
11031
93
        .addReg(ptrA)
11032
93
        .addReg(ptrB);
11033
93
    BuildMI(BB, dl, TII->get(PPC::BCC))
11034
93
        .addImm(PPC::PRED_NE)
11035
93
        .addReg(PPC::CR0)
11036
93
        .addMBB(loop1MBB);
11037
93
    BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
11038
93
    BB->addSuccessor(loop1MBB);
11039
93
    BB->addSuccessor(exitMBB);
11040
93
11041
93
    BB = midMBB;
11042
93
    BuildMI(BB, dl, TII->get(StoreMnemonic))
11043
93
        .addReg(dest)
11044
93
        .addReg(ptrA)
11045
93
        .addReg(ptrB);
11046
93
    BB->addSuccessor(exitMBB);
11047
93
11048
93
    //  exitMBB:
11049
93
    //   ...
11050
93
    BB = exitMBB;
11051
176
  } else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 ||
11052
176
             MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) {
11053
9
    // We must use 64-bit registers for addresses when targeting 64-bit,
11054
9
    // since we're actually doing arithmetic on them.  Other registers
11055
9
    // can be 32-bit.
11056
9
    bool is64bit = Subtarget.isPPC64();
11057
9
    bool isLittleEndian = Subtarget.isLittleEndian();
11058
9
    bool is8bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8;
11059
9
11060
9
    unsigned dest = MI.getOperand(0).getReg();
11061
9
    unsigned ptrA = MI.getOperand(1).getReg();
11062
9
    unsigned ptrB = MI.getOperand(2).getReg();
11063
9
    unsigned oldval = MI.getOperand(3).getReg();
11064
9
    unsigned newval = MI.getOperand(4).getReg();
11065
9
    DebugLoc dl = MI.getDebugLoc();
11066
9
11067
9
    MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
11068
9
    MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
11069
9
    MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
11070
9
    MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
11071
9
    F->insert(It, loop1MBB);
11072
9
    F->insert(It, loop2MBB);
11073
9
    F->insert(It, midMBB);
11074
9
    F->insert(It, exitMBB);
11075
9
    exitMBB->splice(exitMBB->begin(), BB,
11076
9
                    std::next(MachineBasicBlock::iterator(MI)), BB->end());
11077
9
    exitMBB->transferSuccessorsAndUpdatePHIs(BB);
11078
9
11079
9
    MachineRegisterInfo &RegInfo = F->getRegInfo();
11080
9
    const TargetRegisterClass *RC =
11081
9
        is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
11082
9
    const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
11083
9
11084
9
    Register PtrReg = RegInfo.createVirtualRegister(RC);
11085
9
    Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
11086
9
    Register ShiftReg =
11087
9
        isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
11088
9
    Register NewVal2Reg = RegInfo.createVirtualRegister(GPRC);
11089
9
    Register NewVal3Reg = RegInfo.createVirtualRegister(GPRC);
11090
9
    Register OldVal2Reg = RegInfo.createVirtualRegister(GPRC);
11091
9
    Register OldVal3Reg = RegInfo.createVirtualRegister(GPRC);
11092
9
    Register MaskReg = RegInfo.createVirtualRegister(GPRC);
11093
9
    Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
11094
9
    Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
11095
9
    Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
11096
9
    Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
11097
9
    Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
11098
9
    Register Ptr1Reg;
11099
9
    Register TmpReg = RegInfo.createVirtualRegister(GPRC);
11100
9
    Register ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
11101
9
    //  thisMBB:
11102
9
    //   ...
11103
9
    //   fallthrough --> loopMBB
11104
9
    BB->addSuccessor(loop1MBB);
11105
9
11106
9
    // The 4-byte load must be aligned, while a char or short may be
11107
9
    // anywhere in the word.  Hence all this nasty bookkeeping code.
11108
9
    //   add ptr1, ptrA, ptrB [copy if ptrA==0]
11109
9
    //   rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
11110
9
    //   xori shift, shift1, 24 [16]
11111
9
    //   rlwinm ptr, ptr1, 0, 0, 29
11112
9
    //   slw newval2, newval, shift
11113
9
    //   slw oldval2, oldval,shift
11114
9
    //   li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
11115
9
    //   slw mask, mask2, shift
11116
9
    //   and newval3, newval2, mask
11117
9
    //   and oldval3, oldval2, mask
11118
9
    // loop1MBB:
11119
9
    //   lwarx tmpDest, ptr
11120
9
    //   and tmp, tmpDest, mask
11121
9
    //   cmpw tmp, oldval3
11122
9
    //   bne- midMBB
11123
9
    // loop2MBB:
11124
9
    //   andc tmp2, tmpDest, mask
11125
9
    //   or tmp4, tmp2, newval3
11126
9
    //   stwcx. tmp4, ptr
11127
9
    //   bne- loop1MBB
11128
9
    //   b exitBB
11129
9
    // midMBB:
11130
9
    //   stwcx. tmpDest, ptr
11131
9
    // exitBB:
11132
9
    //   srw dest, tmpDest, shift
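    //
    // For example (big-endian), a byte operand whose address ends in 0b10
    // gives shift1 = 16 and shift = shift1 ^ 24 = 8, so mask = 255 << 8 and
    // the compare-and-swap touches only bits 15:8 of the aligned word.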
11133
9
    if (ptrA != ZeroReg) {
11134
0
      Ptr1Reg = RegInfo.createVirtualRegister(RC);
11135
0
      BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
11136
0
          .addReg(ptrA)
11137
0
          .addReg(ptrB);
11138
9
    } else {
11139
9
      Ptr1Reg = ptrB;
11140
9
    }
11141
9
11142
9
    // We need to use a 32-bit subregister to avoid a register class mismatch
11143
9
    // in 64-bit mode.
11144
9
    BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
11145
9
        .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
11146
9
        .addImm(3)
11147
9
        .addImm(27)
11148
9
        .addImm(is8bit ? 28 : 27);
11149
9
    if (!isLittleEndian)
11150
8
      BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
11151
8
          .addReg(Shift1Reg)
11152
8
          .addImm(is8bit ? 24 : 16);
11153
9
    if (is64bit)
11154
7
      BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
11155
7
          .addReg(Ptr1Reg)
11156
7
          .addImm(0)
11157
7
          .addImm(61);
11158
2
    else
11159
2
      BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
11160
2
          .addReg(Ptr1Reg)
11161
2
          .addImm(0)
11162
2
          .addImm(0)
11163
2
          .addImm(29);
11164
9
    BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg)
11165
9
        .addReg(newval)
11166
9
        .addReg(ShiftReg);
11167
9
    BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg)
11168
9
        .addReg(oldval)
11169
9
        .addReg(ShiftReg);
11170
9
    if (is8bit)
11171
4
      BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
11172
5
    else {
11173
5
      BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
11174
5
      BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
11175
5
          .addReg(Mask3Reg)
11176
5
          .addImm(65535);
11177
5
    }
11178
9
    BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
11179
9
        .addReg(Mask2Reg)
11180
9
        .addReg(ShiftReg);
11181
9
    BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg)
11182
9
        .addReg(NewVal2Reg)
11183
9
        .addReg(MaskReg);
11184
9
    BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg)
11185
9
        .addReg(OldVal2Reg)
11186
9
        .addReg(MaskReg);
11187
9
11188
9
    BB = loop1MBB;
11189
9
    BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
11190
9
        .addReg(ZeroReg)
11191
9
        .addReg(PtrReg);
11192
9
    BuildMI(BB, dl, TII->get(PPC::AND), TmpReg)
11193
9
        .addReg(TmpDestReg)
11194
9
        .addReg(MaskReg);
11195
9
    BuildMI(BB, dl, TII->get(PPC::CMPW), PPC::CR0)
11196
9
        .addReg(TmpReg)
11197
9
        .addReg(OldVal3Reg);
11198
9
    BuildMI(BB, dl, TII->get(PPC::BCC))
11199
9
        .addImm(PPC::PRED_NE)
11200
9
        .addReg(PPC::CR0)
11201
9
        .addMBB(midMBB);
11202
9
    BB->addSuccessor(loop2MBB);
11203
9
    BB->addSuccessor(midMBB);
11204
9
11205
9
    BB = loop2MBB;
11206
9
    BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
11207
9
        .addReg(TmpDestReg)
11208
9
        .addReg(MaskReg);
11209
9
    BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg)
11210
9
        .addReg(Tmp2Reg)
11211
9
        .addReg(NewVal3Reg);
11212
9
    BuildMI(BB, dl, TII->get(PPC::STWCX))
11213
9
        .addReg(Tmp4Reg)
11214
9
        .addReg(ZeroReg)
11215
9
        .addReg(PtrReg);
11216
9
    BuildMI(BB, dl, TII->get(PPC::BCC))
11217
9
        .addImm(PPC::PRED_NE)
11218
9
        .addReg(PPC::CR0)
11219
9
        .addMBB(loop1MBB);
11220
9
    BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
11221
9
    BB->addSuccessor(loop1MBB);
11222
9
    BB->addSuccessor(exitMBB);
11223
9
11224
9
    BB = midMBB;
11225
9
    BuildMI(BB, dl, TII->get(PPC::STWCX))
11226
9
        .addReg(TmpDestReg)
11227
9
        .addReg(ZeroReg)
11228
9
        .addReg(PtrReg);
11229
9
    BB->addSuccessor(exitMBB);
11230
9
11231
9
    //  exitMBB:
11232
9
    //   ...
11233
9
    BB = exitMBB;
11234
9
    BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest)
11235
9
        .addReg(TmpReg)
11236
9
        .addReg(ShiftReg);
11237
167
  } else if (MI.getOpcode() == PPC::FADDrtz) {
11238
8
    // This pseudo performs an FADD with rounding mode temporarily forced
11239
8
    // to round-to-zero.  We emit this via custom inserter since the FPSCR
11240
8
    // is not modeled at the SelectionDAG level.
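    // Setting FPSCR bit 63 (MTFSB1 31) and clearing bit 62 (MTFSB0 30) below
    // selects rounding mode 0b01, i.e. round toward zero.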
11241
8
    unsigned Dest = MI.getOperand(0).getReg();
11242
8
    unsigned Src1 = MI.getOperand(1).getReg();
11243
8
    unsigned Src2 = MI.getOperand(2).getReg();
11244
8
    DebugLoc dl = MI.getDebugLoc();
11245
8
11246
8
    MachineRegisterInfo &RegInfo = F->getRegInfo();
11247
8
    unsigned MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
11248
8
11249
8
    // Save FPSCR value.
11250
8
    BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg);
11251
8
11252
8
    // Set rounding mode to round-to-zero.
11253
8
    BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1)).addImm(31);
11254
8
    BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0)).addImm(30);
11255
8
11256
8
    // Perform addition.
11257
8
    BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest).addReg(Src1).addReg(Src2);
11258
8
11259
8
    // Restore FPSCR value.
11260
8
    BuildMI(*BB, MI, dl, TII->get(PPC::MTFSFb)).addImm(1).addReg(MFFSReg);
11261
159
  } else if (MI.getOpcode() == PPC::ANDIo_1_EQ_BIT ||
11262
159
             MI.getOpcode() == PPC::ANDIo_1_GT_BIT ||
11263
159
             MI.getOpcode() == PPC::ANDIo_1_EQ_BIT8 ||
11264
159
             MI.getOpcode() == PPC::ANDIo_1_GT_BIT8) {
11265
153
    unsigned Opcode = (MI.getOpcode() == PPC::ANDIo_1_EQ_BIT8 ||
11266
153
                       MI.getOpcode() == PPC::ANDIo_1_GT_BIT8)
11267
153
                          ? PPC::ANDIo8
11268
153
                          : PPC::ANDIo;
11269
153
    bool isEQ = (MI.getOpcode() == PPC::ANDIo_1_EQ_BIT ||
11270
153
                 MI.getOpcode() == PPC::ANDIo_1_EQ_BIT8);
11271
153
11272
153
    MachineRegisterInfo &RegInfo = F->getRegInfo();
11273
153
    unsigned Dest = RegInfo.createVirtualRegister(
11274
153
        Opcode == PPC::ANDIo ? &PPC::GPRCRegClass : &PPC::G8RCRegClass);
11275
153
11276
153
    DebugLoc dl = MI.getDebugLoc();
11277
153
    BuildMI(*BB, MI, dl, TII->get(Opcode), Dest)
11278
153
        .addReg(MI.getOperand(1).getReg())
11279
153
        .addImm(1);
11280
153
    BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY),
11281
153
            MI.getOperand(0).getReg())
11282
153
        .addReg(isEQ ? PPC::CR0EQ : PPC::CR0GT);
11283
153
  } else if (MI.getOpcode() == PPC::TCHECK_RET) {
11284
1
    DebugLoc Dl = MI.getDebugLoc();
11285
1
    MachineRegisterInfo &RegInfo = F->getRegInfo();
11286
1
    unsigned CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
11287
1
    BuildMI(*BB, MI, Dl, TII->get(PPC::TCHECK), CRReg);
11288
1
    BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
11289
1
            MI.getOperand(0).getReg())
11290
1
        .addReg(CRReg);
11291
5
  } else if (MI.getOpcode() == PPC::TBEGIN_RET) {
11292
1
    DebugLoc Dl = MI.getDebugLoc();
11293
1
    unsigned Imm = MI.getOperand(1).getImm();
11294
1
    BuildMI(*BB, MI, Dl, TII->get(PPC::TBEGIN)).addImm(Imm);
11295
1
    BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
11296
1
            MI.getOperand(0).getReg())
11297
1
        .addReg(PPC::CR0EQ);
11298
4
  } else if (MI.getOpcode() == PPC::SETRNDi) {
11299
2
    DebugLoc dl = MI.getDebugLoc();
11300
2
    unsigned OldFPSCRReg = MI.getOperand(0).getReg();
11301
2
11302
2
    // Save FPSCR value.
11303
2
    BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
11304
2
11305
2
    // The floating-point rounding mode is in bits 62:63 of the FPSCR, and has
11306
2
    // the following settings:
11307
2
    //   00 Round to nearest
11308
2
    //   01 Round to 0
11309
2
    //   10 Round to +inf
11310
2
    //   11 Round to -inf
11311
2
11312
2
    // When the operand is an immediate, use its two least significant bits to
11313
2
    // set bits 62:63 of the FPSCR.
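    // For example, SETRNDi 2 (round to +inf) emits "mtfsb0 31" (Mode & 1 is 0)
    // and "mtfsb1 30" (Mode & 2 is set).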
11314
2
    unsigned Mode = MI.getOperand(1).getImm();
11315
2
    BuildMI(*BB, MI, dl, TII->get((Mode & 1) ? PPC::MTFSB1 : PPC::MTFSB0))
11316
2
      .addImm(31);
11317
2
11318
2
    BuildMI(*BB, MI, dl, TII->get((Mode & 2) ? PPC::MTFSB1 : PPC::MTFSB0))
11319
2
      .addImm(30);
11320
2
  } else if (MI.getOpcode() == PPC::SETRND) {
11321
2
    DebugLoc dl = MI.getDebugLoc();
11322
2
11323
2
    // Copy register from F8RCRegClass::SrcReg to G8RCRegClass::DestReg
11324
2
    // or copy register from G8RCRegClass::SrcReg to F8RCRegClass::DestReg.
11325
2
    // If the target doesn't have DirectMove, we should use the stack to do the
11326
2
    // conversion, because the target doesn't have the instructions like mtvsrd
11327
2
    // or mfvsrd to do this conversion directly.
11328
4
    auto copyRegFromG8RCOrF8RC = [&] (unsigned DestReg, unsigned SrcReg) {
11329
4
      if (Subtarget.hasDirectMove()) {
11330
2
        BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), DestReg)
11331
2
          .addReg(SrcReg);
11332
2
      } else {
11333
2
        // Use the stack to do the register copy.
11334
2
        unsigned StoreOp = PPC::STD, LoadOp = PPC::LFD;
11335
2
        MachineRegisterInfo &RegInfo = F->getRegInfo();
11336
2
        const TargetRegisterClass *RC = RegInfo.getRegClass(SrcReg);
11337
2
        if (RC == &PPC::F8RCRegClass) {
11338
1
          // Copy register from F8RCRegClass to G8RCRegclass.
11339
1
          assert((RegInfo.getRegClass(DestReg) == &PPC::G8RCRegClass) &&
11340
1
                 "Unsupported RegClass.");
11341
1
11342
1
          StoreOp = PPC::STFD;
11343
1
          LoadOp = PPC::LD;
11344
1
        } else {
11345
1
          // Copy register from G8RCRegClass to F8RCRegclass.
11346
1
          assert((RegInfo.getRegClass(SrcReg) == &PPC::G8RCRegClass) &&
11347
1
                 (RegInfo.getRegClass(DestReg) == &PPC::F8RCRegClass) &&
11348
1
                 "Unsupported RegClass.");
11349
1
        }
11350
2
11351
2
        MachineFrameInfo &MFI = F->getFrameInfo();
11352
2
        int FrameIdx = MFI.CreateStackObject(8, 8, false);
11353
2
11354
2
        MachineMemOperand *MMOStore = F->getMachineMemOperand(
11355
2
          MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
11356
2
          MachineMemOperand::MOStore, MFI.getObjectSize(FrameIdx),
11357
2
          MFI.getObjectAlignment(FrameIdx));
11358
2
11359
2
        // Store the SrcReg into the stack.
11360
2
        BuildMI(*BB, MI, dl, TII->get(StoreOp))
11361
2
          .addReg(SrcReg)
11362
2
          .addImm(0)
11363
2
          .addFrameIndex(FrameIdx)
11364
2
          .addMemOperand(MMOStore);
11365
2
11366
2
        MachineMemOperand *MMOLoad = F->getMachineMemOperand(
11367
2
          MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
11368
2
          MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIdx),
11369
2
          MFI.getObjectAlignment(FrameIdx));
11370
2
11371
2
        // Load from the stack where SrcReg is stored, and save to DestReg,
11372
2
        // so we have done the RegClass conversion from RegClass::SrcReg to
11373
2
        // RegClass::DestReg.
11374
2
        BuildMI(*BB, MI, dl, TII->get(LoadOp), DestReg)
11375
2
          .addImm(0)
11376
2
          .addFrameIndex(FrameIdx)
11377
2
          .addMemOperand(MMOLoad);
11378
2
      }
11379
4
    };
11380
2
11381
2
    unsigned OldFPSCRReg = MI.getOperand(0).getReg();
11382
2
11383
2
    // Save FPSCR value.
11384
2
    BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
11385
2
11386
2
    // When the operand is gprc register, use two least significant bits of the
11387
2
    // register and mtfsf instruction to set the bits 62:63 of FPSCR.
11388
2
    //
11389
2
    // copy OldFPSCRTmpReg, OldFPSCRReg
11390
2
    // (INSERT_SUBREG ExtSrcReg, (IMPLICIT_DEF ImDefReg), SrcOp, 1)
11391
2
    // rldimi NewFPSCRTmpReg, ExtSrcReg, OldFPSCRReg, 0, 62
11392
2
    // copy NewFPSCRReg, NewFPSCRTmpReg
11393
2
    // mtfsf 255, NewFPSCRReg
11394
2
    MachineOperand SrcOp = MI.getOperand(1);
11395
2
    MachineRegisterInfo &RegInfo = F->getRegInfo();
11396
2
    unsigned OldFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
11397
2
11398
2
    copyRegFromG8RCOrF8RC(OldFPSCRTmpReg, OldFPSCRReg);
11399
2
11400
2
    unsigned ImDefReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
11401
2
    unsigned ExtSrcReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
11402
2
11403
2
    // The first operand of INSERT_SUBREG should be a register which has
11404
2
    // subregisters; we only care about its RegClass, so we should use an
11405
2
    // IMPLICIT_DEF register.
11406
2
    BuildMI(*BB, MI, dl, TII->get(TargetOpcode::IMPLICIT_DEF), ImDefReg);
11407
2
    BuildMI(*BB, MI, dl, TII->get(PPC::INSERT_SUBREG), ExtSrcReg)
11408
2
      .addReg(ImDefReg)
11409
2
      .add(SrcOp)
11410
2
      .addImm(1);
11411
2
11412
2
    unsigned NewFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
11413
2
    BuildMI(*BB, MI, dl, TII->get(PPC::RLDIMI), NewFPSCRTmpReg)
11414
2
      .addReg(OldFPSCRTmpReg)
11415
2
      .addReg(ExtSrcReg)
11416
2
      .addImm(0)
11417
2
      .addImm(62);
11418
2
11419
2
    unsigned NewFPSCRReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
11420
2
    copyRegFromG8RCOrF8RC(NewFPSCRReg, NewFPSCRTmpReg);
11421
2
11422
2
    // The mask 255 means that bits 32:63 of NewFPSCRReg are written into bits
11423
2
    // 32:63 of the FPSCR.
11424
2
    BuildMI(*BB, MI, dl, TII->get(PPC::MTFSF))
11425
2
      .addImm(255)
11426
2
      .addReg(NewFPSCRReg)
11427
2
      .addImm(0)
11428
2
      .addImm(0);
11429
2
  } else {
11430
0
    llvm_unreachable("Unexpected instr type to insert");
11431
0
  }
11432
1.42k
11433
1.42k
  MI.eraseFromParent(); // The pseudo instruction is gone now.
11434
1.42k
  return BB;
11435
1.42k
}
11436
11437
//===----------------------------------------------------------------------===//
11438
// Target Optimization Hooks
11439
//===----------------------------------------------------------------------===//
11440
11441
41
static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget) {
11442
41
  // For the estimates, convergence is quadratic, so we essentially double the
11443
41
  // number of digits correct after every iteration. For both FRE and FRSQRTE,
11444
41
  // the minimum architected relative accuracy is 2^-5. When hasRecipPrec(),
11445
41
  // this is 2^-14. IEEE float has 23 digits and double has 52 digits.
11446
41
  int RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;
11447
41
  if (VT.getScalarType() == MVT::f64)
11448
19
    RefinementSteps++;
11449
41
  return RefinementSteps;
11450
41
}
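A quick standalone check (plain C++; the accuracy and mantissa bit counts come from the comment above) that the step counts returned by getEstimateRefinementSteps are exactly what quadratic convergence requires:

#include <cstdio>

// Doubling the number of correct bits per iteration, how many steps does it
// take to go from StartBits to at least TargetBits?
static int stepsNeeded(int StartBits, int TargetBits) {
  int Steps = 0;
  for (int Bits = StartBits; Bits < TargetBits; Bits *= 2)
    ++Steps;
  return Steps;
}

int main() {
  std::printf("f32 from 2^-5:  %d\n", stepsNeeded(5, 23));  // 3
  std::printf("f64 from 2^-5:  %d\n", stepsNeeded(5, 52));  // 4
  std::printf("f32 from 2^-14: %d\n", stepsNeeded(14, 23)); // 1
  std::printf("f64 from 2^-14: %d\n", stepsNeeded(14, 52)); // 2
  return 0;
}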
11451
11452
SDValue PPCTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
11453
                                           int Enabled, int &RefinementSteps,
11454
                                           bool &UseOneConstNR,
11455
23
                                           bool Reciprocal) const {
11456
23
  EVT VT = Operand.getValueType();
11457
23
  if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
11458
23
      (VT == MVT::f64 && Subtarget.hasFRSQRTE()) ||
11459
23
      (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
11460
23
      (VT == MVT::v2f64 && Subtarget.hasVSX()) ||
11461
23
      (VT == MVT::v4f32 && Subtarget.hasQPX()) ||
11462
23
      (VT == MVT::v4f64 && Subtarget.hasQPX())) {
11463
23
    if (RefinementSteps == ReciprocalEstimate::Unspecified)
11464
21
      RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
11465
23
11466
23
    // The Newton-Raphson computation with a single constant does not provide
11467
23
    // enough accuracy on some CPUs.
11468
23
    UseOneConstNR = !Subtarget.needsTwoConstNR();
11469
23
    return DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand);
11470
23
  }
11471
0
  return SDValue();
11472
0
}
11473
11474
SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand, SelectionDAG &DAG,
11475
                                            int Enabled,
11476
25
                                            int &RefinementSteps) const {
11477
25
  EVT VT = Operand.getValueType();
11478
25
  if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
11479
25
      (VT == MVT::f64 && Subtarget.hasFRE()) ||
11480
25
      (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
11481
25
      (VT == MVT::v2f64 && Subtarget.hasVSX()) ||
11482
25
      (VT == MVT::v4f32 && Subtarget.hasQPX()) ||
11483
25
      (VT == MVT::v4f64 && Subtarget.hasQPX())) {
11484
20
    if (RefinementSteps == ReciprocalEstimate::Unspecified)
11485
20
      RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
11486
20
    return DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand);
11487
20
  }
11488
5
  return SDValue();
11489
5
}
11490
11491
43
unsigned PPCTargetLowering::combineRepeatedFPDivisors() const {
11492
43
  // Note: This functionality is used only when unsafe-fp-math is enabled, and
11493
43
  // on cores with reciprocal estimates (which are used when unsafe-fp-math is
11494
43
  // enabled for division), this functionality is redundant with the default
11495
43
  // combiner logic (once the division -> reciprocal/multiply transformation
11496
43
  // has taken place). As a result, this matters more for older cores than for
11497
43
  // newer ones.
11498
43
11499
43
  // Combine multiple FDIVs with the same divisor into multiple FMULs by the
11500
43
  // reciprocal if there are two or more FDIVs (for embedded cores with only
11501
43
  // one FP pipeline) or three or more FDIVs (for generic OOO cores).
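  // For example, with a threshold of 2, "a/d" and "b/d" in one block become
  // eligible to be rewritten as "r = 1.0/d; a*r; b*r" by the combiner on the
  // embedded cores listed below.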
11502
43
  switch (Subtarget.getDarwinDirective()) {
11503
43
  default:
11504
33
    return 3;
11505
43
  case PPC::DIR_440:
11506
10
  case PPC::DIR_A2:
11507
10
  case PPC::DIR_E500:
11508
10
  case PPC::DIR_E500mc:
11509
10
  case PPC::DIR_E5500:
11510
10
    return 2;
11511
43
  }
11512
43
}
11513
11514
// isConsecutiveLSLoc needs to work even if all adds have not yet been
11515
// collapsed, and so we need to look through chains of them.
11516
static void getBaseWithConstantOffset(SDValue Loc, SDValue &Base,
11517
12.1k
                                     int64_t& Offset, SelectionDAG &DAG) {
11518
12.1k
  if (DAG.isBaseWithConstantOffset(Loc)) {
11519
6.28k
    Base = Loc.getOperand(0);
11520
6.28k
    Offset += cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue();
11521
6.28k
11522
6.28k
    // The base might itself be a base plus an offset, and if so, accumulate
11523
6.28k
    // that as well.
11524
6.28k
    getBaseWithConstantOffset(Loc.getOperand(0), Base, Offset, DAG);
11525
6.28k
  }
11526
12.1k
}
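A self-contained analogue (hypothetical Node type, not SelectionDAG) of the recursion above, showing how chained base-plus-constant nodes collapse into a single accumulated offset:

#include <cassert>
#include <cstdint>

struct Node {
  const Node *Base; // non-null if this node is Base + Offset
  int64_t Offset;   // constant addend when Base != nullptr
};

static void getBaseWithConstantOffset(const Node *Loc, const Node *&Base,
                                      int64_t &Offset) {
  if (Loc->Base) {
    Base = Loc->Base;
    Offset += Loc->Offset;
    // The base might itself be a base plus an offset; accumulate that too.
    getBaseWithConstantOffset(Loc->Base, Base, Offset);
  }
}

int main() {
  Node Root{nullptr, 0};
  Node Add1{&Root, 8}; // Root + 8
  Node Add2{&Add1, 4}; // (Root + 8) + 4
  const Node *Base = &Add2;
  int64_t Offset = 0;
  getBaseWithConstantOffset(&Add2, Base, Offset);
  assert(Base == &Root && Offset == 12);
  return 0;
}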
11527
11528
static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base,
11529
                            unsigned Bytes, int Dist,
11530
3.11k
                            SelectionDAG &DAG) {
11531
3.11k
  if (VT.getSizeInBits() / 8 != Bytes)
11532
172
    return false;
11533
2.93k
11534
2.93k
  SDValue BaseLoc = Base->getBasePtr();
11535
2.93k
  if (Loc.getOpcode() == ISD::FrameIndex) {
11536
1
    if (BaseLoc.getOpcode() != ISD::FrameIndex)
11537
1
      return false;
11538
0
    const MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
11539
0
    int FI  = cast<FrameIndexSDNode>(Loc)->getIndex();
11540
0
    int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
11541
0
    int FS  = MFI.getObjectSize(FI);
11542
0
    int BFS = MFI.getObjectSize(BFI);
11543
0
    if (FS != BFS || FS != (int)Bytes) return false;
11544
0
    return MFI.getObjectOffset(FI) == (MFI.getObjectOffset(BFI) + Dist*Bytes);
11545
0
  }
11546
2.93k
11547
2.93k
  SDValue Base1 = Loc, Base2 = BaseLoc;
11548
2.93k
  int64_t Offset1 = 0, Offset2 = 0;
11549
2.93k
  getBaseWithConstantOffset(Loc, Base1, Offset1, DAG);
11550
2.93k
  getBaseWithConstantOffset(BaseLoc, Base2, Offset2, DAG);
11551
2.93k
  if (Base1 == Base2 && Offset1 == (Offset2 + Dist * Bytes))
11552
228
    return true;
11553
2.71k
11554
2.71k
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
11555
2.71k
  const GlobalValue *GV1 = nullptr;
11556
2.71k
  const GlobalValue *GV2 = nullptr;
11557
2.71k
  Offset1 = 0;
11558
2.71k
  Offset2 = 0;
11559
2.71k
  bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1);
11560
2.71k
  bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2);
11561
2.71k
  if (isGA1 && isGA2 && GV1 == GV2)
11562
12
    return Offset1 == (Offset2 + Dist*Bytes);
11563
2.69k
  return false;
11564
2.69k
}
11565
11566
// Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does
11567
// not enforce equality of the chain operands.
11568
static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base,
11569
                            unsigned Bytes, int Dist,
11570
3.11k
                            SelectionDAG &DAG) {
11571
3.11k
  if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(N)) {
11572
2.16k
    EVT VT = LS->getMemoryVT();
11573
2.16k
    SDValue Loc = LS->getBasePtr();
11574
2.16k
    return isConsecutiveLSLoc(Loc, VT, Base, Bytes, Dist, DAG);
11575
2.16k
  }
11576
952
11577
952
  if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
11578
949
    EVT VT;
11579
949
    switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
11581
949
    default: return false;
11581
949
    case Intrinsic::ppc_qpx_qvlfd:
11582
754
    case Intrinsic::ppc_qpx_qvlfda:
11583
754
      VT = MVT::v4f64;
11584
754
      break;
11585
754
    case Intrinsic::ppc_qpx_qvlfs:
11586
4
    case Intrinsic::ppc_qpx_qvlfsa:
11587
4
      VT = MVT::v4f32;
11588
4
      break;
11589
4
    case Intrinsic::ppc_qpx_qvlfcd:
11590
0
    case Intrinsic::ppc_qpx_qvlfcda:
11591
0
      VT = MVT::v2f64;
11592
0
      break;
11593
0
    case Intrinsic::ppc_qpx_qvlfcs:
11594
0
    case Intrinsic::ppc_qpx_qvlfcsa:
11595
0
      VT = MVT::v2f32;
11596
0
      break;
11597
191
    case Intrinsic::ppc_qpx_qvlfiwa:
11598
191
    case Intrinsic::ppc_qpx_qvlfiwz:
11599
191
    case Intrinsic::ppc_altivec_lvx:
11600
191
    case Intrinsic::ppc_altivec_lvxl:
11601
191
    case Intrinsic::ppc_vsx_lxvw4x:
11602
191
    case Intrinsic::ppc_vsx_lxvw4x_be:
11603
191
      VT = MVT::v4i32;
11604
191
      break;
11605
191
    case Intrinsic::ppc_vsx_lxvd2x:
11606
0
    case Intrinsic::ppc_vsx_lxvd2x_be:
11607
0
      VT = MVT::v2f64;
11608
0
      break;
11609
0
    case Intrinsic::ppc_altivec_lvebx:
11610
0
      VT = MVT::i8;
11611
0
      break;
11612
0
    case Intrinsic::ppc_altivec_lvehx:
11613
0
      VT = MVT::i16;
11614
0
      break;
11615
0
    case Intrinsic::ppc_altivec_lvewx:
11616
0
      VT = MVT::i32;
11617
0
      break;
11618
949
    }
11619
949
11620
949
    return isConsecutiveLSLoc(N->getOperand(2), VT, Base, Bytes, Dist, DAG);
11621
949
  }
11622
3
11623
3
  if (N->getOpcode() == ISD::INTRINSIC_VOID) {
11624
1
    EVT VT;
11625
1
    switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
11626
1
    default: return false;
11627
1
    case Intrinsic::ppc_qpx_qvstfd:
11628
0
    case Intrinsic::ppc_qpx_qvstfda:
11629
0
      VT = MVT::v4f64;
11630
0
      break;
11631
0
    case Intrinsic::ppc_qpx_qvstfs:
11632
0
    case Intrinsic::ppc_qpx_qvstfsa:
11633
0
      VT = MVT::v4f32;
11634
0
      break;
11635
0
    case Intrinsic::ppc_qpx_qvstfcd:
11636
0
    case Intrinsic::ppc_qpx_qvstfcda:
11637
0
      VT = MVT::v2f64;
11638
0
      break;
11639
0
    case Intrinsic::ppc_qpx_qvstfcs:
11640
0
    case Intrinsic::ppc_qpx_qvstfcsa:
11641
0
      VT = MVT::v2f32;
11642
0
      break;
11643
1
    case Intrinsic::ppc_qpx_qvstfiw:
11644
1
    case Intrinsic::ppc_qpx_qvstfiwa:
11645
1
    case Intrinsic::ppc_altivec_stvx:
11646
1
    case Intrinsic::ppc_altivec_stvxl:
11647
1
    case Intrinsic::ppc_vsx_stxvw4x:
11648
1
      VT = MVT::v4i32;
11649
1
      break;
11650
1
    case Intrinsic::ppc_vsx_stxvd2x:
11651
0
      VT = MVT::v2f64;
11652
0
      break;
11653
1
    case Intrinsic::ppc_vsx_stxvw4x_be:
11654
0
      VT = MVT::v4i32;
11655
0
      break;
11656
1
    case Intrinsic::ppc_vsx_stxvd2x_be:
11657
0
      VT = MVT::v2f64;
11658
0
      break;
11659
1
    case Intrinsic::ppc_altivec_stvebx:
11660
0
      VT = MVT::i8;
11661
0
      break;
11662
1
    case Intrinsic::ppc_altivec_stvehx:
11663
0
      VT = MVT::i16;
11664
0
      break;
11665
1
    case Intrinsic::ppc_altivec_stvewx:
11666
0
      VT = MVT::i32;
11667
0
      break;
11668
1
    }
11669
1
11670
1
    return isConsecutiveLSLoc(N->getOperand(3), VT, Base, Bytes, Dist, DAG);
11671
1
  }
11672
2
11673
2
  return false;
11674
2
}
11675
11676
// Return true if there is a nearby consecutive load to the one provided
11677
// (regardless of alignment). We search up and down the chain, looking through
11678
// token factors and other loads (but nothing else). As a result, a true result
11679
// indicates that it is safe to create a new consecutive load adjacent to the
11680
// load provided.
11681
107
static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
11682
107
  SDValue Chain = LD->getChain();
11683
107
  EVT VT = LD->getMemoryVT();
11684
107
11685
107
  SmallSet<SDNode *, 16> LoadRoots;
11686
107
  SmallVector<SDNode *, 8> Queue(1, Chain.getNode());
11687
107
  SmallSet<SDNode *, 16> Visited;
11688
107
11689
107
  // First, search up the chain, branching to follow all token-factor operands.
11690
107
  // If we find a consecutive load, then we're done; otherwise, record all
11691
107
  // nodes just above the top-level loads and token factors.
11692
694
  while (!Queue.empty()) {
11693
588
    SDNode *ChainNext = Queue.pop_back_val();
11694
588
    if (!Visited.insert(ChainNext).second)
11695
0
      continue;
11696
588
11697
588
    if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(ChainNext)) {
11698
350
      if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
11699
1
        return true;
11700
349
11701
349
      if (!Visited.count(ChainLD->getChain().getNode()))
11702
13
        Queue.push_back(ChainLD->getChain().getNode());
11703
349
    } else if (ChainNext->getOpcode() == ISD::TokenFactor) {
11704
132
      for (const SDUse &O : ChainNext->ops())
11705
468
        if (!Visited.count(O.getNode()))
11706
468
          Queue.push_back(O.getNode());
11707
132
    } else
11708
106
      LoadRoots.insert(ChainNext);
11709
588
  }
11710
107
11711
107
  // Second, search down the chain, starting from the top-level nodes recorded
11712
107
  // in the first phase. These top-level nodes are the nodes just above all
11713
107
  // loads and token factors. Starting with their uses, recursively look through
11714
107
  // all loads (just the chain uses) and token factors to find a consecutive
11715
107
  // load.
11716
107
  Visited.clear();
11717
106
  Queue.clear();
11718
106
11719
106
  for (SmallSet<SDNode *, 16>::iterator I = LoadRoots.begin(),
11720
162
       IE = LoadRoots.end(); I != IE; 
++I56
) {
11721
106
    Queue.push_back(*I);
11722
106
11723
2.49k
    while (!Queue.empty()) {
11724
2.43k
      SDNode *LoadRoot = Queue.pop_back_val();
11725
2.43k
      if (!Visited.insert(LoadRoot).second)
11726
8
        continue;
11727
2.43k
11728
2.43k
      if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(LoadRoot))
11729
1.84k
        if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
11730
50
          return true;
11731
2.38k
11732
2.38k
      for (SDNode::use_iterator UI = LoadRoot->use_begin(),
11733
9.01k
           UE = LoadRoot->use_end(); UI != UE; 
++UI6.63k
)
11734
6.63k
        if (((isa<MemSDNode>(*UI) &&
11735
6.63k
            
cast<MemSDNode>(*UI)->getChain().getNode() == LoadRoot2.27k
) ||
11736
6.63k
            
UI->getOpcode() == ISD::TokenFactor4.36k
) &&
!Visited.count(*UI)4.10k
)
11737
2.74k
          Queue.push_back(*UI);
11738
2.38k
    }
11739
106
  }
11740
106
11741
106
  
return false56
;
11742
106
}
11743
11744
/// This function is called when we have proved that a SETCC node can be
/// replaced by subtraction (and other supporting instructions) so that the
/// result of the comparison is kept in a GPR instead of a CR. This function is
/// purely for codegen purposes and has some flags to guide the codegen
/// process.
static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement,
                                     bool Swap, SDLoc &DL, SelectionDAG &DAG) {
  assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");

  // Zero extend the operands to the largest legal integer. Originally, they
  // must be of a strictly smaller size.
  auto Op0 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(0),
                         DAG.getConstant(Size, DL, MVT::i32));
  auto Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1),
                         DAG.getConstant(Size, DL, MVT::i32));

  // Swap if needed. Depends on the condition code.
  if (Swap)
    std::swap(Op0, Op1);

  // Subtract extended integers.
  auto SubNode = DAG.getNode(ISD::SUB, DL, MVT::i64, Op0, Op1);

  // Move the sign bit to the least significant position and zero out the rest.
  // Now the least significant bit carries the result of the original
  // comparison.
  auto Shifted = DAG.getNode(ISD::SRL, DL, MVT::i64, SubNode,
                             DAG.getConstant(Size - 1, DL, MVT::i32));
  auto Final = Shifted;

  // Complement the result if needed. Based on the condition code.
  if (Complement)
    Final = DAG.getNode(ISD::XOR, DL, MVT::i64, Shifted,
                        DAG.getConstant(1, DL, MVT::i64));

  return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Final);
}

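The subtraction trick above relies on the operands having been zero-extended from a strictly smaller width: the wide difference then lies well inside the signed range, so its sign bit is exactly the unsigned less-than result. A minimal scalar sketch of that invariant, assuming 32-bit inputs widened to a 64-bit largest legal type (the helper name is illustrative, not LLVM API):

#include <cassert>
#include <cstdint>

// Models ZERO_EXTEND -> SUB -> SRL (Size - 1) -> TRUNCATE for SETULT.
static bool ultViaSub(uint32_t X, uint32_t Y) {
  // Both operands are < 2^32, so the 64-bit difference lies in
  // (-2^32, 2^32); bit 63 of its two's-complement form is set
  // exactly when X < Y.
  uint64_t Sub = (uint64_t)X - (uint64_t)Y; // ISD::SUB on the extended values
  return (Sub >> 63) != 0;                  // ISD::SRL by Size - 1, TRUNCATE to i1
}

int main() {
  for (uint32_t X = 0; X < 300; X += 7)
    for (uint32_t Y = 0; Y < 300; Y += 11)
      assert(ultViaSub(X, Y) == (X < Y));
  // The Swap/Complement flags cover the other predicates:
  // SETULE = !(Y < X), SETUGT = (Y < X), SETUGE = !(X < Y).
  return 0;
}
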
SDValue PPCTargetLowering::ConvertSETCCToSubtract(SDNode *N,
                                                  DAGCombinerInfo &DCI) const {
  assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");

  SelectionDAG &DAG = DCI.DAG;
  SDLoc DL(N);

  // The size of the integers being compared has a critical role in the
  // following analysis, so we prefer to do this when all types are legal.
  if (!DCI.isAfterLegalizeDAG())
    return SDValue();

  // If all users of the SETCC extend its value to a legal integer type,
  // then we replace the SETCC with a subtraction.
  for (SDNode::use_iterator UI = N->use_begin(),
       UE = N->use_end(); UI != UE; ++UI) {
    if (UI->getOpcode() != ISD::ZERO_EXTEND)
      return SDValue();
  }

  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  auto OpSize = N->getOperand(0).getValueSizeInBits();

  unsigned Size = DAG.getDataLayout().getLargestLegalIntTypeSizeInBits();

  if (OpSize < Size) {
    switch (CC) {
    default: break;
    case ISD::SETULT:
      return generateEquivalentSub(N, Size, false, false, DL, DAG);
    case ISD::SETULE:
      return generateEquivalentSub(N, Size, true, true, DL, DAG);
    case ISD::SETUGT:
      return generateEquivalentSub(N, Size, false, true, DL, DAG);
    case ISD::SETUGE:
      return generateEquivalentSub(N, Size, true, false, DL, DAG);
    }
  }

  return SDValue();
}

SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
                                                  DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);

  assert(Subtarget.useCRBits() && "Expecting to be tracking CR bits");
  // If we're tracking CR bits, we need to be careful that we don't have:
  //   trunc(binary-ops(zext(x), zext(y)))
  // or
  //   trunc(binary-ops(binary-ops(zext(x), zext(y)), ...)
  // such that we're unnecessarily moving things into GPRs when it would be
  // better to keep them in CR bits.

  // Note that trunc here can be an actual i1 trunc, or can be the effective
  // truncation that comes from a setcc or select_cc.
  if (N->getOpcode() == ISD::TRUNCATE &&
      N->getValueType(0) != MVT::i1)
    return SDValue();

  if (N->getOperand(0).getValueType() != MVT::i32 &&
      N->getOperand(0).getValueType() != MVT::i64)
    return SDValue();

  if (N->getOpcode() == ISD::SETCC ||
      N->getOpcode() == ISD::SELECT_CC) {
    // If we're looking at a comparison, then we need to make sure that the
    // high bits (all except for the first) don't affect the result.
    ISD::CondCode CC =
      cast<CondCodeSDNode>(N->getOperand(
        N->getOpcode() == ISD::SETCC ? 2 : 4))->get();
    unsigned OpBits = N->getOperand(0).getValueSizeInBits();

    if (ISD::isSignedIntSetCC(CC)) {
      if (DAG.ComputeNumSignBits(N->getOperand(0)) != OpBits ||
          DAG.ComputeNumSignBits(N->getOperand(1)) != OpBits)
        return SDValue();
    } else if (ISD::isUnsignedIntSetCC(CC)) {
      if (!DAG.MaskedValueIsZero(N->getOperand(0),
                                 APInt::getHighBitsSet(OpBits, OpBits-1)) ||
          !DAG.MaskedValueIsZero(N->getOperand(1),
                                 APInt::getHighBitsSet(OpBits, OpBits-1)))
        return (N->getOpcode() == ISD::SETCC ? ConvertSETCCToSubtract(N, DCI)
                                             : SDValue());
    } else {
      // This is neither a signed nor an unsigned comparison, just make sure
      // that the high bits are equal.
      KnownBits Op1Known = DAG.computeKnownBits(N->getOperand(0));
      KnownBits Op2Known = DAG.computeKnownBits(N->getOperand(1));

      // We don't really care about what is known about the first bit (if
      // anything), so clear it in all masks prior to comparing them.
      Op1Known.Zero.clearBit(0); Op1Known.One.clearBit(0);
      Op2Known.Zero.clearBit(0); Op2Known.One.clearBit(0);

      if (Op1Known.Zero != Op2Known.Zero || Op1Known.One != Op2Known.One)
        return SDValue();
    }
  }

  // We now know that the higher-order bits are irrelevant; we just need to
  // make sure that all of the intermediate operations are bit operations, and
  // all inputs are extensions.
  if (N->getOperand(0).getOpcode() != ISD::AND &&
      N->getOperand(0).getOpcode() != ISD::OR &&
      N->getOperand(0).getOpcode() != ISD::XOR &&
      N->getOperand(0).getOpcode() != ISD::SELECT &&
      N->getOperand(0).getOpcode() != ISD::SELECT_CC &&
      N->getOperand(0).getOpcode() != ISD::TRUNCATE &&
      N->getOperand(0).getOpcode() != ISD::SIGN_EXTEND &&
      N->getOperand(0).getOpcode() != ISD::ZERO_EXTEND &&
      N->getOperand(0).getOpcode() != ISD::ANY_EXTEND)
    return SDValue();

  if ((N->getOpcode() == ISD::SETCC || N->getOpcode() == ISD::SELECT_CC) &&
      N->getOperand(1).getOpcode() != ISD::AND &&
      N->getOperand(1).getOpcode() != ISD::OR &&
      N->getOperand(1).getOpcode() != ISD::XOR &&
      N->getOperand(1).getOpcode() != ISD::SELECT &&
      N->getOperand(1).getOpcode() != ISD::SELECT_CC &&
      N->getOperand(1).getOpcode() != ISD::TRUNCATE &&
      N->getOperand(1).getOpcode() != ISD::SIGN_EXTEND &&
      N->getOperand(1).getOpcode() != ISD::ZERO_EXTEND &&
      N->getOperand(1).getOpcode() != ISD::ANY_EXTEND)
    return SDValue();

  SmallVector<SDValue, 4> Inputs;
  SmallVector<SDValue, 8> BinOps, PromOps;
  SmallPtrSet<SDNode *, 16> Visited;

  for (unsigned i = 0; i < 2; ++i) {
    if (((N->getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
          N->getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
          N->getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
          N->getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
        isa<ConstantSDNode>(N->getOperand(i)))
      Inputs.push_back(N->getOperand(i));
    else
      BinOps.push_back(N->getOperand(i));

    if (N->getOpcode() == ISD::TRUNCATE)
      break;
  }

  // Visit all inputs, collect all binary operations (and, or, xor and
  // select) that are all fed by extensions.
  while (!BinOps.empty()) {
    SDValue BinOp = BinOps.back();
    BinOps.pop_back();

    if (!Visited.insert(BinOp.getNode()).second)
      continue;

    PromOps.push_back(BinOp);

    for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
      // The condition of the select is not promoted.
      if (BinOp.getOpcode() == ISD::SELECT && i == 0)
        continue;
      if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
        continue;

      if (((BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
            BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
            BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
           BinOp.getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
          isa<ConstantSDNode>(BinOp.getOperand(i))) {
        Inputs.push_back(BinOp.getOperand(i));
      } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
                 BinOp.getOperand(i).getOpcode() == ISD::OR  ||
                 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
                 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
                 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC ||
                 BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
                 BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
                 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
                 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) {
        BinOps.push_back(BinOp.getOperand(i));
      } else {
        // We have an input that is not an extension or another binary
        // operation; we'll abort this transformation.
        return SDValue();
      }
    }
  }

  // Make sure that this is a self-contained cluster of operations (which
  // is not quite the same thing as saying that everything has only one
  // use).
  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
    if (isa<ConstantSDNode>(Inputs[i]))
      continue;

    for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(),
                              UE = Inputs[i].getNode()->use_end();
         UI != UE; ++UI) {
      SDNode *User = *UI;
      if (User != N && !Visited.count(User))
        return SDValue();

      // Make sure that we're not going to promote the non-output-value
      // operand(s) or SELECT or SELECT_CC.
      // FIXME: Although we could sometimes handle this, and it does occur in
      // practice that one of the condition inputs to the select is also one of
      // the outputs, we currently can't deal with this.
      if (User->getOpcode() == ISD::SELECT) {
        if (User->getOperand(0) == Inputs[i])
          return SDValue();
      } else if (User->getOpcode() == ISD::SELECT_CC) {
        if (User->getOperand(0) == Inputs[i] ||
            User->getOperand(1) == Inputs[i])
          return SDValue();
      }
    }
  }

  for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
    for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(),
                              UE = PromOps[i].getNode()->use_end();
         UI != UE; ++UI) {
      SDNode *User = *UI;
      if (User != N && !Visited.count(User))
        return SDValue();

      // Make sure that we're not going to promote the non-output-value
      // operand(s) or SELECT or SELECT_CC.
      // FIXME: Although we could sometimes handle this, and it does occur in
      // practice that one of the condition inputs to the select is also one of
      // the outputs, we currently can't deal with this.
      if (User->getOpcode() == ISD::SELECT) {
        if (User->getOperand(0) == PromOps[i])
          return SDValue();
      } else if (User->getOpcode() == ISD::SELECT_CC) {
        if (User->getOperand(0) == PromOps[i] ||
            User->getOperand(1) == PromOps[i])
          return SDValue();
      }
    }
  }

  // Replace all inputs with the extension operand.
  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
    // Constants may have users outside the cluster of to-be-promoted nodes,
    // and so we need to replace those as we do the promotions.
    if (isa<ConstantSDNode>(Inputs[i]))
      continue;
    else
      DAG.ReplaceAllUsesOfValueWith(Inputs[i], Inputs[i].getOperand(0));
  }

  std::list<HandleSDNode> PromOpHandles;
  for (auto &PromOp : PromOps)
    PromOpHandles.emplace_back(PromOp);

  // Replace all operations (these are all the same, but have a different
  // (i1) return type). DAG.getNode will validate that the types of
  // a binary operator match, so go through the list in reverse so that
  // we've likely promoted both operands first. Any intermediate truncations or
  // extensions disappear.
  while (!PromOpHandles.empty()) {
    SDValue PromOp = PromOpHandles.back().getValue();
    PromOpHandles.pop_back();

    if (PromOp.getOpcode() == ISD::TRUNCATE ||
        PromOp.getOpcode() == ISD::SIGN_EXTEND ||
        PromOp.getOpcode() == ISD::ZERO_EXTEND ||
        PromOp.getOpcode() == ISD::ANY_EXTEND) {
      if (!isa<ConstantSDNode>(PromOp.getOperand(0)) &&
          PromOp.getOperand(0).getValueType() != MVT::i1) {
        // The operand is not yet ready (see comment below).
        PromOpHandles.emplace_front(PromOp);
        continue;
      }

      SDValue RepValue = PromOp.getOperand(0);
      if (isa<ConstantSDNode>(RepValue))
        RepValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, RepValue);

      DAG.ReplaceAllUsesOfValueWith(PromOp, RepValue);
      continue;
    }

    unsigned C;
    switch (PromOp.getOpcode()) {
    default:             C = 0; break;
    case ISD::SELECT:    C = 1; break;
    case ISD::SELECT_CC: C = 2; break;
    }

    if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
         PromOp.getOperand(C).getValueType() != MVT::i1) ||
        (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
         PromOp.getOperand(C+1).getValueType() != MVT::i1)) {
      // The to-be-promoted operands of this node have not yet been
      // promoted (this should be rare because we're going through the
      // list backward, but if one of the operands has several users in
      // this cluster of to-be-promoted nodes, it is possible).
      PromOpHandles.emplace_front(PromOp);
      continue;
    }

    SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
                                PromOp.getNode()->op_end());

    // If there are any constant inputs, make sure they're replaced now.
    for (unsigned i = 0; i < 2; ++i)
      if (isa<ConstantSDNode>(Ops[C+i]))
        Ops[C+i] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ops[C+i]);

    DAG.ReplaceAllUsesOfValueWith(PromOp,
      DAG.getNode(PromOp.getOpcode(), dl, MVT::i1, Ops));
  }

  // Now we're left with the initial truncation itself.
  if (N->getOpcode() == ISD::TRUNCATE)
    return N->getOperand(0);

  // Otherwise, this is a comparison. The operands to be compared have just
  // changed type (to i1), but everything else is the same.
  return SDValue(N, 0);
}

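The combine above rests on a simple identity: when only bit 0 of trunc(binary-ops(zext(x), zext(y))) is observed, the extensions contribute nothing, because bit 0 of a wide AND/OR/XOR is the same operation applied to the operands' bit 0. A small scalar check of that identity, in plain C++ rather than the DAG API:

#include <cassert>
#include <cstdint>

int main() {
  for (unsigned X = 0; X <= 1; ++X)
    for (unsigned Y = 0; Y <= 1; ++Y) {
      uint64_t ZX = X, ZY = Y;            // zext(x), zext(y)
      assert(((ZX & ZY) & 1) == (X & Y)); // trunc(and(zext,zext)) == and(x,y)
      assert(((ZX | ZY) & 1) == (X | Y));
      assert(((ZX ^ ZY) & 1) == (X ^ Y));
    }
  return 0;
}
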
SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
                                                  DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);

  // If we're tracking CR bits, we need to be careful that we don't have:
  //   zext(binary-ops(trunc(x), trunc(y)))
  // or
  //   zext(binary-ops(binary-ops(trunc(x), trunc(y)), ...)
  // such that we're unnecessarily moving things into CR bits that can more
  // efficiently stay in GPRs. Note that if we're not certain that the high
  // bits are set as required by the final extension, we still may need to do
  // some masking to get the proper behavior.

  // This same functionality is important on PPC64 when dealing with
  // 32-to-64-bit extensions; these occur often when 32-bit values are used as
  // the return values of functions. Because it is so similar, it is handled
  // here as well.

  if (N->getValueType(0) != MVT::i32 &&
      N->getValueType(0) != MVT::i64)
    return SDValue();

  if (!((N->getOperand(0).getValueType() == MVT::i1 && Subtarget.useCRBits()) ||
        (N->getOperand(0).getValueType() == MVT::i32 && Subtarget.isPPC64())))
    return SDValue();

  if (N->getOperand(0).getOpcode() != ISD::AND &&
      N->getOperand(0).getOpcode() != ISD::OR &&
      N->getOperand(0).getOpcode() != ISD::XOR &&
      N->getOperand(0).getOpcode() != ISD::SELECT &&
      N->getOperand(0).getOpcode() != ISD::SELECT_CC)
    return SDValue();

  SmallVector<SDValue, 4> Inputs;
  SmallVector<SDValue, 8> BinOps(1, N->getOperand(0)), PromOps;
  SmallPtrSet<SDNode *, 16> Visited;

  // Visit all inputs, collect all binary operations (and, or, xor and
  // select) that are all fed by truncations.
  while (!BinOps.empty()) {
    SDValue BinOp = BinOps.back();
    BinOps.pop_back();

    if (!Visited.insert(BinOp.getNode()).second)
      continue;

    PromOps.push_back(BinOp);

    for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
      // The condition of the select is not promoted.
      if (BinOp.getOpcode() == ISD::SELECT && i == 0)
        continue;
      if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
        continue;

      if (BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
          isa<ConstantSDNode>(BinOp.getOperand(i))) {
        Inputs.push_back(BinOp.getOperand(i));
      } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
                 BinOp.getOperand(i).getOpcode() == ISD::OR ||
                 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
                 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
                 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC) {
        BinOps.push_back(BinOp.getOperand(i));
      } else {
        // We have an input that is not a truncation or another binary
        // operation; we'll abort this transformation.
        return SDValue();
      }
    }
  }

  // The operands of a select that must be truncated when the select is
  // promoted because the operand is actually part of the to-be-promoted set.
  DenseMap<SDNode *, EVT> SelectTruncOp[2];

  // Make sure that this is a self-contained cluster of operations (which
  // is not quite the same thing as saying that everything has only one
  // use).
  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
    if (isa<ConstantSDNode>(Inputs[i]))
      continue;

    for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(),
                              UE = Inputs[i].getNode()->use_end();
         UI != UE; ++UI) {
      SDNode *User = *UI;
      if (User != N && !Visited.count(User))
        return SDValue();

      // If we're going to promote the non-output-value operand(s) or SELECT or
      // SELECT_CC, record them for truncation.
      if (User->getOpcode() == ISD::SELECT) {
        if (User->getOperand(0) == Inputs[i])
          SelectTruncOp[0].insert(std::make_pair(User,
                                    User->getOperand(0).getValueType()));
      } else if (User->getOpcode() == ISD::SELECT_CC) {
        if (User->getOperand(0) == Inputs[i])
          SelectTruncOp[0].insert(std::make_pair(User,
                                    User->getOperand(0).getValueType()));
        if (User->getOperand(1) == Inputs[i])
          SelectTruncOp[1].insert(std::make_pair(User,
                                    User->getOperand(1).getValueType()));
      }
    }
  }

  for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
    for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(),
                              UE = PromOps[i].getNode()->use_end();
         UI != UE; ++UI) {
      SDNode *User = *UI;
      if (User != N && !Visited.count(User))
        return SDValue();

      // If we're going to promote the non-output-value operand(s) or SELECT or
      // SELECT_CC, record them for truncation.
      if (User->getOpcode() == ISD::SELECT) {
        if (User->getOperand(0) == PromOps[i])
          SelectTruncOp[0].insert(std::make_pair(User,
                                    User->getOperand(0).getValueType()));
      } else if (User->getOpcode() == ISD::SELECT_CC) {
        if (User->getOperand(0) == PromOps[i])
          SelectTruncOp[0].insert(std::make_pair(User,
                                    User->getOperand(0).getValueType()));
        if (User->getOperand(1) == PromOps[i])
          SelectTruncOp[1].insert(std::make_pair(User,
                                    User->getOperand(1).getValueType()));
      }
    }
  }

  unsigned PromBits = N->getOperand(0).getValueSizeInBits();
  bool ReallyNeedsExt = false;
  if (N->getOpcode() != ISD::ANY_EXTEND) {
    // If all of the inputs are not already sign/zero extended, then
    // we'll still need to do that at the end.
    for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
      if (isa<ConstantSDNode>(Inputs[i]))
        continue;

      unsigned OpBits =
        Inputs[i].getOperand(0).getValueSizeInBits();
      assert(PromBits < OpBits && "Truncation not to a smaller bit count?");

      if ((N->getOpcode() == ISD::ZERO_EXTEND &&
           !DAG.MaskedValueIsZero(Inputs[i].getOperand(0),
                                  APInt::getHighBitsSet(OpBits,
                                                        OpBits-PromBits))) ||
          (N->getOpcode() == ISD::SIGN_EXTEND &&
           DAG.ComputeNumSignBits(Inputs[i].getOperand(0)) <
             (OpBits-(PromBits-1)))) {
        ReallyNeedsExt = true;
        break;
      }
    }
  }

  // Replace all inputs, either with the truncation operand, or a
  // truncation or extension to the final output type.
  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
    // Constant inputs need to be replaced with the to-be-promoted nodes that
    // use them because they might have users outside of the cluster of
    // promoted nodes.
    if (isa<ConstantSDNode>(Inputs[i]))
      continue;

    SDValue InSrc = Inputs[i].getOperand(0);
    if (Inputs[i].getValueType() == N->getValueType(0))
      DAG.ReplaceAllUsesOfValueWith(Inputs[i], InSrc);
    else if (N->getOpcode() == ISD::SIGN_EXTEND)
      DAG.ReplaceAllUsesOfValueWith(Inputs[i],
        DAG.getSExtOrTrunc(InSrc, dl, N->getValueType(0)));
    else if (N->getOpcode() == ISD::ZERO_EXTEND)
      DAG.ReplaceAllUsesOfValueWith(Inputs[i],
        DAG.getZExtOrTrunc(InSrc, dl, N->getValueType(0)));
    else
      DAG.ReplaceAllUsesOfValueWith(Inputs[i],
        DAG.getAnyExtOrTrunc(InSrc, dl, N->getValueType(0)));
  }

  std::list<HandleSDNode> PromOpHandles;
  for (auto &PromOp : PromOps)
    PromOpHandles.emplace_back(PromOp);

  // Replace all operations (these are all the same, but have a different
  // (promoted) return type). DAG.getNode will validate that the types of
  // a binary operator match, so go through the list in reverse so that
  // we've likely promoted both operands first.
  while (!PromOpHandles.empty()) {
    SDValue PromOp = PromOpHandles.back().getValue();
    PromOpHandles.pop_back();

    unsigned C;
    switch (PromOp.getOpcode()) {
    default:             C = 0; break;
    case ISD::SELECT:    C = 1; break;
    case ISD::SELECT_CC: C = 2; break;
    }

    if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
         PromOp.getOperand(C).getValueType() != N->getValueType(0)) ||
        (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
         PromOp.getOperand(C+1).getValueType() != N->getValueType(0))) {
      // The to-be-promoted operands of this node have not yet been
      // promoted (this should be rare because we're going through the
      // list backward, but if one of the operands has several users in
      // this cluster of to-be-promoted nodes, it is possible).
      PromOpHandles.emplace_front(PromOp);
      continue;
    }

    // For SELECT and SELECT_CC nodes, we do a similar check for any
    // to-be-promoted comparison inputs.
    if (PromOp.getOpcode() == ISD::SELECT ||
        PromOp.getOpcode() == ISD::SELECT_CC) {
      if ((SelectTruncOp[0].count(PromOp.getNode()) &&
           PromOp.getOperand(0).getValueType() != N->getValueType(0)) ||
          (SelectTruncOp[1].count(PromOp.getNode()) &&
           PromOp.getOperand(1).getValueType() != N->getValueType(0))) {
        PromOpHandles.emplace_front(PromOp);
        continue;
      }
    }

    SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
                                PromOp.getNode()->op_end());

    // If this node has constant inputs, then they'll need to be promoted here.
    for (unsigned i = 0; i < 2; ++i) {
      if (!isa<ConstantSDNode>(Ops[C+i]))
        continue;
      if (Ops[C+i].getValueType() == N->getValueType(0))
        continue;

      if (N->getOpcode() == ISD::SIGN_EXTEND)
        Ops[C+i] = DAG.getSExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
      else if (N->getOpcode() == ISD::ZERO_EXTEND)
        Ops[C+i] = DAG.getZExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
      else
        Ops[C+i] = DAG.getAnyExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
    }

    // If we've promoted the comparison inputs of a SELECT or SELECT_CC,
    // truncate them again to the original value type.
    if (PromOp.getOpcode() == ISD::SELECT ||
        PromOp.getOpcode() == ISD::SELECT_CC) {
      auto SI0 = SelectTruncOp[0].find(PromOp.getNode());
      if (SI0 != SelectTruncOp[0].end())
        Ops[0] = DAG.getNode(ISD::TRUNCATE, dl, SI0->second, Ops[0]);
      auto SI1 = SelectTruncOp[1].find(PromOp.getNode());
      if (SI1 != SelectTruncOp[1].end())
        Ops[1] = DAG.getNode(ISD::TRUNCATE, dl, SI1->second, Ops[1]);
    }

    DAG.ReplaceAllUsesOfValueWith(PromOp,
      DAG.getNode(PromOp.getOpcode(), dl, N->getValueType(0), Ops));
  }

  // Now we're left with the initial extension itself.
  if (!ReallyNeedsExt)
    return N->getOperand(0);

  // To zero extend, just mask off everything except for the first bit (in the
  // i1 case).
  if (N->getOpcode() == ISD::ZERO_EXTEND)
    return DAG.getNode(ISD::AND, dl, N->getValueType(0), N->getOperand(0),
                       DAG.getConstant(APInt::getLowBitsSet(
                                         N->getValueSizeInBits(0), PromBits),
                                       dl, N->getValueType(0)));

  assert(N->getOpcode() == ISD::SIGN_EXTEND &&
         "Invalid extension type");
  EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0), DAG.getDataLayout());
  SDValue ShiftCst =
      DAG.getConstant(N->getValueSizeInBits(0) - PromBits, dl, ShiftAmountTy);
  return DAG.getNode(
      ISD::SRA, dl, N->getValueType(0),
      DAG.getNode(ISD::SHL, dl, N->getValueType(0), N->getOperand(0), ShiftCst),
      ShiftCst);
}

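When ReallyNeedsExt holds for a SIGN_EXTEND, the function finishes with the SHL/SRA pair above rather than any memory traffic: shifting the PromBits-wide payload to the top of the register and arithmetic-shifting it back replicates the sign bit through the upper bits. A small scalar sketch of that pattern (the helper is hypothetical, not from this file):

#include <cassert>
#include <cstdint>

// Models SHL then SRA by ShiftCst = ValueSizeInBits - PromBits.
static int64_t signExtendLow(uint64_t V, unsigned PromBits) {
  unsigned ShiftCst = 64 - PromBits;
  // The right shift is arithmetic because the left operand is signed.
  return (int64_t)(V << ShiftCst) >> ShiftCst;
}

int main() {
  assert(signExtendLow(0xFFFFFFFFu, 32) == -1);         // sign bit set
  assert(signExtendLow(0x7FFFFFFFu, 32) == 0x7FFFFFFF); // sign bit clear
  // The ZERO_EXTEND case instead ANDs with getLowBitsSet(..., PromBits).
  return 0;
}
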
SDValue PPCTargetLowering::combineSetCC(SDNode *N,
                                        DAGCombinerInfo &DCI) const {
  assert(N->getOpcode() == ISD::SETCC &&
         "Should be called with a SETCC node");

  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  if (CC == ISD::SETNE || CC == ISD::SETEQ) {
    SDValue LHS = N->getOperand(0);
    SDValue RHS = N->getOperand(1);

    // If there is a '0 - y' pattern, canonicalize the pattern to the RHS.
    if (LHS.getOpcode() == ISD::SUB && isNullConstant(LHS.getOperand(0)) &&
        LHS.hasOneUse())
      std::swap(LHS, RHS);

    // x == 0-y --> x+y == 0
    // x != 0-y --> x+y != 0
    if (RHS.getOpcode() == ISD::SUB && isNullConstant(RHS.getOperand(0)) &&
        RHS.hasOneUse()) {
      SDLoc DL(N);
      SelectionDAG &DAG = DCI.DAG;
      EVT VT = N->getValueType(0);
      EVT OpVT = LHS.getValueType();
      SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LHS, RHS.getOperand(1));
      return DAG.getSetCC(DL, VT, Add, DAG.getConstant(0, DL, OpVT), CC);
    }
  }

  return DAGCombineTruncBoolExt(N, DCI);
}

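The x == 0-y rewrite above is the standard two's-complement identity: x equals -y exactly when the wrapping sum x + y is zero, so the SETCC can consume an ADD instead of a subtract-from-zero. A quick scalar check in plain C++ (illustrative only):

#include <cassert>
#include <cstdint>

int main() {
  for (int X = -4; X <= 4; ++X)
    for (int Y = -4; Y <= 4; ++Y) {
      uint32_t UX = (uint32_t)X, UY = (uint32_t)Y;
      bool EqNeg = (UX == (uint32_t)0 - UY); // x == 0-y
      bool SumZero = ((UX + UY) == 0);       // x+y == 0, wrapping add
      assert(EqNeg == SumZero);
    }
  return 0;
}
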
// Is this an extending load from an f32 to an f64?
static bool isFPExtLoad(SDValue Op) {
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op.getNode()))
    return LD->getExtensionType() == ISD::EXTLOAD &&
      Op.getValueType() == MVT::f64;
  return false;
}

/// Reduces the number of fp-to-int conversions when building a vector.
///
/// If this vector is built out of floating to integer conversions,
/// transform it to a vector built out of floating point values followed by a
/// single floating to integer conversion of the vector.
/// Namely  (build_vector (fptosi $A), (fptosi $B), ...)
/// becomes (fptosi (build_vector ($A, $B, ...)))
SDValue PPCTargetLowering::
combineElementTruncationToVectorTruncation(SDNode *N,
                                           DAGCombinerInfo &DCI) const {
  assert(N->getOpcode() == ISD::BUILD_VECTOR &&
         "Should be called with a BUILD_VECTOR node");

  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);

  SDValue FirstInput = N->getOperand(0);
  assert(FirstInput.getOpcode() == PPCISD::MFVSR &&
         "The input operand must be an fp-to-int conversion.");

  // This combine happens after legalization so the fp_to_[su]i nodes are
  // already converted to PPCISD nodes.
  unsigned FirstConversion = FirstInput.getOperand(0).getOpcode();
  if (FirstConversion == PPCISD::FCTIDZ ||
      FirstConversion == PPCISD::FCTIDUZ ||
      FirstConversion == PPCISD::FCTIWZ ||
      FirstConversion == PPCISD::FCTIWUZ) {
    bool IsSplat = true;
    bool Is32Bit = FirstConversion == PPCISD::FCTIWZ ||
      FirstConversion == PPCISD::FCTIWUZ;
    EVT SrcVT = FirstInput.getOperand(0).getValueType();
    SmallVector<SDValue, 4> Ops;
    EVT TargetVT = N->getValueType(0);
    for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
      SDValue NextOp = N->getOperand(i);
      if (NextOp.getOpcode() != PPCISD::MFVSR)
        return SDValue();
      unsigned NextConversion = NextOp.getOperand(0).getOpcode();
      if (NextConversion != FirstConversion)
        return SDValue();
      // If we are converting to 32-bit integers, we need to add an FP_ROUND.
      // This is not valid if the input was originally double precision. It is
      // also not profitable to do unless this is an extending load, in which
      // case doing this combine will allow us to combine consecutive loads.
      if (Is32Bit && !isFPExtLoad(NextOp.getOperand(0).getOperand(0)))
        return SDValue();
      if (N->getOperand(i) != FirstInput)
        IsSplat = false;
    }

    // If this is a splat, we leave it as-is since there will be only a single
    // fp-to-int conversion followed by a splat of the integer. This is better
    // for 32-bit and smaller ints and neutral for 64-bit ints.
    if (IsSplat)
      return SDValue();

    // Now that we know we have the right type of node, get its operands.
    for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
      SDValue In = N->getOperand(i).getOperand(0);
      if (Is32Bit) {
        // For 32-bit values, we need to add an FP_ROUND node (if we made it
        // here, we know that all inputs are extending loads so this is safe).
        if (In.isUndef())
          Ops.push_back(DAG.getUNDEF(SrcVT));
        else {
          SDValue Trunc = DAG.getNode(ISD::FP_ROUND, dl,
                                      MVT::f32, In.getOperand(0),
                                      DAG.getIntPtrConstant(1, dl));
          Ops.push_back(Trunc);
        }
      } else
        Ops.push_back(In.isUndef() ? DAG.getUNDEF(SrcVT) : In.getOperand(0));
    }

    unsigned Opcode;
    if (FirstConversion == PPCISD::FCTIDZ ||
        FirstConversion == PPCISD::FCTIWZ)
      Opcode = ISD::FP_TO_SINT;
    else
      Opcode = ISD::FP_TO_UINT;

    EVT NewVT = TargetVT == MVT::v2i64 ? MVT::v2f64 : MVT::v4f32;
    SDValue BV = DAG.getBuildVector(NewVT, dl, Ops);
    return DAG.getNode(Opcode, dl, TargetVT, BV);
  }
  return SDValue();
}

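The Is32Bit guard above only fires when every input to the conversion is an extending f32 load; in that case the inserted FP_ROUND is exact, because a double produced by widening a float converts back to exactly that float. A hedged scalar illustration of why the round is safe (plain C++, not LLVM code):

#include <cassert>
#include <cstdint>

int main() {
  float Loaded = 123.456f;          // value an extending load would produce
  double Extended = (double)Loaded; // the EXTLOAD-produced f64 fed to fp-to-int
  float Rounded = (float)Extended;  // the FP_ROUND the combine inserts
  assert(Rounded == Loaded);                      // exact: f32 -> f64 -> f32
  assert((int32_t)Rounded == (int32_t)Extended);  // same fptosi result per lane
  return 0;
}
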
/// Reduce the number of loads when building a vector.
///
/// Building a vector out of multiple loads can be converted to a load
/// of the vector type if the loads are consecutive. If the loads are
/// consecutive but in descending order, a shuffle is added at the end
/// to reorder the vector.
static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR &&
         "Should be called with a BUILD_VECTOR node");

  SDLoc dl(N);

  // Return early for non-byte-sized types, as they can't be consecutive.
  if (!N->getValueType(0).getVectorElementType().isByteSized())
    return SDValue();

  bool InputsAreConsecutiveLoads = true;
  bool InputsAreReverseConsecutive = true;
  unsigned ElemSize = N->getValueType(0).getScalarType().getStoreSize();
  SDValue FirstInput = N->getOperand(0);
  bool IsRoundOfExtLoad = false;

  if (FirstInput.getOpcode() == ISD::FP_ROUND &&
      FirstInput.getOperand(0).getOpcode() == ISD::LOAD) {
    LoadSDNode *LD = dyn_cast<LoadSDNode>(FirstInput.getOperand(0));
    IsRoundOfExtLoad = LD->getExtensionType() == ISD::EXTLOAD;
  }
  // Not a build vector of (possibly fp_rounded) loads.
  if ((!IsRoundOfExtLoad && FirstInput.getOpcode() != ISD::LOAD) ||
      N->getNumOperands() == 1)
    return SDValue();

  for (int i = 1, e = N->getNumOperands(); i < e; ++i) {
    // If any inputs are fp_round(extload), they all must be.
    if (IsRoundOfExtLoad && N->getOperand(i).getOpcode() != ISD::FP_ROUND)
      return SDValue();

    SDValue NextInput = IsRoundOfExtLoad ? N->getOperand(i).getOperand(0) :
      N->getOperand(i);
    if (NextInput.getOpcode() != ISD::LOAD)
      return SDValue();

    SDValue PreviousInput =
      IsRoundOfExtLoad ? N->getOperand(i-1).getOperand(0) : N->getOperand(i-1);
    LoadSDNode *LD1 = dyn_cast<LoadSDNode>(PreviousInput);
    LoadSDNode *LD2 = dyn_cast<LoadSDNode>(NextInput);

    // If any inputs are fp_round(extload), they all must be.
    if (IsRoundOfExtLoad && LD2->getExtensionType() != ISD::EXTLOAD)
      return SDValue();

    if (!isConsecutiveLS(LD2, LD1, ElemSize, 1, DAG))
      InputsAreConsecutiveLoads = false;
    if (!isConsecutiveLS(LD1, LD2, ElemSize, 1, DAG))
      InputsAreReverseConsecutive = false;

    // Exit early if the loads are neither consecutive nor reverse consecutive.
    if (!InputsAreConsecutiveLoads && !InputsAreReverseConsecutive)
      return SDValue();
  }

  assert(!(InputsAreConsecutiveLoads && InputsAreReverseConsecutive) &&
         "The loads cannot be both consecutive and reverse consecutive.");

  SDValue FirstLoadOp =
    IsRoundOfExtLoad ? FirstInput.getOperand(0) : FirstInput;
  SDValue LastLoadOp =
    IsRoundOfExtLoad ? N->getOperand(N->getNumOperands()-1).getOperand(0) :
                       N->getOperand(N->getNumOperands()-1);

  LoadSDNode *LD1 = dyn_cast<LoadSDNode>(FirstLoadOp);
  LoadSDNode *LDL = dyn_cast<LoadSDNode>(LastLoadOp);
  if (InputsAreConsecutiveLoads) {
    assert(LD1 && "Input needs to be a LoadSDNode.");
    return DAG.getLoad(N->getValueType(0), dl, LD1->getChain(),
                       LD1->getBasePtr(), LD1->getPointerInfo(),
                       LD1->getAlignment());
  }
  if (InputsAreReverseConsecutive) {
    assert(LDL && "Input needs to be a LoadSDNode.");
    SDValue Load = DAG.getLoad(N->getValueType(0), dl, LDL->getChain(),
                               LDL->getBasePtr(), LDL->getPointerInfo(),
                               LDL->getAlignment());
    SmallVector<int, 16> Ops;
    for (int i = N->getNumOperands() - 1; i >= 0; i--)
      Ops.push_back(i);

    return DAG.getVectorShuffle(N->getValueType(0), dl, Load,
                                DAG.getUNDEF(N->getValueType(0)), Ops);
  }
  return SDValue();
}

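For the descending-order case, the function loads once from the last operand's address (the lowest one) and then reverses the lanes with the {N-1, ..., 0} shuffle mask built above. A scalar model of that equivalence, with arrays standing in for vector lanes (illustrative only):

#include <cassert>
#include <cstdint>

int main() {
  int32_t Mem[4] = {10, 20, 30, 40}; // lane i of the build_vector loads Mem[3 - i]
  int32_t BuildVec[4], Shuffled[4];
  for (int i = 0; i < 4; ++i)
    BuildVec[i] = Mem[3 - i];        // original build_vector of scalar loads
  const int32_t *Loaded = Mem;       // single wide load at the lowest address
  int Ops[4] = {3, 2, 1, 0};         // the reversing shuffle mask
  for (int i = 0; i < 4; ++i)
    Shuffled[i] = Loaded[Ops[i]];
  for (int i = 0; i < 4; ++i)
    assert(Shuffled[i] == BuildVec[i]);
  return 0;
}
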
// This function adds the required vector_shuffle needed to get
// the elements of the vector extract in the correct position
// as specified by the CorrectElems encoding.
static SDValue addShuffleForVecExtend(SDNode *N, SelectionDAG &DAG,
                                      SDValue Input, uint64_t Elems,
                                      uint64_t CorrectElems) {
  SDLoc dl(N);

  unsigned NumElems = Input.getValueType().getVectorNumElements();
  SmallVector<int, 16> ShuffleMask(NumElems, -1);

  // Knowing the element indices being extracted from the original
  // vector and the order in which they're being inserted, just put
  // them at element indices required for the instruction.
  for (unsigned i = 0; i < N->getNumOperands(); i++) {
    if (DAG.getDataLayout().isLittleEndian())
      ShuffleMask[CorrectElems & 0xF] = Elems & 0xF;
    else
      ShuffleMask[(CorrectElems & 0xF0) >> 4] = (Elems & 0xF0) >> 4;
    CorrectElems = CorrectElems >> 8;
    Elems = Elems >> 8;
  }

  SDValue Shuffle =
      DAG.getVectorShuffle(Input.getValueType(), dl, Input,
                           DAG.getUNDEF(Input.getValueType()), ShuffleMask);

  EVT Ty = N->getValueType(0);
  SDValue BV = DAG.getNode(PPCISD::SExtVElems, dl, Ty, Shuffle);
  return BV;
}

// Look for build vector patterns where input operands come from sign
// extended vector_extract elements of specific indices. If the correct indices
// aren't used, add a vector shuffle to fix up the indices and create a new
// PPCISD::SExtVElems node which selects the vector sign extend instructions
// during instruction selection.
static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG) {
  // This array encodes the indices that the vector sign extend instructions
  // extract from when extending from one type to another for both BE and LE.
  // The right nibble of each byte corresponds to the LE indices,
  // and the left nibble of each byte corresponds to the BE indices.
  // For example: 0x3074B8FC  byte->word
  // For LE: the allowed indices are: 0x0,0x4,0x8,0xC
  // For BE: the allowed indices are: 0x3,0x7,0xB,0xF
  // For example: 0x000070F8  byte->double word
  // For LE: the allowed indices are: 0x0,0x8
  // For BE: the allowed indices are: 0x7,0xF
  uint64_t TargetElems[] = {
      0x3074B8FC, // b->w
      0x000070F8, // b->d
      0x10325476, // h->w
      0x00003074, // h->d
      0x00001032, // w->d
  };

  uint64_t Elems = 0;
  int Index;
  SDValue Input;

  auto isSExtOfVecExtract = [&](SDValue Op) -> bool {
    if (!Op)
      return false;
    if (Op.getOpcode() != ISD::SIGN_EXTEND &&
        Op.getOpcode() != ISD::SIGN_EXTEND_INREG)
      return false;

    // A SIGN_EXTEND_INREG might be fed by an ANY_EXTEND to produce a value
    // of the right width.
    SDValue Extract = Op.getOperand(0);
    if (Extract.getOpcode() == ISD::ANY_EXTEND)
      Extract = Extract.getOperand(0);
    if (Extract.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
      return false;

    ConstantSDNode *ExtOp = dyn_cast<ConstantSDNode>(Extract.getOperand(1));
    if (!ExtOp)
      return false;

    Index = ExtOp->getZExtValue();
    if (Input && Input != Extract.getOperand(0))
      return false;

    if (!Input)
      Input = Extract.getOperand(0);

    Elems = Elems << 8;
    Index = DAG.getDataLayout().isLittleEndian() ? Index : Index << 4;
    Elems |= Index;

    return true;
  };

  // If the build vector operands aren't sign extended vector extracts
  // of the same input vector, then return.
  for (unsigned i = 0; i < N->getNumOperands(); i++) {
    if (!isSExtOfVecExtract(N->getOperand(i))) {
      return SDValue();
    }
  }

  // If the vector extract indices are not correct, add the appropriate
  // vector_shuffle.
  int TgtElemArrayIdx;
  int InputSize = Input.getValueType().getScalarSizeInBits();
  int OutputSize = N->getValueType(0).getScalarSizeInBits();
  if (InputSize + OutputSize == 40)
    TgtElemArrayIdx = 0;
  else if (InputSize + OutputSize == 72)
    TgtElemArrayIdx = 1;
  else if (InputSize + OutputSize == 48)
    TgtElemArrayIdx = 2;
  else if (InputSize + OutputSize == 80)
    TgtElemArrayIdx = 3;
  else if (InputSize + OutputSize == 96)
    TgtElemArrayIdx = 4;
  else
    return SDValue();

  uint64_t CorrectElems = TargetElems[TgtElemArrayIdx];
  CorrectElems = DAG.getDataLayout().isLittleEndian()
                     ? CorrectElems & 0x0F0F0F0F0F0F0F0F
                     : CorrectElems & 0xF0F0F0F0F0F0F0F0;
  if (Elems != CorrectElems) {
    return addShuffleForVecExtend(N, DAG, Input, Elems, CorrectElems);
  }

  // Regular lowering will catch cases where a shuffle is not needed.
  return SDValue();
}

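The TargetElems constants pack two index sets per byte: the low nibble is a lane a little-endian vector sign-extend instruction reads, the high nibble the corresponding big-endian lane. A small decoder for the encoding (the helper is hypothetical, mirroring the nibble masks used in addShuffleForVecExtend):

#include <cstdint>
#include <cstdio>

static void decode(uint64_t Encoded, unsigned NumBytes) {
  for (unsigned i = 0; i < NumBytes; ++i) {
    unsigned LE = Encoded & 0xF;         // right nibble: LE extract index
    unsigned BE = (Encoded & 0xF0) >> 4; // left nibble: BE extract index
    std::printf("byte %u: LE 0x%X, BE 0x%X\n", i, LE, BE);
    Encoded >>= 8;                       // same walk as addShuffleForVecExtend
  }
}

int main() {
  decode(0x3074B8FC, 4); // b->w: LE {0xC,0x8,0x4,0x0}, BE {0xF,0xB,0x7,0x3}
  decode(0x000070F8, 2); // b->d: LE {0x8,0x0},         BE {0xF,0x7}
  return 0;
}
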
SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N,
                                                 DAGCombinerInfo &DCI) const {
  assert(N->getOpcode() == ISD::BUILD_VECTOR &&
         "Should be called with a BUILD_VECTOR node");

  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);

  if (!Subtarget.hasVSX())
    return SDValue();

  // The target independent DAG combiner will leave a build_vector of
  // float-to-int conversions intact. We can generate MUCH better code for
  // a float-to-int conversion of a vector of floats.
  SDValue FirstInput = N->getOperand(0);
  if (FirstInput.getOpcode() == PPCISD::MFVSR) {
    SDValue Reduced = combineElementTruncationToVectorTruncation(N, DCI);
    if (Reduced)
      return Reduced;
  }

  // If we're building a vector out of consecutive loads, just load that
  // vector type.
  SDValue Reduced = combineBVOfConsecutiveLoads(N, DAG);
  if (Reduced)
    return Reduced;

  // If we're building a vector out of extended elements from another vector
  // we have P9 vector integer extend instructions. The code assumes legal
  // input types (i.e. it can't handle things like v4i16) so do not run before
  // legalization.
  if (Subtarget.hasP9Altivec() && !DCI.isBeforeLegalize()) {
    Reduced = combineBVOfVecSExt(N, DAG);
    if (Reduced)
      return Reduced;
  }

  if (N->getValueType(0) != MVT::v2f64)
    return SDValue();

  // Looking for:
  // (build_vector ([su]int_to_fp (extractelt 0)), [su]int_to_fp (extractelt 1))
  if (FirstInput.getOpcode() != ISD::SINT_TO_FP &&
      FirstInput.getOpcode() != ISD::UINT_TO_FP)
    return SDValue();
  if (N->getOperand(1).getOpcode() != ISD::SINT_TO_FP &&
      N->getOperand(1).getOpcode() != ISD::UINT_TO_FP)
    return SDValue();
  if (FirstInput.getOpcode() != N->getOperand(1).getOpcode())
    return SDValue();

  SDValue Ext1 = FirstInput.getOperand(0);
  SDValue Ext2 = N->getOperand(1).getOperand(0);
  if (Ext1.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
      Ext2.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
    return SDValue();

  ConstantSDNode *Ext1Op = dyn_cast<ConstantSDNode>(Ext1.getOperand(1));
  ConstantSDNode *Ext2Op = dyn_cast<ConstantSDNode>(Ext2.getOperand(1));
  if (!Ext1Op || !Ext2Op)
    return SDValue();
  if (Ext1.getOperand(0).getValueType() != MVT::v4i32 ||
      Ext1.getOperand(0) != Ext2.getOperand(0))
    return SDValue();

  int FirstElem = Ext1Op->getZExtValue();
  int SecondElem = Ext2Op->getZExtValue();
  int SubvecIdx;
  if (FirstElem == 0 && SecondElem == 1)
    SubvecIdx = Subtarget.isLittleEndian() ? 1 : 0;
  else if (FirstElem == 2 && SecondElem == 3)
    SubvecIdx = Subtarget.isLittleEndian() ? 0 : 1;
  else
    return SDValue();

  SDValue SrcVec = Ext1.getOperand(0);
  auto NodeType = (N->getOperand(1).getOpcode() == ISD::SINT_TO_FP) ?
    PPCISD::SINT_VEC_TO_FP : PPCISD::UINT_VEC_TO_FP;
  return DAG.getNode(NodeType, dl, MVT::v2f64,
                     SrcVec, DAG.getIntPtrConstant(SubvecIdx, dl));
}

SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N,
12821
2.51k
                                              DAGCombinerInfo &DCI) const {
12822
2.51k
  assert((N->getOpcode() == ISD::SINT_TO_FP ||
12823
2.51k
          N->getOpcode() == ISD::UINT_TO_FP) &&
12824
2.51k
         "Need an int -> FP conversion node here");
12825
2.51k
12826
2.51k
  if (useSoftFloat() || 
!Subtarget.has64BitSupport()2.51k
)
12827
41
    return SDValue();
12828
2.47k
12829
2.47k
  SelectionDAG &DAG = DCI.DAG;
12830
2.47k
  SDLoc dl(N);
12831
2.47k
  SDValue Op(N, 0);
12832
2.47k
12833
2.47k
  // Don't handle ppc_fp128 here or conversions that are out-of-range capable
12834
2.47k
  // from the hardware.
12835
2.47k
  if (Op.getValueType() != MVT::f32 && 
Op.getValueType() != MVT::f642.13k
)
12836
1.97k
    return SDValue();
12837
496
  if (Op.getOperand(0).getValueType().getSimpleVT() <= MVT(MVT::i1) ||
12838
496
      Op.getOperand(0).getValueType().getSimpleVT() > MVT(MVT::i64))
12839
1
    return SDValue();
12840
495
12841
495
  SDValue FirstOperand(Op.getOperand(0));
12842
495
  bool SubWordLoad = FirstOperand.getOpcode() == ISD::LOAD &&
12843
495
    
(60
FirstOperand.getValueType() == MVT::i860
||
12844
60
     
FirstOperand.getValueType() == MVT::i1643
);
12845
495
  if (Subtarget.hasP9Vector() && 
Subtarget.hasP9Altivec()212
&&
SubWordLoad212
) {
12846
32
    bool Signed = N->getOpcode() == ISD::SINT_TO_FP;
12847
32
    bool DstDouble = Op.getValueType() == MVT::f64;
12848
32
    unsigned ConvOp = Signed ?
12849
16
      (DstDouble ? 
PPCISD::FCFID8
:
PPCISD::FCFIDS8
) :
12850
32
      
(DstDouble 16
?
PPCISD::FCFIDU8
:
PPCISD::FCFIDUS8
);
12851
32
    SDValue WidthConst =
12852
32
      DAG.getIntPtrConstant(FirstOperand.getValueType() == MVT::i8 ? 
116
:
216
,
12853
32
                            dl, false);
12854
32
    LoadSDNode *LDN = cast<LoadSDNode>(FirstOperand.getNode());
12855
32
    SDValue Ops[] = { LDN->getChain(), LDN->getBasePtr(), WidthConst };
12856
32
    SDValue Ld = DAG.getMemIntrinsicNode(PPCISD::LXSIZX, dl,
12857
32
                                         DAG.getVTList(MVT::f64, MVT::Other),
12858
32
                                         Ops, MVT::i8, LDN->getMemOperand());
12859
32
12860
32
    // For signed conversion, we need to sign-extend the value in the VSR
12861
32
    if (Signed) {
12862
16
      SDValue ExtOps[] = { Ld, WidthConst };
12863
16
      SDValue Ext = DAG.getNode(PPCISD::VEXTS, dl, MVT::f64, ExtOps);
12864
16
      return DAG.getNode(ConvOp, dl, DstDouble ? 
MVT::f648
:
MVT::f328
, Ext);
12865
16
    } else
12866
16
      return DAG.getNode(ConvOp, dl, DstDouble ? 
MVT::f648
:
MVT::f328
, Ld);
12867
463
  }
12868
463
12869
463
12870
463
  // For i32 intermediate values, unfortunately, the conversion functions
12871
463
  // leave the upper 32 bits of the value are undefined. Within the set of
12872
463
  // scalar instructions, we have no method for zero- or sign-extending the
12873
463
  // value. Thus, we cannot handle i32 intermediate values here.
12874
463
  if (Op.getOperand(0).getValueType() == MVT::i32)
12875
154
    return SDValue();
12876
309
12877
309
  assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
12878
309
         "UINT_TO_FP is supported only with FPCVT");
12879
309
12880
309
  // If we have FCFIDS, then use it when converting to single-precision.
12881
309
  // Otherwise, convert to double-precision and then round.
12882
309
  unsigned FCFOp = (Subtarget.hasFPCVT() && 
Op.getValueType() == MVT::f32291
)
12883
309
                       ? 
(Op.getOpcode() == ISD::UINT_TO_FP 230
?
PPCISD::FCFIDUS119
12884
230
                                                            : 
PPCISD::FCFIDS111
)
12885
309
                       : 
(Op.getOpcode() == ISD::UINT_TO_FP 79
?
PPCISD::FCFIDU37
12886
79
                                                            : 
PPCISD::FCFID42
);
12887
309
  MVT FCFTy = (Subtarget.hasFPCVT() && 
Op.getValueType() == MVT::f32291
)
12888
309
                  ? 
MVT::f32230
12889
309
                  : 
MVT::f6479
;
12890
309
12891
309
  // If we're converting from a float, to an int, and back to a float again,
12892
309
  // then we don't need the store/load pair at all.
12893
309
  if ((Op.getOperand(0).getOpcode() == ISD::FP_TO_UINT &&
12894
309
       
Subtarget.hasFPCVT()1
) ||
12895
309
      
(Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT)308
) {
12896
12
    SDValue Src = Op.getOperand(0).getOperand(0);
12897
12
    if (Src.getValueType() == MVT::f32) {
12898
3
      Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
12899
3
      DCI.AddToWorklist(Src.getNode());
12900
9
    } else if (Src.getValueType() != MVT::f64) {
12901
1
      // Make sure that we don't pick up a ppc_fp128 source value.
12902
1
      return SDValue();
12903
1
    }
12904
11
12905
11
    unsigned FCTOp =
12906
11
      Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT ? 
PPCISD::FCTIDZ10
:
12907
11
                                                        
PPCISD::FCTIDUZ1
;
12908
11
12909
11
    SDValue Tmp = DAG.getNode(FCTOp, dl, MVT::f64, Src);
12910
11
    SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Tmp);
12911
11
12912
11
    if (Op.getValueType() == MVT::f32 && 
!Subtarget.hasFPCVT()3
) {
12913
3
      FP = DAG.getNode(ISD::FP_ROUND, dl,
12914
3
                       MVT::f32, FP, DAG.getIntPtrConstant(0, dl));
12915
3
      DCI.AddToWorklist(FP.getNode());
12916
3
    }
12917
11
12918
11
    return FP;
12919
11
  }
12920
297
12921
297
  return SDValue();
12922
297
}
12923
// expandVSXLoadForLE - Convert VSX loads (which may be intrinsics for
// builtins) into loads with swaps.
SDValue PPCTargetLowering::expandVSXLoadForLE(SDNode *N,
                                              DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);
  SDValue Chain;
  SDValue Base;
  MachineMemOperand *MMO;

  switch (N->getOpcode()) {
  default:
    llvm_unreachable("Unexpected opcode for little endian VSX load");
  case ISD::LOAD: {
    LoadSDNode *LD = cast<LoadSDNode>(N);
    Chain = LD->getChain();
    Base = LD->getBasePtr();
    MMO = LD->getMemOperand();
    // If the MMO suggests this isn't a load of a full vector, leave
    // things alone.  For a built-in, we have to make the change for
    // correctness, so if there is a size problem that will be a bug.
    if (MMO->getSize() < 16)
      return SDValue();
    break;
  }
  case ISD::INTRINSIC_W_CHAIN: {
    MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
    Chain = Intrin->getChain();
    // Similarly to the store case below, Intrin->getBasePtr() doesn't get
    // us what we want. Get operand 2 instead.
    Base = Intrin->getOperand(2);
    MMO = Intrin->getMemOperand();
    break;
  }
  }

  MVT VecTy = N->getValueType(0).getSimpleVT();

  // Do not expand to PPCISD::LXVD2X + PPCISD::XXSWAPD when the load is
  // aligned and the type is a vector with elements up to 4 bytes.
  if (Subtarget.needsSwapsForVSXMemOps() && !(MMO->getAlignment() % 16)
      && VecTy.getScalarSizeInBits() <= 32) {
    return SDValue();
  }

  SDValue LoadOps[] = { Chain, Base };
  SDValue Load = DAG.getMemIntrinsicNode(PPCISD::LXVD2X, dl,
                                         DAG.getVTList(MVT::v2f64, MVT::Other),
                                         LoadOps, MVT::v2f64, MMO);

  DCI.AddToWorklist(Load.getNode());
  Chain = Load.getValue(1);
  SDValue Swap = DAG.getNode(
      PPCISD::XXSWAPD, dl, DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Load);
  DCI.AddToWorklist(Swap.getNode());

  // Add a bitcast if the resulting load type doesn't match v2f64.
  if (VecTy != MVT::v2f64) {
    SDValue N = DAG.getNode(ISD::BITCAST, dl, VecTy, Swap);
    DCI.AddToWorklist(N.getNode());
    // Package {bitcast value, swap's chain} to match Load's shape.
    return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(VecTy, MVT::Other),
                       N, Swap.getValue(1));
  }

  return Swap;
}

// expandVSXStoreForLE - Convert VSX stores (which may be intrinsics for
// builtins) into stores with swaps.
SDValue PPCTargetLowering::expandVSXStoreForLE(SDNode *N,
                                               DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);
  SDValue Chain;
  SDValue Base;
  unsigned SrcOpnd;
  MachineMemOperand *MMO;

  switch (N->getOpcode()) {
  default:
    llvm_unreachable("Unexpected opcode for little endian VSX store");
  case ISD::STORE: {
    StoreSDNode *ST = cast<StoreSDNode>(N);
    Chain = ST->getChain();
    Base = ST->getBasePtr();
    MMO = ST->getMemOperand();
    SrcOpnd = 1;
    // If the MMO suggests this isn't a store of a full vector, leave
    // things alone.  For a built-in, we have to make the change for
    // correctness, so if there is a size problem that will be a bug.
    if (MMO->getSize() < 16)
      return SDValue();
    break;
  }
  case ISD::INTRINSIC_VOID: {
    MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
    Chain = Intrin->getChain();
    // Intrin->getBasePtr() oddly does not get what we want.
    Base = Intrin->getOperand(3);
    MMO = Intrin->getMemOperand();
    SrcOpnd = 2;
    break;
  }
  }

  SDValue Src = N->getOperand(SrcOpnd);
  MVT VecTy = Src.getValueType().getSimpleVT();

  // Do not expand to PPCISD::XXSWAPD and PPCISD::STXVD2X when the store is
  // aligned and the type is a vector with elements up to 4 bytes.
  if (Subtarget.needsSwapsForVSXMemOps() && !(MMO->getAlignment() % 16)
      && VecTy.getScalarSizeInBits() <= 32) {
    return SDValue();
  }

  // All stores are done as v2f64 with a possible bitcast.
  if (VecTy != MVT::v2f64) {
    Src = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Src);
    DCI.AddToWorklist(Src.getNode());
  }

  SDValue Swap = DAG.getNode(PPCISD::XXSWAPD, dl,
                             DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Src);
  DCI.AddToWorklist(Swap.getNode());
  Chain = Swap.getValue(1);
  SDValue StoreOps[] = { Chain, Swap, Base };
  SDValue Store = DAG.getMemIntrinsicNode(PPCISD::STXVD2X, dl,
                                          DAG.getVTList(MVT::Other),
                                          StoreOps, VecTy, MMO);
  DCI.AddToWorklist(Store.getNode());
  return Store;
}

// Handle DAG combine for STORE (FP_TO_INT F).
SDValue PPCTargetLowering::combineStoreFPToInt(SDNode *N,
                                               DAGCombinerInfo &DCI) const {

  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);
  unsigned Opcode = N->getOperand(1).getOpcode();

  assert((Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT)
         && "Not a FP_TO_INT Instruction!");

  SDValue Val = N->getOperand(1).getOperand(0);
  EVT Op1VT = N->getOperand(1).getValueType();
  EVT ResVT = Val.getValueType();

  // Floating point types smaller than 32 bits are not legal on Power.
  if (ResVT.getScalarSizeInBits() < 32)
    return SDValue();

  // Only perform combine for conversion to i64/i32 or power9 i16/i8.
  bool ValidTypeForStoreFltAsInt =
        (Op1VT == MVT::i32 || Op1VT == MVT::i64 ||
         (Subtarget.hasP9Vector() && (Op1VT == MVT::i16 || Op1VT == MVT::i8)));

  if (ResVT == MVT::ppcf128 || !Subtarget.hasP8Altivec() ||
      cast<StoreSDNode>(N)->isTruncatingStore() || !ValidTypeForStoreFltAsInt)
    return SDValue();

  // Extend f32 values to f64.
  if (ResVT.getScalarSizeInBits() == 32) {
    Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val);
    DCI.AddToWorklist(Val.getNode());
  }

  // Set signed or unsigned conversion opcode.
  unsigned ConvOpcode = (Opcode == ISD::FP_TO_SINT) ?
                          PPCISD::FP_TO_SINT_IN_VSR :
                          PPCISD::FP_TO_UINT_IN_VSR;

  Val = DAG.getNode(ConvOpcode,
                    dl, ResVT == MVT::f128 ? MVT::f128 : MVT::f64, Val);
  DCI.AddToWorklist(Val.getNode());

  // Set number of bytes being converted.
  unsigned ByteSize = Op1VT.getScalarSizeInBits() / 8;
  SDValue Ops[] = { N->getOperand(0), Val, N->getOperand(2),
                    DAG.getIntPtrConstant(ByteSize, dl, false),
                    DAG.getValueType(Op1VT) };

  Val = DAG.getMemIntrinsicNode(PPCISD::ST_VSR_SCAL_INT, dl,
          DAG.getVTList(MVT::Other), Ops,
          cast<StoreSDNode>(N)->getMemoryVT(),
          cast<StoreSDNode>(N)->getMemOperand());

  DCI.AddToWorklist(Val.getNode());
  return Val;
}

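// PerformDAGCombine - Entry point for PPC-specific DAG combines; dispatches
// on the node's opcode to the dedicated combine* helpers and to the inline
// cases below.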
SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
                                             DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);
  switch (N->getOpcode()) {
  default: break;
  case ISD::ADD:
    return combineADD(N, DCI);
  case ISD::SHL:
    return combineSHL(N, DCI);
  case ISD::SRA:
    return combineSRA(N, DCI);
  case ISD::SRL:
    return combineSRL(N, DCI);
  case ISD::MUL:
    return combineMUL(N, DCI);
  case PPCISD::SHL:
    if (isNullConstant(N->getOperand(0))) // 0 << V -> 0.
      return N->getOperand(0);
    break;
  case PPCISD::SRL:
    if (isNullConstant(N->getOperand(0))) // 0 >>u V -> 0.
      return N->getOperand(0);
    break;
  case PPCISD::SRA:
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
      if (C->isNullValue() ||   //  0 >>s V -> 0.
          C->isAllOnesValue())  // -1 >>s V -> -1.
        return N->getOperand(0);
    }
    break;
  case ISD::SIGN_EXTEND:
  case ISD::ZERO_EXTEND:
  case ISD::ANY_EXTEND:
    return DAGCombineExtBoolTrunc(N, DCI);
  case ISD::TRUNCATE:
    return combineTRUNCATE(N, DCI);
  case ISD::SETCC:
    if (SDValue CSCC = combineSetCC(N, DCI))
      return CSCC;
    LLVM_FALLTHROUGH;
  case ISD::SELECT_CC:
    return DAGCombineTruncBoolExt(N, DCI);
  case ISD::SINT_TO_FP:
  case ISD::UINT_TO_FP:
    return combineFPToIntToFP(N, DCI);
  case ISD::STORE: {

    EVT Op1VT = N->getOperand(1).getValueType();
    unsigned Opcode = N->getOperand(1).getOpcode();

    if (Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT) {
      SDValue Val = combineStoreFPToInt(N, DCI);
      if (Val)
        return Val;
    }

    // Turn STORE (BSWAP) -> sthbrx/stwbrx.
    if (cast<StoreSDNode>(N)->isUnindexed() && Opcode == ISD::BSWAP &&
        N->getOperand(1).getNode()->hasOneUse() &&
        (Op1VT == MVT::i32 || Op1VT == MVT::i16 ||
         (Subtarget.hasLDBRX() && Subtarget.isPPC64() && Op1VT == MVT::i64))) {

      // STBRX can only handle simple types and it makes no sense to store
      // less than two bytes in byte-reversed order.
      EVT mVT = cast<StoreSDNode>(N)->getMemoryVT();
      if (mVT.isExtended() || mVT.getSizeInBits() < 16)
        break;

      SDValue BSwapOp = N->getOperand(1).getOperand(0);
      // Do an any-extend to 32-bits if this is a half-word input.
      if (BSwapOp.getValueType() == MVT::i16)
        BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp);

      // If the type of the BSWAP operand is wider than the stored memory
      // width, it needs to be shifted to the right side before STBRX.
      if (Op1VT.bitsGT(mVT)) {
        int Shift = Op1VT.getSizeInBits() - mVT.getSizeInBits();
        BSwapOp = DAG.getNode(ISD::SRL, dl, Op1VT, BSwapOp,
                              DAG.getConstant(Shift, dl, MVT::i32));
        // Need to truncate if this is a bswap of i64 stored as i32/i16.
        if (Op1VT == MVT::i64)
          BSwapOp = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, BSwapOp);
      }

      SDValue Ops[] = {
        N->getOperand(0), BSwapOp, N->getOperand(2), DAG.getValueType(mVT)
      };
      return
        DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other),
                                Ops, cast<StoreSDNode>(N)->getMemoryVT(),
                                cast<StoreSDNode>(N)->getMemOperand());
    }

    // STORE Constant:i32<0>  ->  STORE<trunc to i32> Constant:i64<0>
    // So it can increase the chance of CSE constant construction.
    if (Subtarget.isPPC64() && !DCI.isBeforeLegalize() &&
        isa<ConstantSDNode>(N->getOperand(1)) && Op1VT == MVT::i32) {
      // Need to sign-extend to 64-bits to handle negative values.
      EVT MemVT = cast<StoreSDNode>(N)->getMemoryVT();
      uint64_t Val64 = SignExtend64(N->getConstantOperandVal(1),
                                    MemVT.getSizeInBits());
      SDValue Const64 = DAG.getConstant(Val64, dl, MVT::i64);

      // DAG.getTruncStore() can't be used here because it doesn't accept
      // the general (base + offset) addressing mode.
      // So we use UpdateNodeOperands and setTruncatingStore instead.
      DAG.UpdateNodeOperands(N, N->getOperand(0), Const64, N->getOperand(2),
                             N->getOperand(3));
      cast<StoreSDNode>(N)->setTruncatingStore(true);
      return SDValue(N, 0);
    }

    // For little endian, VSX stores require generating xxswapd/stxvd2x.
    // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
    if (Op1VT.isSimple()) {
      MVT StoreVT = Op1VT.getSimpleVT();
      if (Subtarget.needsSwapsForVSXMemOps() &&
          (StoreVT == MVT::v2f64 || StoreVT == MVT::v2i64 ||
           StoreVT == MVT::v4f32 || StoreVT == MVT::v4i32))
        return expandVSXStoreForLE(N, DCI);
    }
    break;
  }
  case ISD::LOAD: {
    LoadSDNode *LD = cast<LoadSDNode>(N);
    EVT VT = LD->getValueType(0);

    // For little endian, VSX loads require generating lxvd2x/xxswapd.
    // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
    if (VT.isSimple()) {
      MVT LoadVT = VT.getSimpleVT();
      if (Subtarget.needsSwapsForVSXMemOps() &&
          (LoadVT == MVT::v2f64 || LoadVT == MVT::v2i64 ||
           LoadVT == MVT::v4f32 || LoadVT == MVT::v4i32))
        return expandVSXLoadForLE(N, DCI);
    }

    // We sometimes end up with a 64-bit integer load, from which we extract
    // two single-precision floating-point numbers. This happens with
    // std::complex<float>, and other similar structures, because of the way we
    // canonicalize structure copies. However, if we lack direct moves,
    // then the final bitcasts from the extracted integer values to the
    // floating-point numbers turn into store/load pairs. Even with direct moves,
    // just loading the two floating-point numbers is likely better.
    auto ReplaceTwoFloatLoad = [&]() {
      if (VT != MVT::i64)
        return false;

      if (LD->getExtensionType() != ISD::NON_EXTLOAD ||
          LD->isVolatile())
        return false;

      //  We're looking for a sequence like this:
      //  t13: i64,ch = load<LD8[%ref.tmp]> t0, t6, undef:i64
      //      t16: i64 = srl t13, Constant:i32<32>
      //    t17: i32 = truncate t16
      //  t18: f32 = bitcast t17
      //    t19: i32 = truncate t13
      //  t20: f32 = bitcast t19

      if (!LD->hasNUsesOfValue(2, 0))
        return false;

      auto UI = LD->use_begin();
      while (UI.getUse().getResNo() != 0) ++UI;
      SDNode *Trunc = *UI++;
      while (UI.getUse().getResNo() != 0) ++UI;
      SDNode *RightShift = *UI;
      if (Trunc->getOpcode() != ISD::TRUNCATE)
        std::swap(Trunc, RightShift);

      if (Trunc->getOpcode() != ISD::TRUNCATE ||
          Trunc->getValueType(0) != MVT::i32 ||
          !Trunc->hasOneUse())
        return false;
      if (RightShift->getOpcode() != ISD::SRL ||
          !isa<ConstantSDNode>(RightShift->getOperand(1)) ||
          RightShift->getConstantOperandVal(1) != 32 ||
          !RightShift->hasOneUse())
        return false;

      SDNode *Trunc2 = *RightShift->use_begin();
      if (Trunc2->getOpcode() != ISD::TRUNCATE ||
          Trunc2->getValueType(0) != MVT::i32 ||
          !Trunc2->hasOneUse())
        return false;

      SDNode *Bitcast = *Trunc->use_begin();
      SDNode *Bitcast2 = *Trunc2->use_begin();

      if (Bitcast->getOpcode() != ISD::BITCAST ||
          Bitcast->getValueType(0) != MVT::f32)
        return false;
      if (Bitcast2->getOpcode() != ISD::BITCAST ||
          Bitcast2->getValueType(0) != MVT::f32)
        return false;

      if (Subtarget.isLittleEndian())
        std::swap(Bitcast, Bitcast2);

      // Bitcast has the second float (in memory-layout order) and Bitcast2
      // has the first one.

      SDValue BasePtr = LD->getBasePtr();
      if (LD->isIndexed()) {
        assert(LD->getAddressingMode() == ISD::PRE_INC &&
               "Non-pre-inc AM on PPC?");
        BasePtr =
          DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
                      LD->getOffset());
      }

      auto MMOFlags =
          LD->getMemOperand()->getFlags() & ~MachineMemOperand::MOVolatile;
      SDValue FloatLoad = DAG.getLoad(MVT::f32, dl, LD->getChain(), BasePtr,
                                      LD->getPointerInfo(), LD->getAlignment(),
                                      MMOFlags, LD->getAAInfo());
      SDValue AddPtr =
        DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
                    BasePtr, DAG.getIntPtrConstant(4, dl));
      SDValue FloatLoad2 = DAG.getLoad(
          MVT::f32, dl, SDValue(FloatLoad.getNode(), 1), AddPtr,
          LD->getPointerInfo().getWithOffset(4),
          MinAlign(LD->getAlignment(), 4), MMOFlags, LD->getAAInfo());

      if (LD->isIndexed()) {
        // Note that DAGCombine should re-form any pre-increment load(s) from
        // what is produced here if that makes sense.
        DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), BasePtr);
      }

      DCI.CombineTo(Bitcast2, FloatLoad);
      DCI.CombineTo(Bitcast, FloatLoad2);

      DAG.ReplaceAllUsesOfValueWith(SDValue(LD, LD->isIndexed() ? 2 : 1),
                                    SDValue(FloatLoad2.getNode(), 1));
      return true;
    };

    if (ReplaceTwoFloatLoad())
      return SDValue(N, 0);

    EVT MemVT = LD->getMemoryVT();
    Type *Ty = MemVT.getTypeForEVT(*DAG.getContext());
    unsigned ABIAlignment = DAG.getDataLayout().getABITypeAlignment(Ty);
    Type *STy = MemVT.getScalarType().getTypeForEVT(*DAG.getContext());
    unsigned ScalarABIAlignment = DAG.getDataLayout().getABITypeAlignment(STy);
    if (LD->isUnindexed() && VT.isVector() &&
        ((Subtarget.hasAltivec() && ISD::isNON_EXTLoad(N) &&
          // P8 and later hardware should just use LOAD.
          !Subtarget.hasP8Vector() && (VT == MVT::v16i8 || VT == MVT::v8i16 ||
                                       VT == MVT::v4i32 || VT == MVT::v4f32)) ||
         (Subtarget.hasQPX() && (VT == MVT::v4f64 || VT == MVT::v4f32) &&
          LD->getAlignment() >= ScalarABIAlignment)) &&
        LD->getAlignment() < ABIAlignment) {
      // This is a type-legal unaligned Altivec or QPX load.
      SDValue Chain = LD->getChain();
      SDValue Ptr = LD->getBasePtr();
      bool isLittleEndian = Subtarget.isLittleEndian();

      // This implements the loading of unaligned vectors as described in
      // the venerable Apple Velocity Engine overview. Specifically:
      // https://developer.apple.com/hardwaredrivers/ve/alignment.html
      // https://developer.apple.com/hardwaredrivers/ve/code_optimization.html
      //
      // The general idea is to expand a sequence of one or more unaligned
      // loads into an alignment-based permutation-control instruction (lvsl
      // or lvsr), a series of regular vector loads (which always truncate
      // their input address to an aligned address), and a series of
      // permutations.  The results of these permutations are the requested
      // loaded values.  The trick is that the last "extra" load is not taken
      // from the address you might suspect (sizeof(vector) bytes after the
      // last requested load), but rather sizeof(vector) - 1 bytes after the
      // last requested vector. The point of this is to avoid a page fault if
      // the base address happened to be aligned. This works because if the
      // base address is aligned, then adding less than a full vector length
      // will cause the last vector in the sequence to be (re)loaded.
      // Otherwise, the next vector will be fetched as you might suspect was
      // necessary.

      // We might be able to reuse the permutation generation from
      // a different base address offset from this one by an aligned amount.
      // The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this
      // optimization later.
      Intrinsic::ID Intr, IntrLD, IntrPerm;
      MVT PermCntlTy, PermTy, LDTy;
      if (Subtarget.hasAltivec()) {
        Intr = isLittleEndian ? Intrinsic::ppc_altivec_lvsr :
                                Intrinsic::ppc_altivec_lvsl;
        IntrLD = Intrinsic::ppc_altivec_lvx;
        IntrPerm = Intrinsic::ppc_altivec_vperm;
        PermCntlTy = MVT::v16i8;
        PermTy = MVT::v4i32;
        LDTy = MVT::v4i32;
      } else {
        Intr =   MemVT == MVT::v4f64 ? Intrinsic::ppc_qpx_qvlpcld :
                                       Intrinsic::ppc_qpx_qvlpcls;
        IntrLD = MemVT == MVT::v4f64 ? Intrinsic::ppc_qpx_qvlfd :
                                       Intrinsic::ppc_qpx_qvlfs;
        IntrPerm = Intrinsic::ppc_qpx_qvfperm;
        PermCntlTy = MVT::v4f64;
        PermTy = MVT::v4f64;
        LDTy = MemVT.getSimpleVT();
      }

      SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, PermCntlTy);

      // Create the new MMO for the new base load. It is like the original MMO,
      // but represents an area in memory almost twice the vector size centered
      // on the original address. If the address is unaligned, we might start
      // reading up to (sizeof(vector)-1) bytes below the address of the
      // original unaligned load.
      MachineFunction &MF = DAG.getMachineFunction();
      MachineMemOperand *BaseMMO =
        MF.getMachineMemOperand(LD->getMemOperand(),
                                -(long)MemVT.getStoreSize()+1,
                                2*MemVT.getStoreSize()-1);

      // Create the new base load.
      SDValue LDXIntID =
          DAG.getTargetConstant(IntrLD, dl, getPointerTy(MF.getDataLayout()));
      SDValue BaseLoadOps[] = { Chain, LDXIntID, Ptr };
      SDValue BaseLoad =
        DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
                                DAG.getVTList(PermTy, MVT::Other),
                                BaseLoadOps, LDTy, BaseMMO);

      // Note that the value of IncOffset (which is provided to the next
      // load's pointer info offset value, and thus used to calculate the
      // alignment), and the value of IncValue (which is actually used to
      // increment the pointer value) are different! This is because we
      // require the next load to appear to be aligned, even though it
      // is actually offset from the base pointer by a lesser amount.
      int IncOffset = VT.getSizeInBits() / 8;
      int IncValue = IncOffset;

      // Walk (both up and down) the chain looking for another load at the real
      // (aligned) offset (the alignment of the other load does not matter in
      // this case). If found, then do not use the offset reduction trick, as
      // that will prevent the loads from being later combined (as they would
      // otherwise be duplicates).
      if (!findConsecutiveLoad(LD, DAG))
        --IncValue;

      SDValue Increment =
          DAG.getConstant(IncValue, dl, getPointerTy(MF.getDataLayout()));
      Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);

      MachineMemOperand *ExtraMMO =
        MF.getMachineMemOperand(LD->getMemOperand(),
                                1, 2*MemVT.getStoreSize()-1);
      SDValue ExtraLoadOps[] = { Chain, LDXIntID, Ptr };
      SDValue ExtraLoad =
        DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
                                DAG.getVTList(PermTy, MVT::Other),
                                ExtraLoadOps, LDTy, ExtraMMO);

      SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
        BaseLoad.getValue(1), ExtraLoad.getValue(1));

      // Because vperm has a big-endian bias, we must reverse the order
      // of the input vectors and complement the permute control vector
      // when generating little endian code.  We have already handled the
      // latter by using lvsr instead of lvsl, so just reverse BaseLoad
      // and ExtraLoad here.
      SDValue Perm;
      if (isLittleEndian)
        Perm = BuildIntrinsicOp(IntrPerm,
                                ExtraLoad, BaseLoad, PermCntl, DAG, dl);
      else
        Perm = BuildIntrinsicOp(IntrPerm,
                                BaseLoad, ExtraLoad, PermCntl, DAG, dl);

      if (VT != PermTy)
        Perm = Subtarget.hasAltivec() ?
                 DAG.getNode(ISD::BITCAST, dl, VT, Perm) :
                 DAG.getNode(ISD::FP_ROUND, dl, VT, Perm, // QPX
                               DAG.getTargetConstant(1, dl, MVT::i64));
                               // second argument is 1 because this rounding
                               // is always exact.

      // The output of the permutation is our loaded result, the TokenFactor is
      // our new chain.
      DCI.CombineTo(N, Perm, TF);
      return SDValue(N, 0);
    }
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    bool isLittleEndian = Subtarget.isLittleEndian();
    unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
    Intrinsic::ID Intr = (isLittleEndian ? Intrinsic::ppc_altivec_lvsr
                                         : Intrinsic::ppc_altivec_lvsl);
    if ((IID == Intr ||
         IID == Intrinsic::ppc_qpx_qvlpcld ||
         IID == Intrinsic::ppc_qpx_qvlpcls) &&
        N->getOperand(1)->getOpcode() == ISD::ADD) {
      SDValue Add = N->getOperand(1);

      int Bits = IID == Intrinsic::ppc_qpx_qvlpcld ?
                 5 /* 32 byte alignment */ : 4 /* 16 byte alignment */;

      if (DAG.MaskedValueIsZero(Add->getOperand(1),
                                APInt::getAllOnesValue(Bits /* alignment */)
                                    .zext(Add.getScalarValueSizeInBits()))) {
        SDNode *BasePtr = Add->getOperand(0).getNode();
        for (SDNode::use_iterator UI = BasePtr->use_begin(),
                                  UE = BasePtr->use_end();
             UI != UE; ++UI) {
          if (UI->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
              cast<ConstantSDNode>(UI->getOperand(0))->getZExtValue() == IID) {
            // We've found another LVSL/LVSR, and this address is an aligned
            // multiple of that one. The results will be the same, so use the
            // one we've just found instead.

            return SDValue(*UI, 0);
          }
        }
      }

      if (isa<ConstantSDNode>(Add->getOperand(1))) {
        SDNode *BasePtr = Add->getOperand(0).getNode();
        for (SDNode::use_iterator UI = BasePtr->use_begin(),
             UE = BasePtr->use_end(); UI != UE; ++UI) {
          if (UI->getOpcode() == ISD::ADD &&
              isa<ConstantSDNode>(UI->getOperand(1)) &&
              (cast<ConstantSDNode>(Add->getOperand(1))->getZExtValue() -
               cast<ConstantSDNode>(UI->getOperand(1))->getZExtValue()) %
              (1ULL << Bits) == 0) {
            SDNode *OtherAdd = *UI;
            for (SDNode::use_iterator VI = OtherAdd->use_begin(),
                 VE = OtherAdd->use_end(); VI != VE; ++VI) {
              if (VI->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
                  cast<ConstantSDNode>(VI->getOperand(0))->getZExtValue() == IID) {
                return SDValue(*VI, 0);
              }
            }
          }
        }
      }
    }

    // Combine vmaxsw/h/b(a, a's negation) to abs(a)
    // Expose the vabsduw/h/b opportunity for downstream
    if (!DCI.isAfterLegalizeDAG() && Subtarget.hasP9Altivec() &&
        (IID == Intrinsic::ppc_altivec_vmaxsw ||
         IID == Intrinsic::ppc_altivec_vmaxsh ||
         IID == Intrinsic::ppc_altivec_vmaxsb)) {
      SDValue V1 = N->getOperand(1);
      SDValue V2 = N->getOperand(2);
      if ((V1.getSimpleValueType() == MVT::v4i32 ||
           V1.getSimpleValueType() == MVT::v8i16 ||
           V1.getSimpleValueType() == MVT::v16i8) &&
          V1.getSimpleValueType() == V2.getSimpleValueType()) {
        // (0-a, a)
        if (V1.getOpcode() == ISD::SUB &&
            ISD::isBuildVectorAllZeros(V1.getOperand(0).getNode()) &&
            V1.getOperand(1) == V2) {
          return DAG.getNode(ISD::ABS, dl, V2.getValueType(), V2);
        }
        // (a, 0-a)
        if (V2.getOpcode() == ISD::SUB &&
            ISD::isBuildVectorAllZeros(V2.getOperand(0).getNode()) &&
            V2.getOperand(1) == V1) {
          return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
        }
        // (x-y, y-x)
        if (V1.getOpcode() == ISD::SUB && V2.getOpcode() == ISD::SUB &&
            V1.getOperand(0) == V2.getOperand(1) &&
            V1.getOperand(1) == V2.getOperand(0)) {
          return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
        }
      }
    }

    break;
  }
  case ISD::INTRINSIC_W_CHAIN:
    // For little endian, VSX loads require generating lxvd2x/xxswapd.
    // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
    if (Subtarget.needsSwapsForVSXMemOps()) {
      switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
      default:
        break;
      case Intrinsic::ppc_vsx_lxvw4x:
      case Intrinsic::ppc_vsx_lxvd2x:
        return expandVSXLoadForLE(N, DCI);
      }
    }
    break;
  case ISD::INTRINSIC_VOID:
    // For little endian, VSX stores require generating xxswapd/stxvd2x.
    // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
    if (Subtarget.needsSwapsForVSXMemOps()) {
      switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
      default:
        break;
      case Intrinsic::ppc_vsx_stxvw4x:
      case Intrinsic::ppc_vsx_stxvd2x:
        return expandVSXStoreForLE(N, DCI);
      }
    }
    break;
  case ISD::BSWAP:
    // Turn BSWAP (LOAD) -> lhbrx/lwbrx.
    if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
        N->getOperand(0).hasOneUse() &&
        (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 ||
         (Subtarget.hasLDBRX() && Subtarget.isPPC64() &&
          N->getValueType(0) == MVT::i64))) {
      SDValue Load = N->getOperand(0);
      LoadSDNode *LD = cast<LoadSDNode>(Load);
      // Create the byte-swapping load.
      SDValue Ops[] = {
        LD->getChain(),    // Chain
        LD->getBasePtr(),  // Ptr
        DAG.getValueType(N->getValueType(0)) // VT
      };
      SDValue BSLoad =
        DAG.getMemIntrinsicNode(PPCISD::LBRX, dl,
                                DAG.getVTList(N->getValueType(0) == MVT::i64 ?
                                              MVT::i64 : MVT::i32, MVT::Other),
                                Ops, LD->getMemoryVT(), LD->getMemOperand());

      // If this is an i16 load, insert the truncate.
      SDValue ResVal = BSLoad;
      if (N->getValueType(0) == MVT::i16)
        ResVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, BSLoad);

      // First, combine the bswap away.  This makes the value produced by the
      // load dead.
      DCI.CombineTo(N, ResVal);

      // Next, combine the load away, we give it a bogus result value but a
      // real chain result.  The result value is dead because the bswap is
      // dead.
      DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));

      // Return N so it doesn't get rechecked!
      return SDValue(N, 0);
    }
    break;
  case PPCISD::VCMP:
    // If a VCMPo node already exists with exactly the same operands as this
    // node, use its result instead of this node (VCMPo computes both a CR6 and
    // a normal output).
    if (!N->getOperand(0).hasOneUse() &&
        !N->getOperand(1).hasOneUse() &&
        !N->getOperand(2).hasOneUse()) {

      // Scan all of the users of the LHS, looking for VCMPo's that match.
      SDNode *VCMPoNode = nullptr;

      SDNode *LHSN = N->getOperand(0).getNode();
      for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end();
           UI != E; ++UI)
        if (UI->getOpcode() == PPCISD::VCMPo &&
            UI->getOperand(1) == N->getOperand(1) &&
            UI->getOperand(2) == N->getOperand(2) &&
            UI->getOperand(0) == N->getOperand(0)) {
          VCMPoNode = *UI;
          break;
        }

      // If there is no VCMPo node, or if the flag value has a single use,
      // don't transform this.
      if (!VCMPoNode || VCMPoNode->hasNUsesOfValue(0, 1))
        break;

      // Look at the (necessarily single) use of the flag value.  If it has a
      // chain, this transformation is more complex.  Note that multiple things
      // could use the value result, which we should ignore.
      SDNode *FlagUser = nullptr;
      for (SDNode::use_iterator UI = VCMPoNode->use_begin();
           FlagUser == nullptr; ++UI) {
        assert(UI != VCMPoNode->use_end() && "Didn't find user!");
        SDNode *User = *UI;
        for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
          if (User->getOperand(i) == SDValue(VCMPoNode, 1)) {
            FlagUser = User;
            break;
          }
        }
      }

      // If the user is a MFOCRF instruction, we know this is safe.
      // Otherwise we give up for right now.
      if (FlagUser->getOpcode() == PPCISD::MFOCRF)
        return SDValue(VCMPoNode, 0);
    }
    break;
  case ISD::BRCOND: {
    SDValue Cond = N->getOperand(1);
    SDValue Target = N->getOperand(2);

    if (Cond.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
        cast<ConstantSDNode>(Cond.getOperand(1))->getZExtValue() ==
          Intrinsic::loop_decrement) {

      // We now need to make the intrinsic dead (it cannot be instruction
      // selected).
      DAG.ReplaceAllUsesOfValueWith(Cond.getValue(1), Cond.getOperand(0));
      assert(Cond.getNode()->hasOneUse() &&
             "Counter decrement has more than one use");

      return DAG.getNode(PPCISD::BDNZ, dl, MVT::Other,
                         N->getOperand(0), Target);
    }
    break;
  }
  case ISD::BR_CC: {
    // If this is a branch on an altivec predicate comparison, lower this so
    // that we don't have to do a MFOCRF: instead, branch directly on CR6.  This
    // lowering is done pre-legalize, because the legalizer lowers the predicate
    // compare down to code that is difficult to reassemble.
    ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
    SDValue LHS = N->getOperand(2), RHS = N->getOperand(3);

    // Sometimes the promoted value of the intrinsic is ANDed by some non-zero
    // value. If so, pass-through the AND to get to the intrinsic.
    if (LHS.getOpcode() == ISD::AND &&
        LHS.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN &&
        cast<ConstantSDNode>(LHS.getOperand(0).getOperand(1))->getZExtValue() ==
          Intrinsic::loop_decrement &&
        isa<ConstantSDNode>(LHS.getOperand(1)) &&
        !isNullConstant(LHS.getOperand(1)))
      LHS = LHS.getOperand(0);

    if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
        cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue() ==
          Intrinsic::loop_decrement &&
        isa<ConstantSDNode>(RHS)) {
      assert((CC == ISD::SETEQ || CC == ISD::SETNE) &&
             "Counter decrement comparison is not EQ or NE");

      unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();
      bool isBDNZ = (CC == ISD::SETEQ && Val) ||
                    (CC == ISD::SETNE && !Val);

      // We now need to make the intrinsic dead (it cannot be instruction
      // selected).
      DAG.ReplaceAllUsesOfValueWith(LHS.getValue(1), LHS.getOperand(0));
      assert(LHS.getNode()->hasOneUse() &&
             "Counter decrement has more than one use");

      return DAG.getNode(isBDNZ ? PPCISD::BDNZ : PPCISD::BDZ, dl, MVT::Other,
                         N->getOperand(0), N->getOperand(4));
    }

    int CompareOpc;
    bool isDot;

    if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
        isa<ConstantSDNode>(RHS) && (CC == ISD::SETEQ || CC == ISD::SETNE) &&
        getVectorCompareInfo(LHS, CompareOpc, isDot, Subtarget)) {
      assert(isDot && "Can't compare against a vector result!");

      // If this is a comparison against something other than 0/1, then we know
      // that the condition is never/always true.
      unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();
      if (Val != 0 && Val != 1) {
        if (CC == ISD::SETEQ)      // Cond never true, remove branch.
          return N->getOperand(0);
        // Always !=, turn it into an unconditional branch.
        return DAG.getNode(ISD::BR, dl, MVT::Other,
                           N->getOperand(0), N->getOperand(4));
      }

      bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);

      // Create the PPCISD altivec 'dot' comparison node.
      SDValue Ops[] = {
        LHS.getOperand(2),  // LHS of compare
        LHS.getOperand(3),  // RHS of compare
        DAG.getConstant(CompareOpc, dl, MVT::i32)
      };
      EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue };
      SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops);

      // Unpack the result based on how the target uses it.
      PPC::Predicate CompOpc;
      switch (cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue()) {
      default:  // Can't happen, don't crash on invalid number though.
      case 0:   // Branch on the value of the EQ bit of CR6.
        CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE;
        break;
      case 1:   // Branch on the inverted value of the EQ bit of CR6.
        CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ;
        break;
      case 2:   // Branch on the value of the LT bit of CR6.
        CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE;
        break;
      case 3:   // Branch on the inverted value of the LT bit of CR6.
        CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT;
        break;
      }

      return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0),
                         DAG.getConstant(CompOpc, dl, MVT::i32),
                         DAG.getRegister(PPC::CR6, MVT::i32),
                         N->getOperand(4), CompNode.getValue(1));
    }
    break;
  }
  case ISD::BUILD_VECTOR:
    return DAGCombineBuildVector(N, DCI);
  case ISD::ABS:
    return combineABS(N, DCI);
  case ISD::VSELECT:
    return combineVSelect(N, DCI);
  }

  return SDValue();
}

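/// BuildSDIVPow2 - Build an sdiv-by-power-of-2 as a shift-and-add-carry
/// sequence (PPCISD::SRA_ADDZE) instead of a divide, negating the result
/// when the divisor is a negative power of two.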
SDValue
PPCTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
                                 SelectionDAG &DAG,
                                 SmallVectorImpl<SDNode *> &Created) const {
  // fold (sdiv X, pow2)
  EVT VT = N->getValueType(0);
  if (VT == MVT::i64 && !Subtarget.isPPC64())
    return SDValue();
  if ((VT != MVT::i32 && VT != MVT::i64) ||
      !(Divisor.isPowerOf2() || (-Divisor).isPowerOf2()))
    return SDValue();

  SDLoc DL(N);
  SDValue N0 = N->getOperand(0);

  bool IsNegPow2 = (-Divisor).isPowerOf2();
  unsigned Lg2 = (IsNegPow2 ? -Divisor : Divisor).countTrailingZeros();
  SDValue ShiftAmt = DAG.getConstant(Lg2, DL, VT);

  SDValue Op = DAG.getNode(PPCISD::SRA_ADDZE, DL, VT, N0, ShiftAmt);
  Created.push_back(Op.getNode());

  if (IsNegPow2) {
    Op = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
    Created.push_back(Op.getNode());
  }

  return Op;
}

//===----------------------------------------------------------------------===//
// Inline Assembly Support
//===----------------------------------------------------------------------===//

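/// computeKnownBitsForTargetNode - Report bits known to be zero for
/// PPC-specific nodes: an i16 byte-reversed load (lhbrx) clears the upper
/// 16 bits of its 32-bit result, and the altivec predicate-compare
/// intrinsics produce only 0 or 1.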
void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
                                                      KnownBits &Known,
                                                      const APInt &DemandedElts,
                                                      const SelectionDAG &DAG,
                                                      unsigned Depth) const {
  Known.resetAll();
  switch (Op.getOpcode()) {
  default: break;
  case PPCISD::LBRX: {
    // lhbrx is known to have the top bits cleared out.
    if (cast<VTSDNode>(Op.getOperand(2))->getVT() == MVT::i16)
      Known.Zero = 0xFFFF0000;
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    switch (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue()) {
    default: break;
    case Intrinsic::ppc_altivec_vcmpbfp_p:
    case Intrinsic::ppc_altivec_vcmpeqfp_p:
    case Intrinsic::ppc_altivec_vcmpequb_p:
    case Intrinsic::ppc_altivec_vcmpequh_p:
    case Intrinsic::ppc_altivec_vcmpequw_p:
    case Intrinsic::ppc_altivec_vcmpequd_p:
    case Intrinsic::ppc_altivec_vcmpgefp_p:
    case Intrinsic::ppc_altivec_vcmpgtfp_p:
    case Intrinsic::ppc_altivec_vcmpgtsb_p:
    case Intrinsic::ppc_altivec_vcmpgtsh_p:
    case Intrinsic::ppc_altivec_vcmpgtsw_p:
    case Intrinsic::ppc_altivec_vcmpgtsd_p:
    case Intrinsic::ppc_altivec_vcmpgtub_p:
    case Intrinsic::ppc_altivec_vcmpgtuh_p:
    case Intrinsic::ppc_altivec_vcmpgtuw_p:
    case Intrinsic::ppc_altivec_vcmpgtud_p:
      Known.Zero = ~1U;  // All bits but the low one are known to be zero.
      break;
    }
  }
  }
}

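/// getPrefLoopAlignment - Return the preferred log2 loop alignment. On the
/// 970 and PWR4 through PWR9 cores, prefer 32-byte (2^5) alignment for
/// innermost nested loops and for small loops whose body fits in a single
/// 32-byte instruction-cache line.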
unsigned PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
  switch (Subtarget.getDarwinDirective()) {
  default: break;
  case PPC::DIR_970:
  case PPC::DIR_PWR4:
  case PPC::DIR_PWR5:
  case PPC::DIR_PWR5X:
  case PPC::DIR_PWR6:
  case PPC::DIR_PWR6X:
  case PPC::DIR_PWR7:
  case PPC::DIR_PWR8:
  case PPC::DIR_PWR9: {
    if (!ML)
      break;

    if (!DisableInnermostLoopAlign32) {
      // If the nested loop is an innermost loop, prefer a 32-byte alignment,
      // so that we can decrease cache misses and branch-prediction misses.
      // Actual alignment of the loop will depend on the hotness check and
      // other logic in alignBlocks.
      if (ML->getLoopDepth() > 1 && ML->getSubLoops().empty())
        return 5;
    }

    const PPCInstrInfo *TII = Subtarget.getInstrInfo();

    // For small loops (between 5 and 8 instructions), align to a 32-byte
    // boundary so that the entire loop fits in one instruction-cache line.
    uint64_t LoopSize = 0;
    for (auto I = ML->block_begin(), IE = ML->block_end(); I != IE; ++I)
      for (auto J = (*I)->begin(), JE = (*I)->end(); J != JE; ++J) {
        LoopSize += TII->getInstSizeInBytes(*J);
        if (LoopSize > 32)
          break;
      }

    if (LoopSize > 16 && LoopSize <= 32)
      return 5;

    break;
  }
  }

  return TargetLowering::getPrefLoopAlignment(ML);
}

/// getConstraintType - Given a constraint, return the type of
/// constraint it is for this target.
PPCTargetLowering::ConstraintType
PPCTargetLowering::getConstraintType(StringRef Constraint) const {
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    default: break;
    case 'b':
    case 'r':
    case 'f':
    case 'd':
    case 'v':
    case 'y':
      return C_RegisterClass;
    case 'Z':
      // FIXME: While Z does indicate a memory constraint, it specifically
      // indicates an r+r address (used in conjunction with the 'y' modifier
      // in the replacement string). Currently, we're forcing the base
      // register to be r0 in the asm printer (which is interpreted as zero)
      // and forming the complete address in the second register. This is
      // suboptimal.
      return C_Memory;
    }
  } else if (Constraint == "wc") { // individual CR bits.
    return C_RegisterClass;
  } else if (Constraint == "wa" || Constraint == "wd" ||
             Constraint == "wf" || Constraint == "ws" ||
             Constraint == "wi" || Constraint == "ww") {
    return C_RegisterClass; // VSX registers.
  }
  return TargetLowering::getConstraintType(Constraint);
}

13985
/// Examine constraint type and operand type and determine a weight value.
13986
/// This object must already have been set up with the operand type
13987
/// and the current alternative constraint selected.
13988
TargetLowering::ConstraintWeight
13989
PPCTargetLowering::getSingleConstraintMatchWeight(
13990
486
    AsmOperandInfo &info, const char *constraint) const {
13991
486
  ConstraintWeight weight = CW_Invalid;
13992
486
  Value *CallOperandVal = info.CallOperandVal;
13993
486
    // If we don't have a value, we can't do a match,
13994
486
    // but allow it at the lowest weight.
13995
486
  if (!CallOperandVal)
13996
204
    return CW_Default;
13997
282
  Type *type = CallOperandVal->getType();
13998
282
13999
282
  // Look at the constraint type.
14000
282
  if (StringRef(constraint) == "wc" && 
type->isIntegerTy(1)0
)
14001
0
    return CW_Register; // an individual CR bit.
14002
282
  else if ((StringRef(constraint) == "wa" ||
14003
282
            StringRef(constraint) == "wd" ||
14004
282
            StringRef(constraint) == "wf") &&
14005
282
           
type->isVectorTy()0
)
14006
0
    return CW_Register;
14007
282
  else if (StringRef(constraint) == "wi" && 
type->isIntegerTy(64)0
)
14008
0
    return CW_Register; // just holds 64-bit integer data.
14009
282
  else if (StringRef(constraint) == "ws" && 
type->isDoubleTy()0
)
14010
0
    return CW_Register;
14011
282
  else if (StringRef(constraint) == "ww" && 
type->isFloatTy()0
)
14012
0
    return CW_Register;
14013
282
14014
282
  switch (*constraint) {
14015
282
  default:
14016
282
    weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
14017
282
    break;
14018
282
  case 'b':
14019
0
    if (type->isIntegerTy())
14020
0
      weight = CW_Register;
14021
0
    break;
14022
282
  case 'f':
14023
0
    if (type->isFloatTy())
14024
0
      weight = CW_Register;
14025
0
    break;
14026
282
  case 'd':
14027
0
    if (type->isDoubleTy())
14028
0
      weight = CW_Register;
14029
0
    break;
14030
282
  case 'v':
14031
0
    if (type->isVectorTy())
14032
0
      weight = CW_Register;
14033
0
    break;
14034
282
  case 'y':
14035
0
    weight = CW_Register;
14036
0
    break;
14037
282
  case 'Z':
14038
0
    weight = CW_Memory;
14039
0
    break;
14040
282
  }
14041
282
  return weight;
14042
282
}
14043
14044
std::pair<unsigned, const TargetRegisterClass *>
14045
PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
14046
                                                StringRef Constraint,
14047
3.75k
                                                MVT VT) const {
14048
3.75k
  if (Constraint.size() == 1) {
14049
457
    // GCC RS6000 Constraint Letters
14050
457
    switch (Constraint[0]) {
14051
457
    case 'b':   // R1-R31
14052
8
      if (VT == MVT::i64 && Subtarget.isPPC64())
14053
6
        return std::make_pair(0U, &PPC::G8RC_NOX0RegClass);
14054
2
      return std::make_pair(0U, &PPC::GPRC_NOR0RegClass);
14055
380
    case 'r':   // R0-R31
14056
380
      if (VT == MVT::i64 && Subtarget.isPPC64())
14057
69
        return std::make_pair(0U, &PPC::G8RCRegClass);
14058
311
      return std::make_pair(0U, &PPC::GPRCRegClass);
14059
311
    // 'd' and 'f' constraints are both defined to be "the floating point
14060
311
    // registers", where one is for 32-bit and the other for 64-bit. We don't
14061
311
    // really care overly much here so just give them all the same reg classes.
14062
311
    case 'd':
14063
29
    case 'f':
14064
29
      if (Subtarget.hasSPE()) {
14065
4
        if (VT == MVT::f32 || VT == MVT::i32)
14066
3
          return std::make_pair(0U, &PPC::SPE4RCRegClass);
14067
1
        if (VT == MVT::f64 || VT == MVT::i64)
14068
1
          return std::make_pair(0U, &PPC::SPERCRegClass);
14069
25
      } else {
14070
25
        if (VT == MVT::f32 || VT == MVT::i32)
14071
6
          return std::make_pair(0U, &PPC::F4RCRegClass);
14072
19
        if (VT == MVT::f64 || VT == MVT::i64)
14073
19
          return std::make_pair(0U, &PPC::F8RCRegClass);
14074
0
        if (VT == MVT::v4f64 && Subtarget.hasQPX())
14075
0
          return std::make_pair(0U, &PPC::QFRCRegClass);
14076
0
        if (VT == MVT::v4f32 && Subtarget.hasQPX())
14077
0
          return std::make_pair(0U, &PPC::QSRCRegClass);
14078
0
      }
14079
0
      break;
14080
0
    case 'v':
14081
0
      if (VT == MVT::v4f64 && Subtarget.hasQPX())
14082
0
        return std::make_pair(0U, &PPC::QFRCRegClass);
14083
0
      if (VT == MVT::v4f32 && Subtarget.hasQPX())
14084
0
        return std::make_pair(0U, &PPC::QSRCRegClass);
14085
0
      if (Subtarget.hasAltivec())
14086
0
        return std::make_pair(0U, &PPC::VRRCRegClass);
14087
0
      break;
14088
0
    case 'y':   // crrc
14089
0
      return std::make_pair(0U, &PPC::CRRCRegClass);
14090
3.29k
    }
14091
3.29k
  } else if (Constraint == "wc" && 
Subtarget.useCRBits()37
) {
14092
36
    // An individual CR bit.
14093
36
    return std::make_pair(0U, &PPC::CRBITRCRegClass);
14094
3.26k
  } else if ((Constraint == "wa" || 
Constraint == "wd"3.25k
||
14095
3.26k
             
Constraint == "wf"3.25k
||
Constraint == "wi"3.25k
) &&
14096
3.26k
             
Subtarget.hasVSX()6
) {
14097
4
    return std::make_pair(0U, &PPC::VSRCRegClass);
14098
3.25k
  } else if ((Constraint == "ws" || 
Constraint == "ww"3.25k
) &&
Subtarget.hasVSX()6
) {
14099
4
    if (VT == MVT::f32 && Subtarget.hasP8Vector())
14100
3
      return std::make_pair(0U, &PPC::VSSRCRegClass);
14101
1
    else
14102
1
      return std::make_pair(0U, &PPC::VSFRCRegClass);
14103
3.29k
  }
14104
3.29k
14105
3.29k
  std::pair<unsigned, const TargetRegisterClass *> R =
14106
3.29k
      TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
14107
3.29k
14108
3.29k
  // r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers
14109
3.29k
  // (which we call X[0-9]+). If a 64-bit value has been requested, and a
14110
3.29k
  // 32-bit GPR has been selected, then 'upgrade' it to the 64-bit parent
14111
3.29k
  // register.
14112
3.29k
  // FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use
14113
3.29k
  // the AsmName field from *RegisterInfo.td, then this would not be necessary.
14114
3.29k
  if (R.first && VT == MVT::i64 && Subtarget.isPPC64() &&
14115
3.29k
      PPC::GPRCRegClass.contains(R.first))
14116
76
    return std::make_pair(TRI->getMatchingSuperReg(R.first,
14117
76
                            PPC::sub_32, &PPC::G8RCRegClass),
14118
76
                          &PPC::G8RCRegClass);
14119
3.21k
14120
3.21k
  // GCC accepts 'cc' as an alias for 'cr0', and we need to do the same.
14121
3.21k
  if (!R.second && StringRef("{cc}").equals_lower(Constraint)) {
14122
16
    R.first = PPC::CR0;
14123
16
    R.second = &PPC::CRRCRegClass;
14124
16
  }
14125
3.21k
14126
3.21k
  return R;
14127
3.21k
}
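// [Annotation] Sketch of the VSX constraint path above from user code; the
// example is hypothetical and assumes a powerpc64le target with VSX enabled:
//
//   #include <altivec.h>
//   vector double vsx_abs(vector double v) {
//     vector double r;
//     // "wa" maps to VSRCRegClass above; %x prints the full VSX register.
//     asm("xvabsdp %x0, %x1" : "=wa"(r) : "wa"(v));
//     return r;
//   }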
14128
14129
/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
14130
/// vector.  If it is invalid, don't add anything to Ops.
14131
void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
14132
                                                     std::string &Constraint,
14133
                                                     std::vector<SDValue>&Ops,
14134
80
                                                     SelectionDAG &DAG) const {
14135
80
  SDValue Result;
14136
80
14137
80
  // Only support length 1 constraints.
14138
80
  if (Constraint.length() > 1) return;
14139
80
14140
80
  char Letter = Constraint[0];
14141
80
  switch (Letter) {
14142
80
  default: break;
14143
80
  case 'I':
14144
21
  case 'J':
14145
21
  case 'K':
14146
21
  case 'L':
14147
21
  case 'M':
14148
21
  case 'N':
14149
21
  case 'O':
14150
21
  case 'P': {
14151
21
    ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op);
14152
21
    if (!CST) return; // Must be an immediate to match.
14153
18
    SDLoc dl(Op);
14154
18
    int64_t Value = CST->getSExtValue();
14155
18
    EVT TCVT = MVT::i64; // All constants taken to be 64 bits so that negative
14156
18
                         // numbers are printed as such.
14157
18
    switch (Letter) {
14158
18
    default: llvm_unreachable("Unknown constraint letter!");
14159
18
    case 'I':  // "I" is a signed 16-bit constant.
14160
4
      if (isInt<16>(Value))
14161
4
        Result = DAG.getTargetConstant(Value, dl, TCVT);
14162
4
      break;
14163
18
    case 'J':  // "J" is a constant with only the high-order 16 bits nonzero.
14164
0
      if (isShiftedUInt<16, 16>(Value))
14165
0
        Result = DAG.getTargetConstant(Value, dl, TCVT);
14166
0
      break;
14167
18
    case 'L':  // "L" is a signed 16-bit constant shifted left 16 bits.
14168
0
      if (isShiftedInt<16, 16>(Value))
14169
0
        Result = DAG.getTargetConstant(Value, dl, TCVT);
14170
0
      break;
14171
18
    case 'K':  // "K" is a constant with only the low-order 16 bits nonzero.
14172
0
      if (isUInt<16>(Value))
14173
0
        Result = DAG.getTargetConstant(Value, dl, TCVT);
14174
0
      break;
14175
18
    case 'M':  // "M" is a constant that is greater than 31.
14176
0
      if (Value > 31)
14177
0
        Result = DAG.getTargetConstant(Value, dl, TCVT);
14178
0
      break;
14179
18
    case 'N':  // "N" is a positive constant that is an exact power of two.
14180
0
      if (Value > 0 && isPowerOf2_64(Value))
14181
0
        Result = DAG.getTargetConstant(Value, dl, TCVT);
14182
0
      break;
14183
18
    case 'O':  // "O" is the constant zero.
14184
14
      if (Value == 0)
14185
14
        Result = DAG.getTargetConstant(Value, dl, TCVT);
14186
14
      break;
14187
18
    case 'P':  // "P" is a constant whose negation is a signed 16-bit constant.
14188
0
      if (isInt<16>(-Value))
14189
0
        Result = DAG.getTargetConstant(Value, dl, TCVT);
14190
0
      break;
14191
18
    }
14192
18
    break;
14193
18
  }
14194
77
  }
14195
77
14196
77
  if (Result.getNode()) {
14197
18
    Ops.push_back(Result);
14198
18
    return;
14199
18
  }
14200
59
14201
59
  // Handle standard constraint letters.
14202
59
  TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
14203
59
}
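// [Annotation] Hypothetical uses of the immediate constraints checked above;
// "I" takes a signed 16-bit value and "K" an unsigned 16-bit value, so both
// constants below pass the validation (assumes a PowerPC target):
//
//   long imm_demo(long x) {
//     long r;
//     asm("addi %0, %1, %2" : "=r"(r) : "b"(x), "I"(-42));   // isInt<16>
//     asm("ori %0, %1, %2" : "=r"(r) : "r"(r), "K"(0xBEEF)); // isUInt<16>
//     return r;
//   }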
14204
14205
// isLegalAddressingMode - Return true if the addressing mode represented
14206
// by AM is legal for this target, for a load/store of the specified type.
14207
bool PPCTargetLowering::isLegalAddressingMode(const DataLayout &DL,
14208
                                              const AddrMode &AM, Type *Ty,
14209
109k
                                              unsigned AS, Instruction *I) const {
14210
109k
  // PPC does not allow r+i addressing modes for vectors!
14211
109k
  if (Ty->isVectorTy() && AM.BaseOffs != 0)
14212
3.88k
    return false;
14213
105k
14214
105k
  // PPC allows a sign-extended 16-bit immediate field.
14215
105k
  if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
14216
38
    return false;
14217
105k
14218
105k
  // No global is ever allowed as a base.
14219
105k
  if (AM.BaseGV)
14220
3.60k
    return false;
14221
101k
14222
101k
  // PPC only supports r+r,
14223
101k
  switch (AM.Scale) {
14224
101k
  case 0:  // "r+i" or just "i", depending on HasBaseReg.
14225
28.1k
    break;
14226
101k
  case 1:
14227
64.8k
    if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed.
14228
8.32k
      return false;
14229
56.5k
    // Otherwise we have r+r or r+i.
14230
56.5k
    break;
14231
56.5k
  case 2:
14232
256
    if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed.
14233
128
      return false;
14234
128
    // Allow 2*r as r+r.
14235
128
    break;
14236
8.23k
  default:
14237
8.23k
    // No other scales are supported.
14238
8.23k
    return false;
14239
84.8k
  }
14240
84.8k
14241
84.8k
  return true;
14242
84.8k
}
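// [Annotation] Concrete readings of the rules above, with the relevant
// AddrMode fields spelled out:
//   base + 8              (Scale 0, small BaseOffs)   -> legal ("r+i")
//   base + index          (Scale 1, BaseOffs == 0)    -> legal ("r+r")
//   base + index + 8      (Scale 1, BaseOffs != 0)    -> rejected
//   2*index, nothing else (Scale 2)                   -> folded as "r+r"
//   global symbol as base (BaseGV set)                -> rejected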
14243
14244
SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
14245
7
                                           SelectionDAG &DAG) const {
14246
7
  MachineFunction &MF = DAG.getMachineFunction();
14247
7
  MachineFrameInfo &MFI = MF.getFrameInfo();
14248
7
  MFI.setReturnAddressIsTaken(true);
14249
7
14250
7
  if (verifyReturnAddressArgumentIsConstant(Op, DAG))
14251
0
    return SDValue();
14252
7
14253
7
  SDLoc dl(Op);
14254
7
  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
14255
7
14256
7
  // Make sure the function does not optimize away the store of the RA to
14257
7
  // the stack.
14258
7
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
14259
7
  FuncInfo->setLRStoreRequired();
14260
7
  bool isPPC64 = Subtarget.isPPC64();
14261
7
  auto PtrVT = getPointerTy(MF.getDataLayout());
14262
7
14263
7
  if (Depth > 0) {
14264
2
    SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
14265
2
    SDValue Offset =
14266
2
        DAG.getConstant(Subtarget.getFrameLowering()->getReturnSaveOffset(), dl,
14267
2
                        isPPC64 ? MVT::i64 : MVT::i32);
14268
2
    return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
14269
2
                       DAG.getNode(ISD::ADD, dl, PtrVT, FrameAddr, Offset),
14270
2
                       MachinePointerInfo());
14271
2
  }
14272
5
14273
5
  // Just load the return address off the stack.
14274
5
  SDValue RetAddrFI = getReturnAddrFrameIndex(DAG);
14275
5
  return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), RetAddrFI,
14276
5
                     MachinePointerInfo());
14277
5
}
14278
14279
SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
14280
12
                                          SelectionDAG &DAG) const {
14281
12
  SDLoc dl(Op);
14282
12
  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
14283
12
14284
12
  MachineFunction &MF = DAG.getMachineFunction();
14285
12
  MachineFrameInfo &MFI = MF.getFrameInfo();
14286
12
  MFI.setFrameAddressIsTaken(true);
14287
12
14288
12
  EVT PtrVT = getPointerTy(MF.getDataLayout());
14289
12
  bool isPPC64 = PtrVT == MVT::i64;
14290
12
14291
12
  // Naked functions never have a frame pointer, and so we use r1. For all
14292
12
  // other functions, this decision must be delayed until during PEI.
14293
12
  unsigned FrameReg;
14294
12
  if (MF.getFunction().hasFnAttribute(Attribute::Naked))
14295
1
    FrameReg = isPPC64 ? PPC::X1 : PPC::R1;
14296
11
  else
14297
11
    FrameReg = isPPC64 ? PPC::FP8 : PPC::FP;
14298
12
14299
12
  SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg,
14300
12
                                         PtrVT);
14301
16
  while (Depth--)
14302
4
    FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
14303
4
                            FrameAddr, MachinePointerInfo());
14304
12
  return FrameAddr;
14305
12
}
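// [Annotation] These two lowerings back the GCC builtins; a nonzero depth
// walks the saved-frame chain, as in this hypothetical use:
//
//   void *parent_frame() { return __builtin_frame_address(1); }
//   void *parent_ra()    { return __builtin_return_address(1); }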
14306
14307
// FIXME? Maybe this could be a TableGen attribute on some registers and
14308
// this table could be generated automatically from RegInfo.
14309
unsigned PPCTargetLowering::getRegisterByName(const char* RegName, EVT VT,
14310
13
                                              SelectionDAG &DAG) const {
14311
13
  bool isPPC64 = Subtarget.isPPC64();
14312
13
  bool isDarwinABI = Subtarget.isDarwinABI();
14313
13
14314
13
  if ((isPPC64 && VT != MVT::i64 && VT != MVT::i32) ||
14315
13
      (!isPPC64 && VT != MVT::i32))
14316
0
    report_fatal_error("Invalid register global variable type");
14317
13
14318
13
  bool is64Bit = isPPC64 && VT == MVT::i64;
14319
13
  unsigned Reg = StringSwitch<unsigned>(RegName)
14320
13
                   .Case("r1", is64Bit ? 
PPC::X14
:
PPC::R19
)
14321
13
                   .Case("r2", (isDarwinABI || isPPC64) ? 
08
:
PPC::R25
)
14322
13
                   .Case("r13", (!isPPC64 && 
isDarwinABI5
) ?
00
:
14323
13
                                  (is64Bit ? PPC::X13 : PPC::R13))
14324
13
                   .Default(0);
14325
13
14326
13
  if (Reg)
14327
7
    return Reg;
14328
6
  report_fatal_error("Invalid register name global variable");
14329
6
}
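// [Annotation] getRegisterByName is what resolves global register variables;
// a minimal hypothetical use on 64-bit ELF, where "r1" maps to PPC::X1 in
// the switch above:
//
//   register unsigned long stack_pointer asm("r1");
//   unsigned long current_sp() { return stack_pointer; }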
14330
14331
3.50k
bool PPCTargetLowering::isAccessedAsGotIndirect(SDValue GA) const {
14332
3.50k
  // The 32-bit SVR4 ABI accesses everything as got-indirect.
14333
3.50k
  if (Subtarget.isSVR4ABI() && !Subtarget.isPPC64())
14334
0
    return true;
14335
3.50k
14336
3.50k
  CodeModel::Model CModel = getTargetMachine().getCodeModel();
14337
3.50k
  // If it is small or large code model, module locals are accessed
14338
3.50k
  // indirectly by loading their address from .toc/.got. The difference
14339
3.50k
  // is that for large code model we have ADDIStocHA8 + LDtocL and for
14340
3.50k
  // small code model we simply have LDtoc.
14341
3.50k
  if (CModel == CodeModel::Small || CModel == CodeModel::Large)
14342
26
    return true;
14343
3.47k
14344
3.47k
  // JumpTable and BlockAddress are accessed as got-indirect.
14345
3.47k
  if (isa<JumpTableSDNode>(GA) || isa<BlockAddressSDNode>(GA))
14346
11
    return true;
14347
3.46k
14348
3.46k
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(GA)) {
14349
1.77k
    const GlobalValue *GV = G->getGlobal();
14350
1.77k
    unsigned char GVFlags = Subtarget.classifyGlobalReference(GV);
14351
1.77k
    // The NLP flag indicates that a global access has to use an
14352
1.77k
    // extra indirection.
14353
1.77k
    if (GVFlags & PPCII::MO_NLP_FLAG)
14354
990
      return true;
14355
2.47k
  }
14356
2.47k
14357
2.47k
  return false;
14358
2.47k
}
14359
14360
bool
14361
1.33k
PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
14362
1.33k
  // The PowerPC target isn't yet aware of offsets.
14363
1.33k
  return false;
14364
1.33k
}
14365
14366
bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
14367
                                           const CallInst &I,
14368
                                           MachineFunction &MF,
14369
1.49k
                                           unsigned Intrinsic) const {
14370
1.49k
  switch (Intrinsic) {
14371
1.49k
  case Intrinsic::ppc_qpx_qvlfd:
14372
22
  case Intrinsic::ppc_qpx_qvlfs:
14373
22
  case Intrinsic::ppc_qpx_qvlfcd:
14374
22
  case Intrinsic::ppc_qpx_qvlfcs:
14375
22
  case Intrinsic::ppc_qpx_qvlfiwa:
14376
22
  case Intrinsic::ppc_qpx_qvlfiwz:
14377
22
  case Intrinsic::ppc_altivec_lvx:
14378
22
  case Intrinsic::ppc_altivec_lvxl:
14379
22
  case Intrinsic::ppc_altivec_lvebx:
14380
22
  case Intrinsic::ppc_altivec_lvehx:
14381
22
  case Intrinsic::ppc_altivec_lvewx:
14382
22
  case Intrinsic::ppc_vsx_lxvd2x:
14383
22
  case Intrinsic::ppc_vsx_lxvw4x: {
14384
22
    EVT VT;
14385
22
    switch (Intrinsic) {
14386
22
    case Intrinsic::ppc_altivec_lvebx:
14387
0
      VT = MVT::i8;
14388
0
      break;
14389
22
    case Intrinsic::ppc_altivec_lvehx:
14390
0
      VT = MVT::i16;
14391
0
      break;
14392
22
    case Intrinsic::ppc_altivec_lvewx:
14393
0
      VT = MVT::i32;
14394
0
      break;
14395
22
    case Intrinsic::ppc_vsx_lxvd2x:
14396
9
      VT = MVT::v2f64;
14397
9
      break;
14398
22
    case Intrinsic::ppc_qpx_qvlfd:
14399
0
      VT = MVT::v4f64;
14400
0
      break;
14401
22
    case Intrinsic::ppc_qpx_qvlfs:
14402
0
      VT = MVT::v4f32;
14403
0
      break;
14404
22
    case Intrinsic::ppc_qpx_qvlfcd:
14405
0
      VT = MVT::v2f64;
14406
0
      break;
14407
22
    case Intrinsic::ppc_qpx_qvlfcs:
14408
0
      VT = MVT::v2f32;
14409
0
      break;
14410
22
    default:
14411
13
      VT = MVT::v4i32;
14412
13
      break;
14413
22
    }
14414
22
14415
22
    Info.opc = ISD::INTRINSIC_W_CHAIN;
14416
22
    Info.memVT = VT;
14417
22
    Info.ptrVal = I.getArgOperand(0);
14418
22
    Info.offset = -VT.getStoreSize()+1;
14419
22
    Info.size = 2*VT.getStoreSize()-1;
14420
22
    Info.align = 1;
14421
22
    Info.flags = MachineMemOperand::MOLoad;
14422
22
    return true;
14423
22
  }
14424
22
  case Intrinsic::ppc_qpx_qvlfda:
14425
0
  case Intrinsic::ppc_qpx_qvlfsa:
14426
0
  case Intrinsic::ppc_qpx_qvlfcda:
14427
0
  case Intrinsic::ppc_qpx_qvlfcsa:
14428
0
  case Intrinsic::ppc_qpx_qvlfiwaa:
14429
0
  case Intrinsic::ppc_qpx_qvlfiwza: {
14430
0
    EVT VT;
14431
0
    switch (Intrinsic) {
14432
0
    case Intrinsic::ppc_qpx_qvlfda:
14433
0
      VT = MVT::v4f64;
14434
0
      break;
14435
0
    case Intrinsic::ppc_qpx_qvlfsa:
14436
0
      VT = MVT::v4f32;
14437
0
      break;
14438
0
    case Intrinsic::ppc_qpx_qvlfcda:
14439
0
      VT = MVT::v2f64;
14440
0
      break;
14441
0
    case Intrinsic::ppc_qpx_qvlfcsa:
14442
0
      VT = MVT::v2f32;
14443
0
      break;
14444
0
    default:
14445
0
      VT = MVT::v4i32;
14446
0
      break;
14447
0
    }
14448
0
14449
0
    Info.opc = ISD::INTRINSIC_W_CHAIN;
14450
0
    Info.memVT = VT;
14451
0
    Info.ptrVal = I.getArgOperand(0);
14452
0
    Info.offset = 0;
14453
0
    Info.size = VT.getStoreSize();
14454
0
    Info.align = 1;
14455
0
    Info.flags = MachineMemOperand::MOLoad;
14456
0
    return true;
14457
0
  }
14458
20
  case Intrinsic::ppc_qpx_qvstfd:
14459
20
  case Intrinsic::ppc_qpx_qvstfs:
14460
20
  case Intrinsic::ppc_qpx_qvstfcd:
14461
20
  case Intrinsic::ppc_qpx_qvstfcs:
14462
20
  case Intrinsic::ppc_qpx_qvstfiw:
14463
20
  case Intrinsic::ppc_altivec_stvx:
14464
20
  case Intrinsic::ppc_altivec_stvxl:
14465
20
  case Intrinsic::ppc_altivec_stvebx:
14466
20
  case Intrinsic::ppc_altivec_stvehx:
14467
20
  case Intrinsic::ppc_altivec_stvewx:
14468
20
  case Intrinsic::ppc_vsx_stxvd2x:
14469
20
  case Intrinsic::ppc_vsx_stxvw4x: {
14470
20
    EVT VT;
14471
20
    switch (Intrinsic) {
14472
20
    case Intrinsic::ppc_altivec_stvebx:
14473
0
      VT = MVT::i8;
14474
0
      break;
14475
20
    case Intrinsic::ppc_altivec_stvehx:
14476
0
      VT = MVT::i16;
14477
0
      break;
14478
20
    case Intrinsic::ppc_altivec_stvewx:
14479
0
      VT = MVT::i32;
14480
0
      break;
14481
20
    case Intrinsic::ppc_vsx_stxvd2x:
14482
9
      VT = MVT::v2f64;
14483
9
      break;
14484
20
    case Intrinsic::ppc_qpx_qvstfd:
14485
0
      VT = MVT::v4f64;
14486
0
      break;
14487
20
    case Intrinsic::ppc_qpx_qvstfs:
14488
0
      VT = MVT::v4f32;
14489
0
      break;
14490
20
    case Intrinsic::ppc_qpx_qvstfcd:
14491
0
      VT = MVT::v2f64;
14492
0
      break;
14493
20
    case Intrinsic::ppc_qpx_qvstfcs:
14494
0
      VT = MVT::v2f32;
14495
0
      break;
14496
20
    default:
14497
11
      VT = MVT::v4i32;
14498
11
      break;
14499
20
    }
14500
20
14501
20
    Info.opc = ISD::INTRINSIC_VOID;
14502
20
    Info.memVT = VT;
14503
20
    Info.ptrVal = I.getArgOperand(1);
14504
20
    Info.offset = -VT.getStoreSize()+1;
14505
20
    Info.size = 2*VT.getStoreSize()-1;
14506
20
    Info.align = 1;
14507
20
    Info.flags = MachineMemOperand::MOStore;
14508
20
    return true;
14509
20
  }
14510
20
  case Intrinsic::ppc_qpx_qvstfda:
14511
0
  case Intrinsic::ppc_qpx_qvstfsa:
14512
0
  case Intrinsic::ppc_qpx_qvstfcda:
14513
0
  case Intrinsic::ppc_qpx_qvstfcsa:
14514
0
  case Intrinsic::ppc_qpx_qvstfiwa: {
14515
0
    EVT VT;
14516
0
    switch (Intrinsic) {
14517
0
    case Intrinsic::ppc_qpx_qvstfda:
14518
0
      VT = MVT::v4f64;
14519
0
      break;
14520
0
    case Intrinsic::ppc_qpx_qvstfsa:
14521
0
      VT = MVT::v4f32;
14522
0
      break;
14523
0
    case Intrinsic::ppc_qpx_qvstfcda:
14524
0
      VT = MVT::v2f64;
14525
0
      break;
14526
0
    case Intrinsic::ppc_qpx_qvstfcsa:
14527
0
      VT = MVT::v2f32;
14528
0
      break;
14529
0
    default:
14530
0
      VT = MVT::v4i32;
14531
0
      break;
14532
0
    }
14533
0
14534
0
    Info.opc = ISD::INTRINSIC_VOID;
14535
0
    Info.memVT = VT;
14536
0
    Info.ptrVal = I.getArgOperand(1);
14537
0
    Info.offset = 0;
14538
0
    Info.size = VT.getStoreSize();
14539
0
    Info.align = 1;
14540
0
    Info.flags = MachineMemOperand::MOStore;
14541
0
    return true;
14542
0
  }
14543
1.45k
  default:
14544
1.45k
    break;
14545
1.45k
  }
14546
1.45k
14547
1.45k
  return false;
14548
1.45k
}
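// [Annotation] The offset/size pairs above encode a conservative window for
// the implicitly 16-byte-aligned Altivec accesses: with S = memVT's store
// size, Info.offset = -S + 1 and Info.size = 2*S - 1. For lvx (S == 16)
// that is offset -15 and size 31: every byte the hardware's address masking
// could touch on either side of the given pointer.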
14549
14550
/// getOptimalMemOpType - Returns the target specific optimal type for load
14551
/// and store operations as a result of memset, memcpy, and memmove
14552
/// lowering. If DstAlign is zero that means it's safe to destination
14553
/// alignment can satisfy any constraint. Similarly if SrcAlign is zero it
14554
/// means there isn't a need to check it against alignment requirement,
14555
/// probably because the source does not need to be loaded. If 'IsMemset' is
14556
/// true, that means it's expanding a memset. If 'ZeroMemset' is true, that
14557
/// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy
14558
/// source is constant so it does not need to be loaded.
14559
/// It returns EVT::Other if the type should be determined using generic
14560
/// target-independent logic.
14561
EVT PPCTargetLowering::getOptimalMemOpType(
14562
    uint64_t Size, unsigned DstAlign, unsigned SrcAlign, bool IsMemset,
14563
    bool ZeroMemset, bool MemcpyStrSrc,
14564
140
    const AttributeList &FuncAttributes) const {
14565
140
  if (getTargetMachine().getOptLevel() != CodeGenOpt::None) {
14566
63
    // When expanding a memset, require at least two QPX instructions to cover
14567
63
    // the cost of loading the value to be stored from the constant pool.
14568
63
    if (Subtarget.hasQPX() && Size >= 32 && (!IsMemset || Size >= 64) &&
14569
63
        (!SrcAlign || SrcAlign >= 32) && (!DstAlign || DstAlign >= 32) &&
14570
63
        !FuncAttributes.hasFnAttribute(Attribute::NoImplicitFloat)) {
14571
2
      return MVT::v4f64;
14572
2
    }
14573
61
14574
61
    // We should use Altivec/VSX loads and stores when available. For unaligned
14575
61
    // addresses, unaligned VSX loads are only fast starting with the P8.
14576
61
    if (Subtarget.hasAltivec() && Size >= 16 &&
14577
61
        (((!SrcAlign || SrcAlign >= 16) && (!DstAlign || DstAlign >= 16)) ||
14578
40
         ((IsMemset && Subtarget.hasVSX()) || Subtarget.hasP8Vector())))
14579
37
      return MVT::v4i32;
14580
101
  }
14581
101
14582
101
  if (Subtarget.isPPC64()) {
14583
97
    return MVT::i64;
14584
97
  }
14585
4
14586
4
  return MVT::i32;
14587
4
}
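// [Annotation] One plausible effect of this hook: a 32-byte memcpy between
// 16-byte-aligned buffers on an Altivec subtarget can be expanded as two
// v4i32 load/store pairs, while at -O0 a 64-bit subtarget falls back to
// i64-sized copies (hypothetical example; the actual expansion is decided
// by the generic memcpy lowering that consults this hook).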
14588
14589
/// Returns true if it is beneficial to convert a load of a constant
14590
/// to just the constant itself.
14591
bool PPCTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
14592
0
                                                          Type *Ty) const {
14593
0
  assert(Ty->isIntegerTy());
14594
0
14595
0
  unsigned BitSize = Ty->getPrimitiveSizeInBits();
14596
0
  return !(BitSize == 0 || BitSize > 64);
14597
0
}
14598
14599
4.34k
bool PPCTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
14600
4.34k
  if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
14601
1
    return false;
14602
4.34k
  unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
14603
4.34k
  unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
14604
4.34k
  return NumBits1 == 64 && NumBits2 == 32;
14605
4.34k
}
14606
14607
3.79k
bool PPCTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
14608
3.79k
  if (!VT1.isInteger() || !VT2.isInteger())
14609
0
    return false;
14610
3.79k
  unsigned NumBits1 = VT1.getSizeInBits();
14611
3.79k
  unsigned NumBits2 = VT2.getSizeInBits();
14612
3.79k
  return NumBits1 == 64 && NumBits2 == 32;
14613
3.79k
}
14614
14615
3.13k
bool PPCTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
14616
3.13k
  // Generally speaking, zexts are not free, but they are free when they can be
14617
3.13k
  // folded with other operations.
14618
3.13k
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val)) {
14619
235
    EVT MemVT = LD->getMemoryVT();
14620
235
    if ((MemVT == MVT::i1 || MemVT == MVT::i8 || MemVT == MVT::i16 ||
14621
235
         (Subtarget.isPPC64() && MemVT == MVT::i32)) &&
14622
235
        (LD->getExtensionType() == ISD::NON_EXTLOAD ||
14623
81
         LD->getExtensionType() == ISD::ZEXTLOAD))
14624
81
      return true;
14625
3.05k
  }
14626
3.05k
14627
3.05k
  // FIXME: Add other cases...
14628
3.05k
  //  - 32-bit shifts with a zext to i64
14629
3.05k
  //  - zext after ctlz, bswap, etc.
14630
3.05k
  //  - zext after and by a constant mask
14631
3.05k
14632
3.05k
  return TargetLowering::isZExtFree(Val, VT2);
14633
3.05k
}
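// [Annotation] The load cases are free because PPC's narrow integer loads
// (lbz/lhz/lwz) already zero-extend into the full 64-bit register, e.g. for
// the hypothetical
//
//   unsigned long widen(unsigned *p) { return *p; }  // zext i32 -> i64
//
// the lwz that loads *p also produces the extended value, so no separate
// extension instruction is needed on a 64-bit subtarget.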
14634
14635
56
bool PPCTargetLowering::isFPExtFree(EVT DestVT, EVT SrcVT) const {
14636
56
  assert(DestVT.isFloatingPoint() && SrcVT.isFloatingPoint() &&
14637
56
         "invalid fpext types");
14638
56
  // Extending to float128 is not free.
14639
56
  if (DestVT == MVT::f128)
14640
0
    return false;
14641
56
  return true;
14642
56
}
14643
14644
4.97k
bool PPCTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
14645
4.97k
  return isInt<16>(Imm) || isUInt<16>(Imm);
14646
4.97k
}
14647
14648
4.43k
bool PPCTargetLowering::isLegalAddImmediate(int64_t Imm) const {
14649
4.43k
  return isInt<16>(Imm) || isUInt<16>(Imm);
14650
4.43k
}
14651
14652
bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
14653
                                                       unsigned,
14654
                                                       unsigned,
14655
                                                       MachineMemOperand::Flags,
14656
1.57k
                                                       bool *Fast) const {
14657
1.57k
  if (DisablePPCUnaligned)
14658
38
    return false;
14659
1.53k
14660
1.53k
  // PowerPC supports unaligned memory access for simple non-vector types.
14661
1.53k
  // Although accessing unaligned addresses is not as efficient as accessing
14662
1.53k
  // aligned addresses, it is generally more efficient than manual expansion,
14663
1.53k
  // and generally only traps for software emulation when crossing page
14664
1.53k
  // boundaries.
14665
1.53k
14666
1.53k
  if (!VT.isSimple())
14667
0
    return false;
14668
1.53k
14669
1.53k
  if (VT.getSimpleVT().isVector()) {
14670
1.04k
    if (Subtarget.hasVSX()) {
14671
673
      if (VT != MVT::v2f64 && VT != MVT::v2i64 &&
14672
673
          VT != MVT::v4f32 && VT != MVT::v4i32)
14673
0
        return false;
14674
375
    } else {
14675
375
      return false;
14676
375
    }
14677
1.15k
  }
14678
1.15k
14679
1.15k
  if (VT == MVT::ppcf128)
14680
0
    return false;
14681
1.15k
14682
1.15k
  if (Fast)
14683
104
    *Fast = true;
14684
1.15k
14685
1.15k
  return true;
14686
1.15k
}
14687
14688
1.47k
bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
14689
1.47k
  VT = VT.getScalarType();
14690
1.47k
14691
1.47k
  if (!VT.isSimple())
14692
0
    return false;
14693
1.47k
14694
1.47k
  switch (VT.getSimpleVT().SimpleTy) {
14695
1.47k
  case MVT::f32:
14696
1.32k
  case MVT::f64:
14697
1.32k
    return true;
14698
1.32k
  case MVT::f128:
14699
137
    return (EnableQuadPrecision && Subtarget.hasP9Vector());
14700
1.32k
  default:
14701
13
    break;
14702
13
  }
14703
13
14704
13
  return false;
14705
13
}
14706
14707
const MCPhysReg *
14708
59
PPCTargetLowering::getScratchRegisters(CallingConv::ID) const {
14709
59
  // LR is a callee-save register, but we must treat it as clobbered by any call
14710
59
  // site. Hence we include LR in the scratch registers, which are in turn added
14711
59
  // as implicit-defs for stackmaps and patchpoints. The same reasoning applies
14712
59
  // to CTR, which is used by any indirect call.
14713
59
  static const MCPhysReg ScratchRegs[] = {
14714
59
    PPC::X12, PPC::LR8, PPC::CTR8, 0
14715
59
  };
14716
59
14717
59
  return ScratchRegs;
14718
59
}
14719
14720
unsigned PPCTargetLowering::getExceptionPointerRegister(
14721
72
    const Constant *PersonalityFn) const {
14722
72
  return Subtarget.isPPC64() ? PPC::X3 : PPC::R3;
14723
72
}
14724
14725
unsigned PPCTargetLowering::getExceptionSelectorRegister(
14726
36
    const Constant *PersonalityFn) const {
14727
36
  return Subtarget.isPPC64() ? PPC::X4 : PPC::R4;
14728
36
}
14729
14730
bool
14731
PPCTargetLowering::shouldExpandBuildVectorWithShuffles(
14732
446
                     EVT VT, unsigned DefinedValues) const {
14733
446
  if (VT == MVT::v2i64)
14734
86
    return Subtarget.hasDirectMove(); // Don't need stack ops with direct moves
14735
360
14736
360
  if (Subtarget.hasVSX() || Subtarget.hasQPX())
14737
338
    return true;
14738
22
14739
22
  return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues);
14740
22
}
14741
14742
121k
Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const {
14743
121k
  if (DisableILPPref || Subtarget.enableMachineScheduler())
14744
121k
    return TargetLowering::getSchedulingPreference(N);
14745
0
14746
0
  return Sched::ILP;
14747
0
}
14748
14749
// Create a fast isel object.
14750
FastISel *
14751
PPCTargetLowering::createFastISel(FunctionLoweringInfo &FuncInfo,
14752
804
                                  const TargetLibraryInfo *LibInfo) const {
14753
804
  return PPC::createFastISel(FuncInfo, LibInfo);
14754
804
}
14755
14756
3
void PPCTargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const {
14757
3
  if (Subtarget.isDarwinABI()) return;
14758
3
  if (!Subtarget.isPPC64()) return;
14759
3
14760
3
  // Update IsSplitCSR in PPCFunctionInfo
14761
3
  PPCFunctionInfo *PFI = Entry->getParent()->getInfo<PPCFunctionInfo>();
14762
3
  PFI->setIsSplitCSR(true);
14763
3
}
14764
14765
void PPCTargetLowering::insertCopiesSplitCSR(
14766
  MachineBasicBlock *Entry,
14767
3
  const SmallVectorImpl<MachineBasicBlock *> &Exits) const {
14768
3
  const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
14769
3
  const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent());
14770
3
  if (!IStart)
14771
0
    return;
14772
3
14773
3
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
14774
3
  MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo();
14775
3
  MachineBasicBlock::iterator MBBI = Entry->begin();
14776
158
  for (const MCPhysReg *I = IStart; *I; ++I) {
14777
155
    const TargetRegisterClass *RC = nullptr;
14778
155
    if (PPC::G8RCRegClass.contains(*I))
14779
56
      RC = &PPC::G8RCRegClass;
14780
99
    else if (PPC::F8RCRegClass.contains(*I))
14781
54
      RC = &PPC::F8RCRegClass;
14782
45
    else if (PPC::CRRCRegClass.contains(*I))
14783
9
      RC = &PPC::CRRCRegClass;
14784
36
    else if (PPC::VRRCRegClass.contains(*I))
14785
36
      RC = &PPC::VRRCRegClass;
14786
36
    else
14787
36
      llvm_unreachable("Unexpected register class in CSRsViaCopy!");
14788
155
14789
155
    unsigned NewVR = MRI->createVirtualRegister(RC);
14790
155
    // Create copy from CSR to a virtual register.
14791
155
    // FIXME: this currently does not emit CFI pseudo-instructions, it works
14792
155
    // fine for CXX_FAST_TLS since the C++-style TLS access functions should be
14793
155
    // nounwind. If we want to generalize this later, we may need to emit
14794
155
    // CFI pseudo-instructions.
14795
155
    assert(Entry->getParent()->getFunction().hasFnAttribute(
14796
155
             Attribute::NoUnwind) &&
14797
155
           "Function should be nounwind in insertCopiesSplitCSR!");
14798
155
    Entry->addLiveIn(*I);
14799
155
    BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR)
14800
155
      .addReg(*I);
14801
155
14802
155
    // Insert the copy-back instructions right before the terminator.
14803
155
    for (auto *Exit : Exits)
14804
155
      BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(),
14805
155
              TII->get(TargetOpcode::COPY), *I)
14806
155
        .addReg(NewVR);
14807
155
  }
14808
3
}
14809
14810
// Override to enable LOAD_STACK_GUARD lowering on Linux.
14811
12
bool PPCTargetLowering::useLoadStackGuardNode() const {
14812
12
  if (!Subtarget.isTargetLinux())
14813
3
    return TargetLowering::useLoadStackGuardNode();
14814
9
  return true;
14815
9
}
14816
14817
// Override to disable global variable loading on Linux.
14818
5
void PPCTargetLowering::insertSSPDeclarations(Module &M) const {
14819
5
  if (!Subtarget.isTargetLinux())
14820
2
    return TargetLowering::insertSSPDeclarations(M);
14821
5
}
14822
14823
bool PPCTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
14824
949
                                     bool ForCodeSize) const {
14825
949
  if (!VT.isSimple() || !Subtarget.hasVSX())
14826
226
    return false;
14827
723
14828
723
  switch(VT.getSimpleVT().SimpleTy) {
14829
723
  default:
14830
6
    // For FP types that are currently not supported by PPC backend, return
14831
6
    // false. Examples: f16, f80.
14832
6
    return false;
14833
723
  case MVT::f32:
14834
717
  case MVT::f64:
14835
717
  case MVT::ppcf128:
14836
717
    return Imm.isPosZero();
14837
723
  }
14838
723
}
14839
14840
// For vector shift operation op, fold
14841
// (op x, (and y, ((1 << numbits(x)) - 1))) -> (target op x, y)
14842
static SDValue stripModuloOnShift(const TargetLowering &TLI, SDNode *N,
14843
3.71k
                                  SelectionDAG &DAG) {
14844
3.71k
  SDValue N0 = N->getOperand(0);
14845
3.71k
  SDValue N1 = N->getOperand(1);
14846
3.71k
  EVT VT = N0.getValueType();
14847
3.71k
  unsigned OpSizeInBits = VT.getScalarSizeInBits();
14848
3.71k
  unsigned Opcode = N->getOpcode();
14849
3.71k
  unsigned TargetOpcode;
14850
3.71k
14851
3.71k
  switch (Opcode) {
14852
3.71k
  default:
14853
0
    llvm_unreachable("Unexpected shift operation");
14854
3.71k
  case ISD::SHL:
14855
2.28k
    TargetOpcode = PPCISD::SHL;
14856
2.28k
    break;
14857
3.71k
  case ISD::SRL:
14858
940
    TargetOpcode = PPCISD::SRL;
14859
940
    break;
14860
3.71k
  case ISD::SRA:
14861
490
    TargetOpcode = PPCISD::SRA;
14862
490
    break;
14863
3.71k
  }
14864
3.71k
14865
3.71k
  if (VT.isVector() && TLI.isOperationLegal(Opcode, VT) &&
14866
3.71k
      N1->getOpcode() == ISD::AND)
14867
16
    if (ConstantSDNode *Mask = isConstOrConstSplat(N1->getOperand(1)))
14868
16
      if (Mask->getZExtValue() == OpSizeInBits - 1)
14869
16
        return DAG.getNode(TargetOpcode, SDLoc(N), VT, N0, N1->getOperand(0));
14870
3.70k
14871
3.70k
  return SDValue();
14872
3.70k
}
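// [Annotation] Worked v4i32 instance of this fold (assuming the vector shift
// is legal): OpSizeInBits is 32, so the mask must be 31 and
//   (shl x, (and y, 31))  -->  (PPCISD::SHL x, y)
// which is sound because vslw already uses only the low five bits of each
// element's shift amount, i.e. it shifts modulo 32.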
14873
14874
2.28k
SDValue PPCTargetLowering::combineSHL(SDNode *N, DAGCombinerInfo &DCI) const {
14875
2.28k
  if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
14876
6
    return Value;
14877
2.28k
14878
2.28k
  SDValue N0 = N->getOperand(0);
14879
2.28k
  ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(N->getOperand(1));
14880
2.28k
  if (!Subtarget.isISA3_0() ||
14881
2.28k
      N0.getOpcode() != ISD::SIGN_EXTEND ||
14882
2.28k
      N0.getOperand(0).getValueType() != MVT::i32 ||
14883
2.28k
      CN1 == nullptr || N->getValueType(0) != MVT::i64)
14884
2.14k
    return SDValue();
14885
138
14886
138
  // We can't save an operation here if the value is already extended, and
14887
138
  // the existing shift is easier to combine.
14888
138
  SDValue ExtsSrc = N0.getOperand(0);
14889
138
  if (ExtsSrc.getOpcode() == ISD::TRUNCATE &&
14890
138
      ExtsSrc.getOperand(0).getOpcode() == ISD::AssertSext)
14891
129
    return SDValue();
14892
9
14893
9
  SDLoc DL(N0);
14894
9
  SDValue ShiftBy = SDValue(CN1, 0);
14895
9
  // We want the shift amount to be i32 on the extswli, but the shift could
14896
9
  // have an i64 amount.
14897
9
  if (ShiftBy.getValueType() == MVT::i64)
14898
9
    ShiftBy = DCI.DAG.getConstant(CN1->getZExtValue(), DL, MVT::i32);
14899
9
14900
9
  return DCI.DAG.getNode(PPCISD::EXTSWSLI, DL, MVT::i64, N0->getOperand(0),
14901
9
                         ShiftBy);
14902
9
}
14903
14904
490
SDValue PPCTargetLowering::combineSRA(SDNode *N, DAGCombinerInfo &DCI) const {
14905
490
  if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
14906
4
    return Value;
14907
486
14908
486
  return SDValue();
14909
486
}
14910
14911
940
SDValue PPCTargetLowering::combineSRL(SDNode *N, DAGCombinerInfo &DCI) const {
14912
940
  if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
14913
6
    return Value;
14914
934
14915
934
  return SDValue();
14916
934
}
14917
14918
// Transform (add X, (zext(setne Z, C))) -> (addze X, (addic (addi Z, -C), -1))
14919
// Transform (add X, (zext(sete  Z, C))) -> (addze X, (subfic (addi Z, -C), 0))
14920
// When C is zero, the equation (addi Z, -C) can be simplified to Z
14921
// Requirement: -C in [-32768, 32767], X and Z are MVT::i64 types
14922
static SDValue combineADDToADDZE(SDNode *N, SelectionDAG &DAG,
14923
16.7k
                                 const PPCSubtarget &Subtarget) {
14924
16.7k
  if (!Subtarget.isPPC64())
14925
2.54k
    return SDValue();
14926
14.2k
14927
14.2k
  SDValue LHS = N->getOperand(0);
14928
14.2k
  SDValue RHS = N->getOperand(1);
14929
14.2k
14930
28.4k
  auto isZextOfCompareWithConstant = [](SDValue Op) {
14931
28.4k
    if (Op.getOpcode() != ISD::ZERO_EXTEND || !Op.hasOneUse() ||
14932
28.4k
        Op.getValueType() != MVT::i64)
14933
28.3k
      return false;
14934
96
14935
96
    SDValue Cmp = Op.getOperand(0);
14936
96
    if (Cmp.getOpcode() != ISD::SETCC || !Cmp.hasOneUse() ||
14937
96
        Cmp.getOperand(0).getValueType() != MVT::i64)
14938
57
      return false;
14939
39
14940
39
    if (auto *Constant = dyn_cast<ConstantSDNode>(Cmp.getOperand(1))) {
14941
31
      int64_t NegConstant = 0 - Constant->getSExtValue();
14942
31
      // Due to the limitations of the addi instruction,
14943
31
      // -C is required to be [-32768, 32767].
14944
31
      return isInt<16>(NegConstant);
14945
31
    }
14946
8
14947
8
    return false;
14948
8
  };
14949
14.2k
14950
14.2k
  bool LHSHasPattern = isZextOfCompareWithConstant(LHS);
14951
14.2k
  bool RHSHasPattern = isZextOfCompareWithConstant(RHS);
14952
14.2k
14953
14.2k
  // If there is a pattern, canonicalize a zext operand to the RHS.
14954
14.2k
  if (LHSHasPattern && !RHSHasPattern)
14955
12
    std::swap(LHS, RHS);
14956
14.2k
  else if (!LHSHasPattern && !RHSHasPattern)
14957
14.2k
    return SDValue();
14958
13
14959
13
  SDLoc DL(N);
14960
13
  SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Glue);
14961
13
  SDValue Cmp = RHS.getOperand(0);
14962
13
  SDValue Z = Cmp.getOperand(0);
14963
13
  auto *Constant = dyn_cast<ConstantSDNode>(Cmp.getOperand(1));
14964
13
14965
13
  assert(Constant && "Constant Should not be a null pointer.");
14966
13
  int64_t NegConstant = 0 - Constant->getSExtValue();
14967
13
14968
13
  switch(cast<CondCodeSDNode>(Cmp.getOperand(2))->get()) {
14969
13
  default: break;
14970
13
  case ISD::SETNE: {
14971
7
    //                                 when C == 0
14972
7
    //                             --> addze X, (addic Z, -1).carry
14973
7
    //                            /
14974
7
    // add X, (zext(setne Z, C))--
14975
7
    //                            \    when -32768 <= -C <= 32767 && C != 0
14976
7
    //                             --> addze X, (addic (addi Z, -C), -1).carry
14977
7
    SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
14978
7
                              DAG.getConstant(NegConstant, DL, MVT::i64));
14979
7
    SDValue AddOrZ = NegConstant != 0 ? Add : Z;
14980
7
    SDValue Addc = DAG.getNode(ISD::ADDC, DL, DAG.getVTList(MVT::i64, MVT::Glue),
14981
7
                               AddOrZ, DAG.getConstant(-1ULL, DL, MVT::i64));
14982
7
    return DAG.getNode(ISD::ADDE, DL, VTs, LHS, DAG.getConstant(0, DL, MVT::i64),
14983
7
                       SDValue(Addc.getNode(), 1));
14984
13
    }
14985
13
  case ISD::SETEQ: {
14986
6
    //                                 when C == 0
14987
6
    //                             --> addze X, (subfic Z, 0).carry
14988
6
    //                            /
14989
6
    // add X, (zext(sete  Z, C))--
14990
6
    //                            \    when -32768 <= -C <= 32767 && C != 0
14991
6
    //                             --> addze X, (subfic (addi Z, -C), 0).carry
14992
6
    SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
14993
6
                              DAG.getConstant(NegConstant, DL, MVT::i64));
14994
6
    SDValue AddOrZ = NegConstant != 0 ? Add : Z;
14995
6
    SDValue Subc = DAG.getNode(ISD::SUBC, DL, DAG.getVTList(MVT::i64, MVT::Glue),
14996
6
                               DAG.getConstant(0, DL, MVT::i64), AddOrZ);
14997
6
    return DAG.getNode(ISD::ADDE, DL, VTs, LHS, DAG.getConstant(0, DL, MVT::i64),
14998
6
                       SDValue(Subc.getNode(), 1));
14999
0
    }
15000
0
  }
15001
0
15002
0
  return SDValue();
15003
0
}
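// [Annotation] Worked SETNE instance with C == 0, with hypothetical register
// assignments:
//   long f(long x, long z) { return x + (z != 0); }
// becomes
//   addic r4, r4, -1   ; carry out is 1 exactly when z != 0
//   addze r3, r3       ; x += carry
// so no compare-and-select sequence is required.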
15004
15005
16.7k
SDValue PPCTargetLowering::combineADD(SDNode *N, DAGCombinerInfo &DCI) const {
15006
16.7k
  if (auto Value = combineADDToADDZE(N, DCI.DAG, Subtarget))
15007
13
    return Value;
15008
16.7k
15009
16.7k
  return SDValue();
15010
16.7k
}
15011
15012
// Detect TRUNCATE operations on bitcasts of float128 values.
15013
// What we are looking for here is the situation where we extract a subset
15014
// of bits from a 128 bit float.
15015
// This can be of two forms:
15016
// 1) BITCAST of f128 feeding TRUNCATE
15017
// 2) BITCAST of f128 feeding SRL (a shift) feeding TRUNCATE
15018
// The reason this is required is because we do not have a legal i128 type
15019
// and so we want to prevent having to store the f128 and then reload part
15020
// of it.
15021
SDValue PPCTargetLowering::combineTRUNCATE(SDNode *N,
15022
10.6k
                                           DAGCombinerInfo &DCI) const {
15023
10.6k
  // If we are using CRBits then try that first.
15024
10.6k
  if (Subtarget.useCRBits()) {
15025
10.0k
    // Check if CRBits did anything and return that if it did.
15026
10.0k
    if (SDValue CRTruncValue = DAGCombineTruncBoolExt(N, DCI))
15027
0
      return CRTruncValue;
15028
10.6k
  }
15029
10.6k
15030
10.6k
  SDLoc dl(N);
15031
10.6k
  SDValue Op0 = N->getOperand(0);
15032
10.6k
15033
10.6k
  // Looking for a truncate of i128 to i64.
15034
10.6k
  if (Op0.getValueType() != MVT::i128 || N->getValueType(0) != MVT::i64)
15035
10.5k
    return SDValue();
15036
23
15037
23
  int EltToExtract = DCI.DAG.getDataLayout().isBigEndian() ? 1 : 0;
15038
23
15039
23
  // SRL feeding TRUNCATE.
15040
23
  if (Op0.getOpcode() == ISD::SRL) {
15041
18
    ConstantSDNode *ConstNode = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
15042
18
    // The right shift has to be by 64 bits.
15043
18
    if (!ConstNode || ConstNode->getZExtValue() != 64)
15044
12
      return SDValue();
15045
6
15046
6
    // Switch the element number to extract.
15047
6
    EltToExtract = EltToExtract ? 0 : 1;
15048
6
    // Update Op0 past the SRL.
15049
6
    Op0 = Op0.getOperand(0);
15050
6
  }
15051
23
15052
23
  // BITCAST feeding a TRUNCATE possibly via SRL.
15053
23
  if (Op0.getOpcode() == ISD::BITCAST &&
15054
11
      Op0.getValueType() == MVT::i128 &&
15055
11
      Op0.getOperand(0).getValueType() == MVT::f128) {
15056
4
    SDValue Bitcast = DCI.DAG.getBitcast(MVT::v2i64, Op0.getOperand(0));
15057
4
    return DCI.DAG.getNode(
15058
4
        ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Bitcast,
15059
4
        DCI.DAG.getTargetConstant(EltToExtract, dl, MVT::i32));
15060
4
  }
15061
7
  return SDValue();
15062
7
}
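// [Annotation] A hypothetical source pattern that produces form 2 (BITCAST
// feeding SRL feeding TRUNCATE), assuming __float128 support is enabled:
//
//   unsigned long f128_high_bits(__float128 q) {
//     unsigned __int128 bits;
//     __builtin_memcpy(&bits, &q, sizeof(bits)); // BITCAST f128 -> i128
//     return (unsigned long)(bits >> 64);        // SRL 64, then TRUNCATE
//   }
//
// The combine extracts the element from a v2i64 bitcast directly instead of
// storing the f128 and reloading half of it.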
15063
15064
784
SDValue PPCTargetLowering::combineMUL(SDNode *N, DAGCombinerInfo &DCI) const {
15065
784
  SelectionDAG &DAG = DCI.DAG;
15066
784
15067
784
  ConstantSDNode *ConstOpOrElement = isConstOrConstSplat(N->getOperand(1));
15068
784
  if (!ConstOpOrElement)
15069
425
    return SDValue();
15070
359
15071
359
  // An imul is usually smaller than the alternative sequence for a legal type.
15072
359
  if (DAG.getMachineFunction().getFunction().hasMinSize() &&
15073
359
      isOperationLegal(ISD::MUL, N->getValueType(0)))
15074
48
    return SDValue();
15075
311
15076
311
  auto IsProfitable = [this](bool IsNeg, bool IsAddOne, EVT VT) -> bool {
15077
190
    switch (this->Subtarget.getDarwinDirective()) {
15078
190
    default:
15079
115
      // TODO: enhance the condition for subtarget before pwr8
15080
115
      return false;
15081
190
    case PPC::DIR_PWR8:
15082
39
      //  type        mul     add    shl
15083
39
      // scalar        4       1      1
15084
39
      // vector        7       2      2
15085
39
      return true;
15086
190
    case PPC::DIR_PWR9:
15087
36
      //  type        mul     add    shl
15088
36
      // scalar        5       2      2
15089
36
      // vector        7       2      2
15090
36
15091
36
      // The cycle ratios of the related operations are shown in the table above.
15092
36
      // Because mul is 5(scalar)/7(vector), add/sub/shl are all 2 for both
15093
36
      // scalar and vector type. For 2 instrs patterns, add/sub + shl
15094
36
      // are 4, it is always profitable; but for 3 instrs patterns
15095
36
      // (mul x, -(2^N + 1)) => -(add (shl x, N), x), sub + add + shl are 6.
15096
36
      // So we should only do it for vector type.
15097
36
      return IsAddOne && IsNeg ? VT.isVector() : true;
15098
190
    }
15099
190
  };
15100
311
15101
311
  EVT VT = N->getValueType(0);
15102
311
  SDLoc DL(N);
15103
311
15104
311
  const APInt &MulAmt = ConstOpOrElement->getAPIntValue();
15105
311
  bool IsNeg = MulAmt.isNegative();
15106
311
  APInt MulAmtAbs = MulAmt.abs();
15107
311
15108
311
  if ((MulAmtAbs - 1).isPowerOf2()) {
15109
122
    // (mul x, 2^N + 1) => (add (shl x, N), x)
15110
122
    // (mul x, -(2^N + 1)) => -(add (shl x, N), x)
15111
122
15112
122
    if (!IsProfitable(IsNeg, true, VT))
15113
89
      return SDValue();
15114
33
15115
33
    SDValue Op0 = N->getOperand(0);
15116
33
    SDValue Op1 =
15117
33
        DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
15118
33
                    DAG.getConstant((MulAmtAbs - 1).logBase2(), DL, VT));
15119
33
    SDValue Res = DAG.getNode(ISD::ADD, DL, VT, Op0, Op1);
15120
33
15121
33
    if (!IsNeg)
15122
21
      return Res;
15123
12
15124
12
    return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Res);
15125
189
  } else if ((MulAmtAbs + 1).isPowerOf2()) {
15126
68
    // (mul x, 2^N - 1) => (sub (shl x, N), x)
15127
68
    // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
15128
68
15129
68
    if (!IsProfitable(IsNeg, false, VT))
15130
30
      return SDValue();
15131
38
15132
38
    SDValue Op0 = N->getOperand(0);
15133
38
    SDValue Op1 =
15134
38
        DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
15135
38
                    DAG.getConstant((MulAmtAbs + 1).logBase2(), DL, VT));
15136
38
15137
38
    if (!IsNeg)
15138
26
      return DAG.getNode(ISD::SUB, DL, VT, Op1, Op0);
15139
12
    else
15140
12
      return DAG.getNode(ISD::SUB, DL, VT, Op0, Op1);
15141
121
15142
121
  } else {
15143
121
    return SDValue();
15144
121
  }
15145
311
}
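// [Annotation] Worked instances of the strength reduction above for scalar
// i64 on POWER9, using the cost table in IsProfitable (mul 5, add/shl 2):
//   x * 5  (5 = 2^2 + 1)  -> add (shl x, 2), x   ; cost 4 < 5, done
//   x * 7  (7 = 2^3 - 1)  -> sub (shl x, 3), x   ; cost 4 < 5, done
//   x * -5                -> sub 0, (add (shl x, 2), x) costs 6 > 5, so the
//                            negated add-one form is vector-only (mul is 7).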
15146
15147
180
bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
15148
180
  // Only duplicate to increase tail-calls for the 64-bit SysV ABIs.
15149
180
  if (!Subtarget.isSVR4ABI() || !Subtarget.isPPC64())
15150
4
    return false;
15151
176
15152
176
  // If not a tail call then no need to proceed.
15153
176
  if (!CI->isTailCall())
15154
30
    return false;
15155
146
15156
146
  // If tail calls are disabled for the caller then we are done.
15157
146
  const Function *Caller = CI->getParent()->getParent();
15158
146
  auto Attr = Caller->getFnAttribute("disable-tail-calls");
15159
146
  if (Attr.getValueAsString() == "true")
15160
0
    return false;
15161
146
15162
146
  // If sibling calls have been disabled and tail-calls aren't guaranteed
15163
146
  // there is no reason to duplicate.
15164
146
  auto &TM = getTargetMachine();
15165
146
  if (!TM.Options.GuaranteedTailCallOpt && DisableSCO)
15166
0
    return false;
15167
146
15168
146
  // Can't tail call a function called indirectly, or if it has variadic args.
15169
146
  const Function *Callee = CI->getCalledFunction();
15170
146
  if (!Callee || Callee->isVarArg())
15171
4
    return false;
15172
142
15173
142
  // Make sure the callee and caller calling conventions are eligible for tco.
15174
142
  if (!areCallingConvEligibleForTCO_64SVR4(Caller->getCallingConv(),
15175
142
                                           CI->getCallingConv()))
15176
0
      return false;
15177
142
15178
142
  // If the function is local then we have a good chance at tail-calling it
15179
142
  return getTargetMachine().shouldAssumeDSOLocal(*Caller->getParent(), Callee);
15180
142
}
15181
15182
1.77k
bool PPCTargetLowering::hasBitPreservingFPLogic(EVT VT) const {
15183
1.77k
  if (!Subtarget.hasVSX())
15184
386
    return false;
15185
1.38k
  if (Subtarget.hasP9Vector() && VT == MVT::f128)
15186
20
    return true;
15187
1.36k
  return VT == MVT::f32 || VT == MVT::f64 ||
15188
1.36k
    VT == MVT::v4f32 || VT == MVT::v2f64;
15189
1.36k
}
15190
15191
bool PPCTargetLowering::
15192
6
isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const {
15193
6
  const Value *Mask = AndI.getOperand(1);
15194
6
  // If the mask is suitable for andi. or andis. we should sink the and.
15195
6
  if (const ConstantInt *CI = dyn_cast<ConstantInt>(Mask)) {
15196
6
    // Can't handle constants wider than 64-bits.
15197
6
    if (CI->getBitWidth() > 64)
15198
0
      return false;
15199
6
    int64_t ConstVal = CI->getZExtValue();
15200
6
    return isUInt<16>(ConstVal) ||
15201
6
      (isUInt<16>(ConstVal >> 16) && !(ConstVal & 0xFFFF));
15202
6
  }
15203
0
15204
0
  // For non-constant masks, we can always use the record-form and.
15205
0
  return true;
15206
0
}
15207
15208
// Transform (abs (sub (zext a), (zext b))) to (vabsd a b 0)
15209
// Transform (abs (sub (zext a), (zext_invec b))) to (vabsd a b 0)
15210
// Transform (abs (sub (zext_invec a), (zext_invec b))) to (vabsd a b 0)
15211
// Transform (abs (sub (zext_invec a), (zext b))) to (vabsd a b 0)
15212
// Transform (abs (sub a, b) to (vabsd a b 1)) if a & b of type v4i32
15213
132
SDValue PPCTargetLowering::combineABS(SDNode *N, DAGCombinerInfo &DCI) const {
15214
132
  assert((N->getOpcode() == ISD::ABS) && "Need ABS node here");
15215
132
  assert(Subtarget.hasP9Altivec() &&
15216
132
         "Only combine this when P9 altivec supported!");
15217
132
  EVT VT = N->getValueType(0);
15218
132
  if (VT != MVT::v4i32 && VT != MVT::v8i16 && VT != MVT::v16i8)
15219
74
    return SDValue();
15220
58
15221
58
  SelectionDAG &DAG = DCI.DAG;
15222
58
  SDLoc dl(N);
15223
58
  if (N->getOperand(0).getOpcode() == ISD::SUB) {
15224
42
    // Even for signed integers, if it's known to be positive (as signed
15225
42
    // integer) due to zero-extended inputs.
15226
42
    unsigned SubOpcd0 = N->getOperand(0)->getOperand(0).getOpcode();
15227
42
    unsigned SubOpcd1 = N->getOperand(0)->getOperand(1).getOpcode();
15228
42
    if ((SubOpcd0 == ISD::ZERO_EXTEND ||
15229
42
         SubOpcd0 == ISD::ZERO_EXTEND_VECTOR_INREG) &&
15230
42
        (SubOpcd1 == ISD::ZERO_EXTEND ||
15231
18
         SubOpcd1 == ISD::ZERO_EXTEND_VECTOR_INREG)) {
15232
10
      return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(0).getValueType(),
15233
10
                         N->getOperand(0)->getOperand(0),
15234
10
                         N->getOperand(0)->getOperand(1),
15235
10
                         DAG.getTargetConstant(0, dl, MVT::i32));
15236
10
    }
15237
32
15238
32
    // For type v4i32, it can be optimized with xvnegsp + vabsduw
15239
32
    if (N->getOperand(0).getValueType() == MVT::v4i32 &&
15240
32
        
N->getOperand(0).hasOneUse()16
) {
15241
16
      return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(0).getValueType(),
15242
16
                         N->getOperand(0)->getOperand(0),
15243
16
                         N->getOperand(0)->getOperand(1),
15244
16
                         DAG.getTargetConstant(1, dl, MVT::i32));
15245
16
    }
15246
32
  }
15247
32
15248
32
  return SDValue();
15249
32
}
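// [Annotation] Why the first rewrite may pass 0 as the last operand: when
// both subtraction inputs are zero-extended, the difference cannot wrap
// below the signed range, so |a - b| equals the unsigned absolute
// difference and VABSD applies directly; the v4i32-only case instead passes
// 1, requesting the xvnegsp bias mentioned in the comment above.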
15250
15251
// For type v4i32/v8i16/v16i8, transform
15252
// from (vselect (setcc a, b, setugt), (sub a, b), (sub b, a)) to (vabsd a, b)
15253
// from (vselect (setcc a, b, setuge), (sub a, b), (sub b, a)) to (vabsd a, b)
15254
// from (vselect (setcc a, b, setult), (sub b, a), (sub a, b)) to (vabsd a, b)
15255
// from (vselect (setcc a, b, setule), (sub b, a), (sub a, b)) to (vabsd a, b)
15256
SDValue PPCTargetLowering::combineVSelect(SDNode *N,
15257
36
                                          DAGCombinerInfo &DCI) const {
15258
36
  assert((N->getOpcode() == ISD::VSELECT) && "Need VSELECT node here");
15259
36
  assert(Subtarget.hasP9Altivec() &&
15260
36
         "Only combine this when P9 altivec supported!");
15261
36
15262
36
  SelectionDAG &DAG = DCI.DAG;
15263
36
  SDLoc dl(N);
15264
36
  SDValue Cond = N->getOperand(0);
15265
36
  SDValue TrueOpnd = N->getOperand(1);
15266
36
  SDValue FalseOpnd = N->getOperand(2);
15267
36
  EVT VT = N->getOperand(1).getValueType();
15268
36
15269
36
  if (Cond.getOpcode() != ISD::SETCC || TrueOpnd.getOpcode() != ISD::SUB ||
15270
36
      FalseOpnd.getOpcode() != ISD::SUB)
15271
0
    return SDValue();
15272
36
15273
36
  // ABSD only available for type v4i32/v8i16/v16i8
15274
36
  if (VT != MVT::v4i32 && VT != MVT::v8i16 && VT != MVT::v16i8)
15275
6
    return SDValue();
15276
30
15277
30
  // At least to save one more dependent computation
15278
30
  if (!(Cond.hasOneUse() || TrueOpnd.hasOneUse() || FalseOpnd.hasOneUse()))
15279
0
    return SDValue();
15280
30
15281
30
  ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
15282
30
15283
30
  // Can only handle unsigned comparison here
15284
30
  switch (CC) {
15285
30
  default:
15286
0
    return SDValue();
15287
30
  case ISD::SETUGT:
15288
18
  case ISD::SETUGE:
15289
18
    break;
15290
18
  case ISD::SETULT:
15291
12
  case ISD::SETULE:
15292
12
    std::swap(TrueOpnd, FalseOpnd);
15293
12
    break;
15294
30
  }
15295
30
15296
30
  SDValue CmpOpnd1 = Cond.getOperand(0);
15297
30
  SDValue CmpOpnd2 = Cond.getOperand(1);
15298
30
15299
30
  // SETCC CmpOpnd1 CmpOpnd2 cond
15300
30
  // TrueOpnd = CmpOpnd1 - CmpOpnd2
15301
30
  // FalseOpnd = CmpOpnd2 - CmpOpnd1
15302
30
  if (TrueOpnd.getOperand(0) == CmpOpnd1 &&
15303
30
      TrueOpnd.getOperand(1) == CmpOpnd2 &&
15304
30
      FalseOpnd.getOperand(0) == CmpOpnd2 &&
15305
30
      FalseOpnd.getOperand(1) == CmpOpnd1) {
15306
24
    return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(1).getValueType(),
15307
24
                       CmpOpnd1, CmpOpnd2,
15308
24
                       DAG.getTargetConstant(0, dl, MVT::i32));
15309
24
  }
15310
6
15311
6
  return SDValue();
15312
6
}
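// [Annotation] A hypothetical C loop that, once vectorized to v16i8 on a P9
// Altivec subtarget, yields exactly this vselect(setcc, sub, sub) shape and
// collapses to a single unsigned absolute-difference operation:
//
//   void absdiff(unsigned char *o, const unsigned char *a,
//                const unsigned char *b) {
//     for (int i = 0; i != 16; ++i)
//       o[i] = a[i] > b[i] ? a[i] - b[i] : b[i] - a[i];
//   }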